From b6ea240c9b000c3862cc9e2161728f5a4edd337a Mon Sep 17 00:00:00 2001 From: Nikolai Shcheglov Date: Sun, 26 Apr 2026 15:49:32 -0500 Subject: [PATCH 1/3] feat(route): add Meta AI global search Adds /meta/ai/global-search/:routeParams? backed by the useFBAIGlobalSearchQuery GraphQL query (doc_id 9716930201759979). Filters supported via path-encoded routeParams: q, content_types (person/publication/blog/dataset/event/tool), research_areas, filter_tags, years, location_cities, alphabetical_filter, sort_by (RELEVANCE/MOST_RECENT/ALPHABETICAL/RANDOM), offset. Limit stays as ?limit=N because the cache layer keys on it. Path-based filters keep each combination in its own cache entry, matching the routeParams pattern used by the weibo/keyword route. Extracts the LSD/SiteData fetch and GraphQL body builder into routes/meta/utils.ts so ai-blog.ts and ai-global-search.ts share the boilerplate. --- lib/routes/meta/ai-blog.ts | 87 +++---------------- lib/routes/meta/ai-global-search.ts | 129 ++++++++++++++++++++++++++++ lib/routes/meta/utils.ts | 82 ++++++++++++++++++ 3 files changed, 222 insertions(+), 76 deletions(-) create mode 100644 lib/routes/meta/ai-global-search.ts create mode 100644 lib/routes/meta/utils.ts diff --git a/lib/routes/meta/ai-blog.ts b/lib/routes/meta/ai-blog.ts index ab308d5629e8..26512e762b8e 100644 --- a/lib/routes/meta/ai-blog.ts +++ b/lib/routes/meta/ai-blog.ts @@ -1,9 +1,9 @@ -import { load } from 'cheerio'; - import type { Route } from '@/types'; import ofetch from '@/utils/ofetch'; import { parseDate } from '@/utils/parse-date'; +import { buildGraphqlBody, getMetaServerContext, GRAPHQL_ENDPOINT, metaGraphqlHeaders } from './utils'; + export const route: Route = { path: '/ai/blog', categories: ['programming'], @@ -23,82 +23,17 @@ async function handler(ctx) { const limit = Number.parseInt(ctx.req.query('limit') || 12, 10); const link = 'https://ai.meta.com/blog/'; - const res = await ofetch(link, { - headers: { - // All these headers are required - 'sec-fetch-dest': 'document', - 'sec-fetch-mode': 'navigate', - 'sec-fetch-site': 'none', - 'sec-fetch-user': '?1', - }, - }); - const $ = load(res); - const script = $('script:contains("DTSGInitialData"):first').text(); - const serverJs = JSON.parse(script.match(/\(new ServerJS\(\)\)\.handle\((\{[\s\S]*?\})\);/)?.[1] || '{}'); - - type ServerData = { - LSD: { token: string }; - SiteData: { - haste_session: string; - hsi: string; - __spin_r: number; - __spin_b: string; - __spin_t: number; - }; - }; + const { $, server } = await getMetaServerContext(link); + const friendlyName = 'MetaAIBlogRecentPostSearchQuery'; - const server: ServerData = { - LSD: { token: '' }, - SiteData: { - haste_session: '', - hsi: '', - __spin_r: 0, - __spin_b: 'trunk', - __spin_t: Date.now(), - }, - }; - - for (const obj of serverJs.define) { - const key = obj[0]; - const value = obj[2]; - server[key as keyof ServerData] = value; - } - - const data = await ofetch('https://ai.meta.com/api/graphql/', { + const data = await ofetch(GRAPHQL_ENDPOINT, { method: 'POST', - headers: { - 'content-type': 'application/x-www-form-urlencoded', - 'sec-fetch-dest': 'empty', - 'sec-fetch-mode': 'cors', - 'sec-fetch-site': 'same-origin', - 'x-asbd-id': '359341', - 'x-fb-friendly-name': 'MetaAIBlogRecentPostSearchQuery', - 'x-fb-lsd': server.LSD.token, - }, - body: new URLSearchParams({ - av: '0', - __user: '0', - __a: '1', - __req: '1', - // __hs: server.SiteData.haste_session || '', - dpr: '1', - __ccg: 'EXCELLENT', - __rev: String(server.SiteData.__spin_r || ''), - // __s: '', - // __hsi: server.SiteData.hsi || '', - // __dyn: '', - // __hsdp: '', - // __hblp: '', - lsd: server.LSD.token, - // jazoest: '', - __spin_r: String(server.SiteData.__spin_r || ''), - __spin_b: String(server.SiteData.__spin_b || 'trunk'), - __spin_t: String(server.SiteData.__spin_t || Date.now()), - fb_api_caller_class: 'RelayModern', - fb_api_req_friendly_name: 'MetaAIBlogRecentPostSearchQuery', - variables: JSON.stringify({ input: { query: '', from: 0, limit, tags: [], excludeObjectIDs: ['27568536916124137'] } }), - server_timestamps: 'true', - doc_id: '9516719638450392', + headers: metaGraphqlHeaders(server, friendlyName), + body: buildGraphqlBody({ + server, + friendlyName, + docId: '9516719638450392', + variables: { input: { query: '', from: 0, limit, tags: [], excludeObjectIDs: ['27568536916124137'] } }, }), parseResponse: JSON.parse, }); diff --git a/lib/routes/meta/ai-global-search.ts b/lib/routes/meta/ai-global-search.ts new file mode 100644 index 000000000000..c04c623be33c --- /dev/null +++ b/lib/routes/meta/ai-global-search.ts @@ -0,0 +1,129 @@ +import querystring from 'node:querystring'; + +import type { Route } from '@/types'; +import ofetch from '@/utils/ofetch'; +import { parseDate } from '@/utils/parse-date'; + +import { buildGraphqlBody, getMetaServerContext, GRAPHQL_ENDPOINT, metaGraphqlHeaders } from './utils'; + +export const route: Route = { + path: '/ai/global-search/:routeParams?', + categories: ['programming'], + example: '/meta/ai/global-search/content_types=blog', + name: 'AI Global Search', + maintainers: ['shcheglovnd'], + url: 'ai.meta.com/global_search/', + parameters: { + routeParams: + 'URL-encoded query string of filters (path-based so each combination caches independently). Supported keys: `q` (search query), `content_types` (comma-separated: `person`, `publication`, `blog`, `dataset`, `event`, `tool`), `research_areas` (e.g. `natural-language-processing,computer-vision`), `filter_tags` (`research`, `ml-applications`, `open-source`, `developer-tools`, `ar-vr`, `hardware`), `years` (e.g. `2024,2025`), `location_cities` (publication venues like `AAAI,ACL`), `alphabetical_filter` (single letter, pairs with `content_types=person`+`sort_by=ALPHABETICAL`), `sort_by` (`RELEVANCE`, `MOST_RECENT`, `ALPHABETICAL`, `RANDOM`, default `RELEVANCE`), `offset` (default `0`).', + limit: 'Number of items to return (default `36`). Provided as a normal query string (`?limit=N`) since the cache layer keys on it.', + }, + radar: [ + { + source: ['ai.meta.com/global_search/', 'ai.meta.com/global_search', 'ai.meta.com/results/'], + }, + ], + handler, +}; + +const toList = (value: string | undefined): string[] | null => { + if (!value) { + return null; + } + const list = value + .split(',') + .map((s) => s.trim()) + .filter(Boolean); + return list.length ? list : null; +}; + +const firstString = (value: string | string[] | undefined): string | undefined => (Array.isArray(value) ? value[0] : value); + +type ResultShape = { + title: string; + description: string | null; + href: string; + image_src: string | null; + cmsid: string; + type: string; + authors: string | null; + tags: string[] | null; + location: string | null; + journal_number: string | null; + published_time: string | null; + year: string | null; +}; + +async function handler(ctx) { + const link = 'https://ai.meta.com/global_search/'; + const { server } = await getMetaServerContext(link); + + const params = querystring.parse(ctx.req.param('routeParams') || ''); + + const limit = Number.parseInt(ctx.req.query('limit') ?? '36', 10); + const offset = Number.parseInt(firstString(params.offset) ?? '0', 10); + const searchQuery = firstString(params.q) ?? firstString(params.search_query) ?? ''; + const sortBy = firstString(params.sort_by) ?? 'RELEVANCE'; + const alphabeticalFilter = firstString(params.alphabetical_filter) || null; + + const variables = { + input: { + alphabetical_filter: alphabeticalFilter, + content_types: toList(firstString(params.content_types)), + offset, + search_query: searchQuery, + sort_by: sortBy, + filter_tags: toList(firstString(params.filter_tags)), + location_cities: toList(firstString(params.location_cities)), + research_areas: toList(firstString(params.research_areas)), + years: toList(firstString(params.years)), + }, + }; + + const friendlyName = 'useFBAIGlobalSearchQuery'; + const data = await ofetch(GRAPHQL_ENDPOINT, { + method: 'POST', + headers: metaGraphqlHeaders(server, friendlyName), + body: buildGraphqlBody({ + server, + friendlyName, + docId: '9716930201759979', + variables, + }), + parseResponse: JSON.parse, + }); + + const result = data?.data?.result; + const shapes: ResultShape[] = result?.result_shapes ?? []; + + const items = shapes.slice(0, limit).map((item) => ({ + title: item.title, + description: item.description ?? '', + link: item.href?.startsWith('http') ? item.href : `https://ai.meta.com${item.href}`, + pubDate: item.published_time ? parseDate(Number(item.published_time) * 1000) : undefined, + author: item.authors || undefined, + category: [item.type, ...(item.tags ?? [])].filter(Boolean) as string[], + image: item.image_src || undefined, + guid: item.cmsid, + })); + + const filterSummary = [ + searchQuery && `q=${searchQuery}`, + variables.input.content_types && `content_types=${variables.input.content_types.join(',')}`, + variables.input.research_areas && `research_areas=${variables.input.research_areas.join(',')}`, + variables.input.filter_tags && `filter_tags=${variables.input.filter_tags.join(',')}`, + variables.input.years && `years=${variables.input.years.join(',')}`, + variables.input.location_cities && `location_cities=${variables.input.location_cities.join(',')}`, + alphabeticalFilter && `alphabetical_filter=${alphabeticalFilter}`, + ] + .filter(Boolean) + .join(' · '); + + const baseTitle = 'Meta AI Global Search'; + return { + title: filterSummary ? `${baseTitle} — ${filterSummary}` : baseTitle, + description: `Search results from ai.meta.com/global_search/ (sort: ${sortBy}, total hits: ${result?.total_hits ?? 0}).`, + link, + item: items, + }; +} diff --git a/lib/routes/meta/utils.ts b/lib/routes/meta/utils.ts new file mode 100644 index 000000000000..35adf8434dec --- /dev/null +++ b/lib/routes/meta/utils.ts @@ -0,0 +1,82 @@ +import { type CheerioAPI, load } from 'cheerio'; + +import ofetch from '@/utils/ofetch'; + +export type ServerData = { + LSD: { token: string }; + SiteData: { + haste_session: string; + hsi: string; + __spin_r: number; + __spin_b: string; + __spin_t: number; + }; +}; + +export async function getMetaServerContext(link: string): Promise<{ $: CheerioAPI; server: ServerData }> { + const res = await ofetch(link, { + headers: { + 'sec-fetch-dest': 'document', + 'sec-fetch-mode': 'navigate', + 'sec-fetch-site': 'none', + 'sec-fetch-user': '?1', + }, + }); + const $ = load(res); + const script = $('script:contains("DTSGInitialData"):first').text(); + const serverJs = JSON.parse(script.match(/\(new ServerJS\(\)\)\.handle\((\{[\s\S]*?\})\);/)?.[1] || '{}'); + + const server: ServerData = { + LSD: { token: '' }, + SiteData: { + haste_session: '', + hsi: '', + __spin_r: 0, + __spin_b: 'trunk', + __spin_t: Date.now(), + }, + }; + + for (const obj of serverJs.define ?? []) { + const key = obj[0]; + const value = obj[2]; + server[key as keyof ServerData] = value; + } + + return { $, server }; +} + +export function buildGraphqlBody({ server, friendlyName, docId, variables }: { server: ServerData; friendlyName: string; docId: string; variables: unknown }) { + return new URLSearchParams({ + av: '0', + __user: '0', + __a: '1', + __req: '1', + dpr: '1', + __ccg: 'EXCELLENT', + __rev: String(server.SiteData.__spin_r || ''), + lsd: server.LSD.token, + __spin_r: String(server.SiteData.__spin_r || ''), + __spin_b: String(server.SiteData.__spin_b || 'trunk'), + __spin_t: String(server.SiteData.__spin_t || Date.now()), + fb_api_caller_class: 'RelayModern', + fb_api_req_friendly_name: friendlyName, + variables: JSON.stringify(variables), + server_timestamps: 'true', + doc_id: docId, + }); +} + +export function metaGraphqlHeaders(server: ServerData, friendlyName: string) { + return { + 'content-type': 'application/x-www-form-urlencoded', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-origin', + 'x-asbd-id': '359341', + 'x-fb-friendly-name': friendlyName, + 'x-fb-lsd': server.LSD.token, + }; +} + +export const GRAPHQL_ENDPOINT = 'https://ai.meta.com/api/graphql/'; From a7a25d373768302e1d4a0b1c5a0db0abc5bae830 Mon Sep 17 00:00:00 2001 From: Nikolai Shcheglov Date: Sun, 26 Apr 2026 16:03:15 -0500 Subject: [PATCH 2/3] refactor: lower handler complexity in ai-global-search CodeFactor flagged the handler as a Complex Method (cyclomatic complexity 16). Extract buildSearchInput, summarizeFilters, and mapItem helpers so handler becomes a thin orchestrator and each helper has a small, isolated branch count. No behavior change; verified locally against the same filter combinations as the original (default, single content_type, multi-filter with q+years+sort_by, alphabetical+person). --- lib/routes/meta/ai-global-search.ts | 110 +++++++++++++++++----------- 1 file changed, 66 insertions(+), 44 deletions(-) diff --git a/lib/routes/meta/ai-global-search.ts b/lib/routes/meta/ai-global-search.ts index c04c623be33c..d276388c9475 100644 --- a/lib/routes/meta/ai-global-search.ts +++ b/lib/routes/meta/ai-global-search.ts @@ -54,31 +54,74 @@ type ResultShape = { year: string | null; }; +type SearchInput = { + alphabetical_filter: string | null; + content_types: string[] | null; + offset: number; + search_query: string; + sort_by: string; + filter_tags: string[] | null; + location_cities: string[] | null; + research_areas: string[] | null; + years: string[] | null; +}; + +const buildSearchInput = (params: querystring.ParsedUrlQuery): SearchInput => ({ + alphabetical_filter: firstString(params.alphabetical_filter) || null, + content_types: toList(firstString(params.content_types)), + offset: Number.parseInt(firstString(params.offset) ?? '0', 10), + search_query: firstString(params.q) ?? firstString(params.search_query) ?? '', + sort_by: firstString(params.sort_by) ?? 'RELEVANCE', + filter_tags: toList(firstString(params.filter_tags)), + location_cities: toList(firstString(params.location_cities)), + research_areas: toList(firstString(params.research_areas)), + years: toList(firstString(params.years)), +}); + +const summarizeFilters = (input: SearchInput): string => { + const parts: string[] = []; + if (input.search_query) { + parts.push(`q=${input.search_query}`); + } + if (input.content_types) { + parts.push(`content_types=${input.content_types.join(',')}`); + } + if (input.research_areas) { + parts.push(`research_areas=${input.research_areas.join(',')}`); + } + if (input.filter_tags) { + parts.push(`filter_tags=${input.filter_tags.join(',')}`); + } + if (input.years) { + parts.push(`years=${input.years.join(',')}`); + } + if (input.location_cities) { + parts.push(`location_cities=${input.location_cities.join(',')}`); + } + if (input.alphabetical_filter) { + parts.push(`alphabetical_filter=${input.alphabetical_filter}`); + } + return parts.join(' · '); +}; + +const mapItem = (item: ResultShape) => ({ + title: item.title, + description: item.description ?? '', + link: item.href?.startsWith('http') ? item.href : `https://ai.meta.com${item.href}`, + pubDate: item.published_time ? parseDate(Number(item.published_time) * 1000) : undefined, + author: item.authors || undefined, + category: [item.type, ...(item.tags ?? [])].filter(Boolean) as string[], + image: item.image_src || undefined, + guid: item.cmsid, +}); + async function handler(ctx) { const link = 'https://ai.meta.com/global_search/'; const { server } = await getMetaServerContext(link); const params = querystring.parse(ctx.req.param('routeParams') || ''); - const limit = Number.parseInt(ctx.req.query('limit') ?? '36', 10); - const offset = Number.parseInt(firstString(params.offset) ?? '0', 10); - const searchQuery = firstString(params.q) ?? firstString(params.search_query) ?? ''; - const sortBy = firstString(params.sort_by) ?? 'RELEVANCE'; - const alphabeticalFilter = firstString(params.alphabetical_filter) || null; - - const variables = { - input: { - alphabetical_filter: alphabeticalFilter, - content_types: toList(firstString(params.content_types)), - offset, - search_query: searchQuery, - sort_by: sortBy, - filter_tags: toList(firstString(params.filter_tags)), - location_cities: toList(firstString(params.location_cities)), - research_areas: toList(firstString(params.research_areas)), - years: toList(firstString(params.years)), - }, - }; + const input = buildSearchInput(params); const friendlyName = 'useFBAIGlobalSearchQuery'; const data = await ofetch(GRAPHQL_ENDPOINT, { @@ -88,41 +131,20 @@ async function handler(ctx) { server, friendlyName, docId: '9716930201759979', - variables, + variables: { input }, }), parseResponse: JSON.parse, }); const result = data?.data?.result; const shapes: ResultShape[] = result?.result_shapes ?? []; + const items = shapes.slice(0, limit).map((item) => mapItem(item)); - const items = shapes.slice(0, limit).map((item) => ({ - title: item.title, - description: item.description ?? '', - link: item.href?.startsWith('http') ? item.href : `https://ai.meta.com${item.href}`, - pubDate: item.published_time ? parseDate(Number(item.published_time) * 1000) : undefined, - author: item.authors || undefined, - category: [item.type, ...(item.tags ?? [])].filter(Boolean) as string[], - image: item.image_src || undefined, - guid: item.cmsid, - })); - - const filterSummary = [ - searchQuery && `q=${searchQuery}`, - variables.input.content_types && `content_types=${variables.input.content_types.join(',')}`, - variables.input.research_areas && `research_areas=${variables.input.research_areas.join(',')}`, - variables.input.filter_tags && `filter_tags=${variables.input.filter_tags.join(',')}`, - variables.input.years && `years=${variables.input.years.join(',')}`, - variables.input.location_cities && `location_cities=${variables.input.location_cities.join(',')}`, - alphabeticalFilter && `alphabetical_filter=${alphabeticalFilter}`, - ] - .filter(Boolean) - .join(' · '); - + const filterSummary = summarizeFilters(input); const baseTitle = 'Meta AI Global Search'; return { title: filterSummary ? `${baseTitle} — ${filterSummary}` : baseTitle, - description: `Search results from ai.meta.com/global_search/ (sort: ${sortBy}, total hits: ${result?.total_hits ?? 0}).`, + description: `Search results from ai.meta.com/global_search/ (sort: ${input.sort_by}, total hits: ${result?.total_hits ?? 0}).`, link, item: items, }; From bb24ae6449bc20c65214b8fcd5371c2738eaaf10 Mon Sep 17 00:00:00 2001 From: Nikolai Shcheglov Date: Sun, 26 Apr 2026 16:16:10 -0500 Subject: [PATCH 3/3] fix(meta/ai-global-search): address auto-review feedback - Drop `limit` from `parameters` (Rule 7); only path params belong there. Move the `limit` hint into the route's `description` field. - Strip dynamic search metadata (`sort`, `total hits`) from the feed `description` (Rule 11). Keep it as a static description of the feed. --- lib/routes/meta/ai-global-search.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/routes/meta/ai-global-search.ts b/lib/routes/meta/ai-global-search.ts index d276388c9475..9f7c0fc538bb 100644 --- a/lib/routes/meta/ai-global-search.ts +++ b/lib/routes/meta/ai-global-search.ts @@ -15,9 +15,9 @@ export const route: Route = { url: 'ai.meta.com/global_search/', parameters: { routeParams: - 'URL-encoded query string of filters (path-based so each combination caches independently). Supported keys: `q` (search query), `content_types` (comma-separated: `person`, `publication`, `blog`, `dataset`, `event`, `tool`), `research_areas` (e.g. `natural-language-processing,computer-vision`), `filter_tags` (`research`, `ml-applications`, `open-source`, `developer-tools`, `ar-vr`, `hardware`), `years` (e.g. `2024,2025`), `location_cities` (publication venues like `AAAI,ACL`), `alphabetical_filter` (single letter, pairs with `content_types=person`+`sort_by=ALPHABETICAL`), `sort_by` (`RELEVANCE`, `MOST_RECENT`, `ALPHABETICAL`, `RANDOM`, default `RELEVANCE`), `offset` (default `0`).', - limit: 'Number of items to return (default `36`). Provided as a normal query string (`?limit=N`) since the cache layer keys on it.', + 'URL-encoded query string of filters (path-based so each combination caches independently). Supported keys: `q` (search query), `content_types` (comma-separated: `person`, `publication`, `blog`, `dataset`, `event`, `tool`), `research_areas` (e.g. `natural-language-processing,computer-vision`), `filter_tags` (`research`, `ml-applications`, `open-source`, `developer-tools`, `ar-vr`, `hardware`), `years` (e.g. `2024,2025`), `location_cities` (publication venues like `AAAI,ACL`), `alphabetical_filter` (single letter, pairs with `content_types=person`+`sort_by=ALPHABETICAL`), `sort_by` (`RELEVANCE`, `MOST_RECENT`, `ALPHABETICAL`, `RANDOM`, default `RELEVANCE`), `offset` (default `0`). Combine multiple filters by encoding `&` as `%26`.', }, + description: 'Page size can be tuned with the `limit` query string parameter (default `36`).', radar: [ { source: ['ai.meta.com/global_search/', 'ai.meta.com/global_search', 'ai.meta.com/results/'], @@ -144,7 +144,7 @@ async function handler(ctx) { const baseTitle = 'Meta AI Global Search'; return { title: filterSummary ? `${baseTitle} — ${filterSummary}` : baseTitle, - description: `Search results from ai.meta.com/global_search/ (sort: ${input.sort_by}, total hits: ${result?.total_hits ?? 0}).`, + description: 'Search results from ai.meta.com/global_search/.', link, item: items, };