From dc8dea6913169e98d6f8adbde813db2536ccbbec Mon Sep 17 00:00:00 2001 From: ubuntu Date: Wed, 13 May 2026 11:06:54 +0800 Subject: [PATCH 1/6] feat(route): add Fars News showcase --- lib/routes/farsnews/namespace.ts | 7 +++ lib/routes/farsnews/showcase.ts | 84 ++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 lib/routes/farsnews/namespace.ts create mode 100644 lib/routes/farsnews/showcase.ts diff --git a/lib/routes/farsnews/namespace.ts b/lib/routes/farsnews/namespace.ts new file mode 100644 index 000000000000..9216b08d6be6 --- /dev/null +++ b/lib/routes/farsnews/namespace.ts @@ -0,0 +1,7 @@ +import type { Namespace } from '@/types'; + +export const namespace: Namespace = { + name: 'Fars News', + url: 'farsnews.ir', + lang: 'fa', +}; diff --git a/lib/routes/farsnews/showcase.ts b/lib/routes/farsnews/showcase.ts new file mode 100644 index 000000000000..6080e3b3c78b --- /dev/null +++ b/lib/routes/farsnews/showcase.ts @@ -0,0 +1,84 @@ +import { load } from 'cheerio'; +import type { Route } from '@/types'; +import cache from '@/utils/cache'; +import got from '@/utils/got'; +import { parseDate } from '@/utils/parse-date'; + +export const route: Route = { + path: '/showcase/:category?', + categories: ['traditional-media'], + example: '/farsnews/showcase', + parameters: { category: 'Category slug from farsnews.ir/showcase URL' }, + features: { + requireConfig: false, + requirePuppeteer: false, + antiCrawler: true, + supportBT: false, + supportPodcast: false, + supportScihub: false, + }, + radar: [{ + source: ['farsnews.ir/showcase'], + target: '/showcase', + }], + name: 'Showcase', + maintainers: [], + handler, + description: 'Fars News showcase articles. Persian news agency.', +}; + +async function handler(ctx) { + const category = ctx.req.param('category') ?? ''; + const baseUrl = 'https://farsnews.ir'; + const currentUrl = category ? `${baseUrl}/showcase/${category}` : `${baseUrl}/showcase`; + + const response = await got({ method: 'get', url: currentUrl }); + const $ = load(response.data); + + const items = $('a[href^="/"]') + .toArray() + .map((item) => { + item = $(item); + const href = item.attr('href'); + const title = item.find('h2, h3').first().text().trim() || item.text().trim(); + + if (!href || !title || !href.match(/^\/[^\/]+\/\d+\//)) { + return null; + } + + return { + title, + link: `${baseUrl}${href}`, + }; + }) + .filter((item) => item !== null) + .filter((item, index, self) => self.findIndex((i) => i.link === item.link) === index); + + const processedItems = await Promise.all( + items.map((item) => + cache.tryGet(item.link, async () => { + try { + const detailResponse = await got({ method: 'get', url: item.link }); + const detail$ = load(detailResponse.data); + + const desc = detail$('meta[name="description"]').attr('content') || ''; + item.description = desc; + + const timeText = detail$('time').attr('datetime') || detail$('.text-gray-400').first().text(); + if (timeText) { + item.pubDate = parseDate(timeText); + } + } catch { + // Silently continue if detail fetch fails + } + return item; + }) + ) + ); + + return { + title: 'Fars News - Showcase', + link: currentUrl, + item: processedItems, + }; +} From 057e04409415cd09b17d14ab728084bef90b5b9f Mon Sep 17 00:00:00 2001 From: ubuntu Date: Wed, 13 May 2026 14:51:41 +0800 Subject: [PATCH 2/6] style: fix oxlint - use RegExp.test() and remove unnecessary escape --- lib/routes/farsnews/showcase.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/farsnews/showcase.ts b/lib/routes/farsnews/showcase.ts index 6080e3b3c78b..0804c49f7424 100644 --- a/lib/routes/farsnews/showcase.ts +++ b/lib/routes/farsnews/showcase.ts @@ -42,7 +42,7 @@ async function handler(ctx) { const href = item.attr('href'); const title = item.find('h2, h3').first().text().trim() || item.text().trim(); - if (!href || !title || !href.match(/^\/[^\/]+\/\d+\//)) { + if (!href || !title || !/^\/[^/]+\/\d+\//.test(href)) { return null; } From 8fa7840f0ed781efda96df3e4bd589399facadbe Mon Sep 17 00:00:00 2001 From: github-oysl Date: Thu, 14 May 2026 12:22:41 +0800 Subject: [PATCH 3/6] fix(route/farsnews): add maintainer and remove unnecessary try-catch --- lib/routes/farsnews/showcase.ts | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/lib/routes/farsnews/showcase.ts b/lib/routes/farsnews/showcase.ts index 0804c49f7424..b92d60ac6eee 100644 --- a/lib/routes/farsnews/showcase.ts +++ b/lib/routes/farsnews/showcase.ts @@ -22,7 +22,7 @@ export const route: Route = { target: '/showcase', }], name: 'Showcase', - maintainers: [], + maintainers: ['github-oysl'], handler, description: 'Fars News showcase articles. Persian news agency.', }; @@ -57,19 +57,15 @@ async function handler(ctx) { const processedItems = await Promise.all( items.map((item) => cache.tryGet(item.link, async () => { - try { - const detailResponse = await got({ method: 'get', url: item.link }); - const detail$ = load(detailResponse.data); + const detailResponse = await got({ method: 'get', url: item.link }); + const detail$ = load(detailResponse.data); - const desc = detail$('meta[name="description"]').attr('content') || ''; - item.description = desc; + const desc = detail$('meta[name="description"]').attr('content') || ''; + item.description = desc; - const timeText = detail$('time').attr('datetime') || detail$('.text-gray-400').first().text(); - if (timeText) { - item.pubDate = parseDate(timeText); - } - } catch { - // Silently continue if detail fetch fails + const timeText = detail$('time').attr('datetime') || detail$('.text-gray-400').first().text(); + if (timeText) { + item.pubDate = parseDate(timeText); } return item; }) From 297f8fbaadd76e9b9b0521a2d30beb9de09e5d89 Mon Sep 17 00:00:00 2001 From: github-oysl Date: Mon, 18 May 2026 10:45:09 +0800 Subject: [PATCH 4/6] fix(route/farsnews): correct radar target and use hydration data for description --- lib/routes/farsnews/showcase.ts | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/lib/routes/farsnews/showcase.ts b/lib/routes/farsnews/showcase.ts index b92d60ac6eee..f99384d6b42c 100644 --- a/lib/routes/farsnews/showcase.ts +++ b/lib/routes/farsnews/showcase.ts @@ -19,7 +19,7 @@ export const route: Route = { }, radar: [{ source: ['farsnews.ir/showcase'], - target: '/showcase', + target: '/farsnews/showcase', }], name: 'Showcase', maintainers: ['github-oysl'], @@ -60,8 +60,31 @@ async function handler(ctx) { const detailResponse = await got({ method: 'get', url: item.link }); const detail$ = load(detailResponse.data); - const desc = detail$('meta[name="description"]').attr('content') || ''; - item.description = desc; + // Try to extract full article content from hydration data first + const hydrationScript = detail$('script') + .toArray() + .map((script) => detail$(script).html()) + .find((html) => html?.includes('window.__hydrationDataString')); + + if (hydrationScript) { + const match = hydrationScript.match(/window\.__hydrationDataString\s*=\s*'([^']+)'/); + if (match) { + try { + const hydrationData = JSON.parse(match[1]); + const articleBody = hydrationData?.article?.body || hydrationData?.content?.body || ''; + if (articleBody) { + item.description = articleBody; + } + } catch { + // Fall back to meta description if hydration parse fails + } + } + } + + // Fallback to meta description if no hydration data + if (!item.description) { + item.description = detail$('meta[name="description"]').attr('content') || ''; + } const timeText = detail$('time').attr('datetime') || detail$('.text-gray-400').first().text(); if (timeText) { From 61aa8d3596603f859c6b28db15863a1464fadb54 Mon Sep 17 00:00:00 2001 From: github-oysl Date: Mon, 18 May 2026 10:57:29 +0800 Subject: [PATCH 5/6] refactor(route/farsnews): use hydration data for list page, remove anti-pattern selector --- lib/routes/farsnews/showcase.ts | 129 ++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 41 deletions(-) diff --git a/lib/routes/farsnews/showcase.ts b/lib/routes/farsnews/showcase.ts index f99384d6b42c..83285bfa921d 100644 --- a/lib/routes/farsnews/showcase.ts +++ b/lib/routes/farsnews/showcase.ts @@ -27,69 +27,116 @@ export const route: Route = { description: 'Fars News showcase articles. Persian news agency.', }; +function extractHydrationData(html: string): any { + const $ = load(html); + const hydrationScript = $('script') + .toArray() + .map((script) => $(script).html()) + .find((html) => html?.includes('window.__hydrationDataString')); + + if (hydrationScript) { + const match = hydrationScript.match(/window\.__hydrationDataString\s*=\s*'([^']+)'/); + if (match) { + try { + return JSON.parse(match[1]); + } catch { + return null; + } + } + } + return null; +} + async function handler(ctx) { const category = ctx.req.param('category') ?? ''; const baseUrl = 'https://farsnews.ir'; const currentUrl = category ? `${baseUrl}/showcase/${category}` : `${baseUrl}/showcase`; const response = await got({ method: 'get', url: currentUrl }); - const $ = load(response.data); + const hydrationData = extractHydrationData(response.data); - const items = $('a[href^="/"]') - .toArray() - .map((item) => { - item = $(item); - const href = item.attr('href'); - const title = item.find('h2, h3').first().text().trim() || item.text().trim(); + // Try to get articles from hydration data first + let items: any[] = []; + if (hydrationData?.articles) { + items = hydrationData.articles.map((article: any) => ({ + title: article.title || '', + link: article.url ? `${baseUrl}${article.url}` : `${baseUrl}/showcase`, + pubDate: article.published_at ? parseDate(article.published_at) : undefined, + description: article.lead || article.summary || '', + })); + } else if (hydrationData?.showcase?.articles) { + items = hydrationData.showcase.articles.map((article: any) => ({ + title: article.title || '', + link: article.url ? `${baseUrl}${article.url}` : `${baseUrl}/showcase`, + pubDate: article.published_at ? parseDate(article.published_at) : undefined, + description: article.lead || article.summary || '', + })); + } else if (hydrationData?.data?.articles) { + items = hydrationData.data.articles.map((article: any) => ({ + title: article.title || '', + link: article.url ? `${baseUrl}${article.url}` : `${baseUrl}/showcase`, + pubDate: article.published_at ? parseDate(article.published_at) : undefined, + description: article.lead || article.summary || '', + })); + } - if (!href || !title || !/^\/[^/]+\/\d+\//.test(href)) { - return null; - } + // Fallback to cheerio if hydration data doesn't contain articles + if (items.length === 0) { + const $ = load(response.data); + items = $('a[href^="/"]') + .toArray() + .map((item) => { + item = $(item); + const href = item.attr('href'); + const title = item.find('h2, h3').first().text().trim() || item.text().trim(); - return { - title, - link: `${baseUrl}${href}`, - }; - }) - .filter((item) => item !== null) - .filter((item, index, self) => self.findIndex((i) => i.link === item.link) === index); + if (!href || !title || !/^\/[^/]+\/\d+\//.test(href)) { + return null; + } + + return { + title, + link: `${baseUrl}${href}`, + }; + }) + .filter((item) => item !== null) + .filter((item, index, self) => self.findIndex((i) => i.link === item.link) === index); + } + // Fetch detail pages for full description if not available from list const processedItems = await Promise.all( items.map((item) => cache.tryGet(item.link, async () => { + // Skip detail fetch if we already have description from list page + if (item.description && item.description.length > 50) { + return item; + } + const detailResponse = await got({ method: 'get', url: item.link }); - const detail$ = load(detailResponse.data); - - // Try to extract full article content from hydration data first - const hydrationScript = detail$('script') - .toArray() - .map((script) => detail$(script).html()) - .find((html) => html?.includes('window.__hydrationDataString')); - - if (hydrationScript) { - const match = hydrationScript.match(/window\.__hydrationDataString\s*=\s*'([^']+)'/); - if (match) { - try { - const hydrationData = JSON.parse(match[1]); - const articleBody = hydrationData?.article?.body || hydrationData?.content?.body || ''; - if (articleBody) { - item.description = articleBody; - } - } catch { - // Fall back to meta description if hydration parse fails - } + const detailHydration = extractHydrationData(detailResponse.data); + + if (detailHydration) { + const articleBody = detailHydration?.article?.body || detailHydration?.content?.body || detailHydration?.data?.article?.body || ''; + if (articleBody) { + item.description = articleBody; } } - // Fallback to meta description if no hydration data + // Fallback to meta description if (!item.description) { + const detail$ = load(detailResponse.data); item.description = detail$('meta[name="description"]').attr('content') || ''; } - const timeText = detail$('time').attr('datetime') || detail$('.text-gray-400').first().text(); - if (timeText) { - item.pubDate = parseDate(timeText); + // Extract pubDate from detail page if not available from list + if (!item.pubDate) { + const detail$ = load(detailResponse.data); + const timeText = detail$('time').attr('datetime') || detail$('.text-gray-400').first().text(); + if (timeText) { + item.pubDate = parseDate(timeText); + } } + return item; }) ) From 63c631c03757dcce21759a6c9dace7a0d9788106 Mon Sep 17 00:00:00 2001 From: github-oysl Date: Mon, 18 May 2026 11:27:46 +0800 Subject: [PATCH 6/6] style(route/farsnews): fix oxlint issues --- lib/routes/farsnews/showcase.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/routes/farsnews/showcase.ts b/lib/routes/farsnews/showcase.ts index 83285bfa921d..606c2d3bb625 100644 --- a/lib/routes/farsnews/showcase.ts +++ b/lib/routes/farsnews/showcase.ts @@ -1,8 +1,8 @@ -import { load } from 'cheerio'; import type { Route } from '@/types'; import cache from '@/utils/cache'; import got from '@/utils/got'; import { parseDate } from '@/utils/parse-date'; +import { load } from 'cheerio'; export const route: Route = { path: '/showcase/:category?', @@ -35,7 +35,7 @@ function extractHydrationData(html: string): any { .find((html) => html?.includes('window.__hydrationDataString')); if (hydrationScript) { - const match = hydrationScript.match(/window\.__hydrationDataString\s*=\s*'([^']+)'/); + const match = /window\.__hydrationDataString\s*=\s*'([^']+)'/.exec(hydrationScript); if (match) { try { return JSON.parse(match[1]);