Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions lib/routes/farsnews/namespace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import type { Namespace } from '@/types';

/**
 * Namespace descriptor for the Fars News routes.
 */
export const namespace: Namespace = {
// Human-readable name of the namespace
name: 'Fars News',
// Site domain, given without a scheme
url: 'farsnews.ir',
// Language tag of the site's content
lang: 'fa',
};
150 changes: 150 additions & 0 deletions lib/routes/farsnews/showcase.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import type { Route } from '@/types';

Check failure

Code scanning / oxlint

simple-import-sort(imports) Error

Run autofix to sort these imports!
import cache from '@/utils/cache';
import got from '@/utils/got';
import { parseDate } from '@/utils/parse-date';
import { load } from 'cheerio';

export const route: Route = {
path: '/showcase/:category?',
categories: ['traditional-media'],
example: '/farsnews/showcase',
parameters: { category: 'Category slug from farsnews.ir/showcase URL' },
features: {
requireConfig: false,
requirePuppeteer: false,
antiCrawler: true,
supportBT: false,
supportPodcast: false,
supportScihub: false,
},
radar: [{
source: ['farsnews.ir/showcase'],
target: '/farsnews/showcase',
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
target: '/farsnews/showcase',
target: '/showcase',

}],
name: 'Showcase',
maintainers: ['github-oysl'],
handler,
description: 'Fars News showcase articles. Persian news agency.',
};

/**
 * Extracts the JSON payload assigned to `window.__hydrationDataString` in a page.
 *
 * The first `<script>` whose content mentions the marker is searched for a
 * single-quoted assignment, and the quoted text is parsed as JSON.
 *
 * @param html Raw HTML of the page.
 * @returns The parsed payload, or `null` when no script carries the marker,
 *          the assignment does not match, or the captured text is not valid JSON.
 */
function extractHydrationData(html: string): any {
    const $ = load(html);

    // Locate the first script block that mentions the hydration variable
    let scriptSource: string | null = null;
    for (const element of $('script').toArray()) {
        const content = $(element).html();
        if (content?.includes('window.__hydrationDataString')) {
            scriptSource = content;
            break;
        }
    }
    if (!scriptSource) {
        return null;
    }

    // Capture everything between the single quotes of the assignment
    const captured = /window\.__hydrationDataString\s*=\s*'([^']+)'/.exec(scriptSource);
    if (!captured) {
        return null;
    }

    try {
        return JSON.parse(captured[1]);
    } catch {
        // Malformed payloads are treated the same as a missing payload
        return null;
    }
}

async function handler(ctx) {
const category = ctx.req.param('category') ?? '';
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary `?? ''`. If one visits the route without a category, ctx.req.param('category') will be undefined. undefined is a falsy value, which already works for:

const currentUrl = category ? `${baseUrl}/showcase/${category}` : `${baseUrl}/showcase`;

const baseUrl = 'https://farsnews.ir';
const currentUrl = category ? `${baseUrl}/showcase/${category}` : `${baseUrl}/showcase`;
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The site places 'showcase' after category. Are you sure ${baseUrl}/showcase/${category} is a proper URL?


const response = await got({ method: 'get', url: currentUrl });
const hydrationData = extractHydrationData(response.data);

// Try to get articles from hydration data first.
// The article list is looked up at three candidate locations, in order:
// top level, under `showcase`, then under `data`. The first truthy one wins.
// NOTE(review): a reviewer reports the payload keeps its data under `apiCache`,
// in which case none of these candidates match — confirm against a live page.
const hydratedArticles = hydrationData?.articles || hydrationData?.showcase?.articles || hydrationData?.data?.articles;

let items: any[] = [];
if (hydratedArticles) {
    items = hydratedArticles.map((article: any) => ({
        title: article.title || '',
        // Articles without a URL fall back to the showcase landing page
        link: article.url ? `${baseUrl}${article.url}` : `${baseUrl}/showcase`,
        pubDate: article.published_at ? parseDate(article.published_at) : undefined,
        description: article.lead || article.summary || '',
    }));
}
Comment on lines +60 to +81
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These extracted nothing from window.__hydrationDataString as the property is apiCache.


// Fallback to cheerio if hydration data doesn't contain articles
if (items.length === 0) {
    const $ = load(response.data);
    // Only hrefs shaped like `/<segment>/<digits>/...` are treated as article links
    const articlePath = /^\/[^/]+\/\d+\//;
    // Links already emitted; keeps the first occurrence of each link, in document order
    const seenLinks = new Set<string>();

    items = $('a[href^="/"]')
        .toArray()
        .flatMap((element) => {
            const $anchor = $(element);
            const href = $anchor.attr('href');
            // Prefer a heading inside the anchor; otherwise use the anchor's own text
            const title = $anchor.find('h2, h3').first().text().trim() || $anchor.text().trim();

            if (!href || !title || !articlePath.test(href)) {
                return [];
            }

            const link = `${baseUrl}${href}`;
            if (seenLinks.has(link)) {
                return [];
            }
            seenLinks.add(link);

            return [{ title, link }];
        });
}

// Fetch detail pages for full description if not available from list.
// Each item is resolved through the cache, keyed by its link.
const processedItems = await Promise.all(
    items.map((item) =>
        cache.tryGet(item.link, async () => {
            // Skip detail fetch if we already have description from list page
            if (item.description && item.description.length > 50) {
                return item;
            }

            const detailResponse = await got({ method: 'get', url: item.link });
            const detailHydration = extractHydrationData(detailResponse.data);

            // Optional chaining already covers a null hydration payload
            const articleBody = detailHydration?.article?.body || detailHydration?.content?.body || detailHydration?.data?.article?.body;
            if (articleBody) {
                item.description = articleBody;
            }

            // Parse the detail HTML at most once, and only when a fallback is needed
            if (!item.description || !item.pubDate) {
                const detail$ = load(detailResponse.data);

                // Fallback to meta description
                if (!item.description) {
                    item.description = detail$('meta[name="description"]').attr('content') || '';
                }

                // Extract pubDate from detail page if not available from list
                if (!item.pubDate) {
                    const timeText = detail$('time').attr('datetime') || detail$('.text-gray-400').first().text();
                    if (timeText) {
                        item.pubDate = parseDate(timeText);
                    }
                }
            }

            return item;
        })
    )
);
Comment on lines +55 to +143
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turns out the site has an API, so use the official API endpoint instead.

import { randomBytes, webcrypto } from 'node:crypto';
import { Packr } from 'msgpackr';

const subtle = webcrypto.subtle;
const packr = new Packr({ encodeUndefinedAsNil: true, useRecords: false });
/**
 * Builds a device identifier: the current timestamp in base 32, followed by a
 * random number in base 32 and the literal suffix `-web`, left-padded with
 * `0` to at least 16 characters.
 */
function generateDuid() {
    const timestampPart = Date.now().toString(32);
    const randomPart = Math.floor(10000000 * Math.random() + 1000000).toString(32);
    return `${timestampPart}${randomPart}-web`.padStart(16, '0');
}

/**
 * Imports an AES-GCM key whose raw material is the first 16 bytes of the
 * UTF-8 encoded `duid`.
 *
 * @param duid Identifier the key is derived from.
 * @returns Promise of an extractable key usable for both encrypt and decrypt.
 */
function deriveKey(duid) {
    const encodedDuid = new TextEncoder().encode(duid);
    const keyMaterial = encodedDuid.slice(0, 16);
    const extractable = true;
    return subtle.importKey('raw', keyMaterial, { name: 'AES-GCM' }, extractable, ['encrypt', 'decrypt']);
}

/**
 * Encrypts `plaintext` with AES-GCM (128-bit tag) under a fresh random
 * 12-byte IV.
 *
 * @param key AES-GCM key to encrypt with.
 * @param plaintext Bytes to encrypt.
 * @returns A single `Uint8Array`: the 12-byte IV followed by the encryption output.
 */
async function encrypt(key, plaintext) {
    const nonce = randomBytes(12);
    const sealed = new Uint8Array(await subtle.encrypt({ name: 'AES-GCM', tagLength: 128, iv: nonce }, key, plaintext));

    // Frame the message as IV || ciphertext so the receiver can split it again
    const framed = new Uint8Array(nonce.length + sealed.length);
    framed.set(nonce, 0);
    framed.set(sealed, nonce.length);
    return framed;
}

/**
 * Decrypts a payload laid out as a 12-byte IV followed by AES-GCM output
 * (128-bit tag).
 *
 * @param key AES-GCM key to decrypt with.
 * @param payload IV-prefixed encrypted bytes.
 * @returns The decrypted bytes as a `Uint8Array`.
 */
async function decrypt(key, payload) {
    // Split the frame: first 12 bytes are the IV, the rest is the ciphertext
    const [iv, ciphertext] = [payload.slice(0, 12), payload.slice(12)];
    const plaintext = await subtle.decrypt({ name: 'AES-GCM', tagLength: 128, iv }, key, ciphertext);
    return new Uint8Array(plaintext);
}
async function handler(ctx) {
	// ...
    const duid = generateDuid();
    const key = await deriveKey(duid);
    const headers = {
        'accept-language': 'fa',
        duid,
        platform: 'web',
        os: 'macOS',
        'app-version': '1',
        'api-version': '1',
    };
    const body: {
        location: string;
        showcaseType: 'global' | 'user';
        userID?: string;
    } = {
        location: 'showcase',
        showcaseType: category ? 'user' : 'global',
    };

    if (category) {
        const packed = packr.pack({
            usernames: [category],
        });
        const encryptedBody = await encrypt(key, packed);

        const response = await ofetch('https://api.farsnews.ir/user/info', {
            headers,
            method: 'POST',
            body: encryptedBody,
            responseType: 'arrayBuffer',
        });

        const decrypted = await decrypt(key, response);
        const { data } = packr.unpack(decrypted);
        body.userID = data.users[0].userID;
    }

    const packed = packr.pack(body);
    // encrypt

    const response = await ofetch('same base api url as above/showcase/block/listV2', {
        // same options as the category request
    });

    // decrypt and unpack

    const list = data.centerBlocks
        .find((b) => b.type === 'card-list')
        .tabs[0].contents.map((c) => {
            const post = data.showcaseContents.find((p) => p.postID === c.postID);
            const author = data.usersSummary.find((u) => u.userID === post.userID);
            return {
                title: // ...,
                description: // ...,
                link: `${baseUrl}/${author.username}/${c.postID}`,
                pubDate: // ...,
                author: // ...,
                postId: c.postID,
            };
        });

And if you want the full article:

const packed = packr.pack({
    postID: item.postId,
    checkPublicVisibility: false,
});

// encrypt
const response = await ofetch('same base api url as above/post/get', {
    // same options as the category request
});
// decrypt and unpack


return {
title: 'Fars News - Showcase',
link: currentUrl,
item: processedItems,
};
}