From ede8d9370b82157a5f0ad99bf1aa89b089e408a0 Mon Sep 17 00:00:00 2001 From: ml-scout Date: Sat, 6 Jun 2026 17:20:33 +0800 Subject: [PATCH] feat(reddit): expose post_hint / url / preview / gallery on read The read command dropped Reddit's media metadata. Surface four columns from the post payload so callers can tell link/image/gallery posts apart and fetch media: - post_hint - url_overridden_by_dest - preview_image_url (decoded from preview.images[0].source.url) - gallery_urls (decoded from gallery_data / media_metadata) Empty-string / empty-array fallbacks keep the row shape stable for text posts. Manifest regenerated. --- cli-manifest.json | 6 ++++- clis/reddit/read.js | 57 +++++++++++++++++++++++++++++++++++++++- clis/reddit/read.test.js | 29 ++++++++++++++++---- 3 files changed, 85 insertions(+), 7 deletions(-) diff --git a/cli-manifest.json b/cli-manifest.json index 36d8255f5..2878f6357 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -25050,7 +25050,11 @@ "type", "author", "score", - "text" + "text", + "post_hint", + "url_overridden_by_dest", + "preview_image_url", + "gallery_urls" ], "type": "js", "modulePath": "reddit/read.js", diff --git a/clis/reddit/read.js b/clis/reddit/read.js index 027d3eafa..4ce468230 100644 --- a/clis/reddit/read.js +++ b/clis/reddit/read.js @@ -125,7 +125,7 @@ cli({ help: `Max expansion passes when --expand-more is on (${REDDIT_EXPAND_ROUNDS_MIN}–${REDDIT_EXPAND_ROUNDS_MAX}; each round can fan out new "more" stubs)`, }, ], - columns: ['type', 'author', 'score', 'text'], + columns: ['type', 'author', 'score', 'text', 'post_hint', 'url_overridden_by_dest', 'preview_image_url', 'gallery_urls'], func: async (page, kwargs) => { // Note: --limit / --depth / --replies / --max-length keep their original // Math.max-style behaviour for backward compatibility (grandfathered in @@ -161,6 +161,40 @@ cli({ // `text`) to sidestep the silent-column-drop audit (PR #1329). 
const result = await page.evaluate(` (async function() { + function decodeHtml(s) { + if (typeof s !== 'string' || !s) return ''; + return s + .replace(/&amp;/g, '&') + .replace(/&lt;/g, '<') + .replace(/&gt;/g, '>') + .replace(/&quot;/g, '"') + .replace(/&#x27;/gi, "'") + .replace(/&#39;/g, "'"); + } + function extractRedditMedia(d) { + var post_hint = (d && d.post_hint) || ''; + var url_overridden_by_dest = (d && d.url_overridden_by_dest) || ''; + var preview_image_url = decodeHtml( + (d && d.preview && d.preview.images && d.preview.images[0] && d.preview.images[0].source && d.preview.images[0].source.url) || '' + ); + var gallery_urls = []; + var items = d && d.gallery_data && d.gallery_data.items; + var meta = d && d.media_metadata; + if (Array.isArray(items) && meta) { + for (var gi = 0; gi < items.length; gi++) { + var it = items[gi]; + var m = it && meta[it.media_id]; + var u = m && m.s && m.s.u; + if (u) gallery_urls.push(decodeHtml(u)); + } + } + return { + post_hint: post_hint, + url_overridden_by_dest: url_overridden_by_dest, + preview_image_url: preview_image_url, + gallery_urls: gallery_urls, + }; + } var postId = ${JSON.stringify(postId)}; var linkFullname = 't3_' + postId; @@ -392,11 +426,16 @@ cli({ // Post header row. var body = post.selftext || ''; if (body.length > maxLength) body = body.slice(0, maxLength) + '\\n... [truncated]'; + var postMedia = extractRedditMedia(post); rows.push({ type: 'POST', author: post.author || '[deleted]', score: post.score || 0, text: post.title + (body ? '\\n\\n' + body : '') + (post.url && !post.is_self ? '\\n' + post.url : ''), + post_hint: postMedia.post_hint, + url_overridden_by_dest: postMedia.url_overridden_by_dest, + preview_image_url: postMedia.preview_image_url, + gallery_urls: postMedia.gallery_urls, }); // Recursive comment walker. 
@@ -418,6 +457,10 @@ cli({ author: d.author || '[deleted]', score: d.score || 0, text: indentedBody, + post_hint: '', + url_overridden_by_dest: '', + preview_image_url: '', + gallery_urls: [], }); var t1Children = []; @@ -443,6 +486,10 @@ cli({ author: '', score: '', text: cutoffIndent + '[+' + totalHidden + ' more replies]', + post_hint: '', + url_overridden_by_dest: '', + preview_image_url: '', + gallery_urls: [], }); } return; @@ -463,6 +510,10 @@ cli({ author: '', score: '', text: moreIndent + '[+' + hidden + ' more replies]', + post_hint: '', + url_overridden_by_dest: '', + preview_image_url: '', + gallery_urls: [], }); } } @@ -489,6 +540,10 @@ cli({ author: '', score: '', text: '[+' + hiddenTopLevel + ' more top-level comments]', + post_hint: '', + url_overridden_by_dest: '', + preview_image_url: '', + gallery_urls: [], }); } diff --git a/clis/reddit/read.test.js b/clis/reddit/read.test.js index cd173b61a..e89591d3b 100644 --- a/clis/reddit/read.test.js +++ b/clis/reddit/read.test.js @@ -81,7 +81,18 @@ describe('reddit read adapter', () => { it('uses an ephemeral Reddit site tab by default', () => { expect(command?.browser).toBe(true); expect(command?.siteSession).toBeUndefined(); - expect(command?.columns).toEqual(['type', 'author', 'score', 'text']); + expect(command?.columns).toEqual([ + 'type', 'author', 'score', 'text', + 'post_hint', 'url_overridden_by_dest', 'preview_image_url', 'gallery_urls', + ]); + }); + + it('embeds extractRedditMedia in the browser-evaluated source and applies it to the POST row', async () => { + const page = makePage({ kind: 'ok', rows: [], expandMeta: { rounds: 0, fetched: 0, capped: false, errors: [] } }); + await command.func(page, { 'post-id': 'abc123', limit: 5 }); + const src = page.evaluate.mock.calls[0][0]; + expect(src).toContain('function extractRedditMedia'); + expect(src).toContain('var postMedia = extractRedditMedia(post)'); }); it('exposes the new --expand-more / --expand-rounds args', () => { @@ -153,16 +164,24 @@ 
describe('reddit read adapter', () => { const page = makePage({ kind: 'ok', rows: [ - { type: 'POST', author: 'alice', score: 10, text: 'Title' }, - { type: 'L0', author: 'bob', score: 5, text: 'Comment' }, + { type: 'POST', author: 'alice', score: 10, text: 'Title', + post_hint: 'image', url_overridden_by_dest: 'https://i.redd.it/a.jpg', + preview_image_url: 'https://preview.redd.it/a.jpg?width=640', + gallery_urls: [] }, + { type: 'L0', author: 'bob', score: 5, text: 'Comment', + post_hint: '', url_overridden_by_dest: '', preview_image_url: '', gallery_urls: [] }, ], expandMeta: { rounds: 0, fetched: 0, capped: false, errors: [] }, }); const result = await command.func(page, { 'post-id': 'abc123', limit: 5 }); expect(page.goto).toHaveBeenCalledWith('https://www.reddit.com'); expect(result).toEqual([ - { type: 'POST', author: 'alice', score: 10, text: 'Title' }, - { type: 'L0', author: 'bob', score: 5, text: 'Comment' }, + { type: 'POST', author: 'alice', score: 10, text: 'Title', + post_hint: 'image', url_overridden_by_dest: 'https://i.redd.it/a.jpg', + preview_image_url: 'https://preview.redd.it/a.jpg?width=640', + gallery_urls: [] }, + { type: 'L0', author: 'bob', score: 5, text: 'Comment', + post_hint: '', url_overridden_by_dest: '', preview_image_url: '', gallery_urls: [] }, ]); });