Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cli-manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -34355,6 +34355,8 @@
"columns": [
"rank",
"author",
"userId",
"profileUrl",
"text",
"likes",
"time",
Expand Down
44 changes: 39 additions & 5 deletions clis/xiaohongshu/comments.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,19 @@
import { cli, Strategy } from '@jackwener/opencli/registry';
import { AuthRequiredError, CliError, EmptyResultError } from '@jackwener/opencli/errors';
import { parseNoteId, buildNoteUrl } from './note-helpers.js';
import { normalizeXhsUserId } from './user-helpers.js';

// Selector list for anchors whose href contains "/user/profile/". It is embedded
// (via JSON.stringify) into the in-page extractor and passed to querySelector.
// NOTE(review): querySelector returns the first matching element in document
// order, not the match for the first-listed selector, so the list order does not
// express a priority. The trailing generic `a[href*="/user/profile/"]` already
// matches everything the more specific alternatives match.
const XHS_PROFILE_HREF_SELECTOR = '.author-wrapper a[href*="/user/profile/"], a.name[href*="/user/profile/"], a.user-name[href*="/user/profile/"], a[href*="/user/profile/"]';

/**
 * Build a profile URL of the form `https://<webHost>/user/profile/<userId>`
 * from a raw href.
 *
 * The href is trimmed and handed to `normalizeXhsUserId`; whatever that helper
 * returns is used as the user id segment.
 *
 * @param {unknown} href - Raw href value; anything that is not a string is treated as empty.
 * @param {string} [webHost='www.xiaohongshu.com'] - Host name placed in the resulting URL.
 * @returns {string} The profile URL, or `''` when the href is empty/blank/non-string
 *   or `normalizeXhsUserId` yields a falsy value.
 */
export function buildXhsProfileUrl(href, webHost = 'www.xiaohongshu.com') {
  if (typeof href !== 'string') {
    return '';
  }
  const trimmedHref = href.trim();
  // Skip the helper entirely for blank input so it only ever sees real text.
  const userId = trimmedHref ? normalizeXhsUserId(trimmedHref) : '';
  return userId ? `https://${webHost}/user/profile/${userId}` : '';
}
export function parseCommentLimit(raw, fallback = 20) {
const n = Number(raw);
if (!Number.isFinite(n))
Expand Down Expand Up @@ -69,6 +82,12 @@ export function buildCommentsExtractJs(withReplies) {
const parseLikes = (el) => {
return parseLikeCountText(clean(el))
}
const HREF_SELECTOR = ${JSON.stringify(XHS_PROFILE_HREF_SELECTOR)}
const extractAuthorHref = (el) => {
if (!el) return ''
const anchor = el.querySelector(HREF_SELECTOR)
return anchor ? (anchor.getAttribute('href') || '') : ''
}
const expandReplyThreads = async (root) => {
if (!withReplies || !root) return
const clickedTexts = new Set()
Expand Down Expand Up @@ -98,23 +117,25 @@ export function buildCommentsExtractJs(withReplies) {
if (!item) continue

const author = clean(item.querySelector('.author-wrapper .name, .user-name'))
const authorHrefRaw = extractAuthorHref(item)
const text = clean(item.querySelector('.content, .note-text'))
const likes = parseLikes(item.querySelector('.count'))
const time = clean(item.querySelector('.date, .time'))

if (!text) continue
results.push({ author, text, likes, time, is_reply: false, reply_to: '' })
results.push({ author, authorHrefRaw, text, likes, time, is_reply: false, reply_to: '' })

// Extract nested replies (楼中楼)
if (withReplies) {
await expandReplyThreads(p)
p.querySelectorAll('.reply-container .comment-item-sub, .sub-comment-list .comment-item').forEach(sub => {
const sAuthor = clean(sub.querySelector('.name, .user-name'))
const sAuthorHrefRaw = extractAuthorHref(sub)
const sText = clean(sub.querySelector('.content, .note-text'))
const sLikes = parseLikes(sub.querySelector('.count'))
const sTime = clean(sub.querySelector('.date, .time'))
if (!sText) return
results.push({ author: sAuthor, text: sText, likes: sLikes, time: sTime, is_reply: true, reply_to: author })
results.push({ author: sAuthor, authorHrefRaw: sAuthorHrefRaw, text: sText, likes: sLikes, time: sTime, is_reply: true, reply_to: author })
})
}
}
Expand All @@ -136,7 +157,7 @@ export const command = cli({
{ name: 'limit', type: 'int', default: 20, help: 'Number of top-level comments (max 50)' },
{ name: 'with-replies', type: 'boolean', default: false, help: 'Include nested replies (楼中楼)' },
],
columns: ['rank', 'author', 'text', 'likes', 'time', 'is_reply', 'reply_to'],
columns: ['rank', 'author', 'userId', 'profileUrl', 'text', 'likes', 'time', 'is_reply', 'reply_to'],
func: async (page, kwargs) => {
const limit = parseCommentLimit(kwargs.limit);
const withReplies = Boolean(kwargs['with-replies']);
Expand All @@ -159,6 +180,19 @@ export const command = cli({
// noteId currently unused after parsing — kept for symmetry with the note command
void noteId;
const all = data.results ?? [];
// Shapes one extracted comment into an output row. The extractor's
// authorHrefRaw is only a carrier value: it is turned into userId and
// profileUrl here and deliberately left out of the returned object.
const enrich = (comment, index) => {
  const { author, authorHrefRaw, text, likes, time, is_reply, reply_to } = comment;
  const hasHref = Boolean(authorHrefRaw);
  return {
    rank: index + 1,
    author,
    userId: hasHref ? normalizeXhsUserId(authorHrefRaw) : '',
    profileUrl: hasHref ? buildXhsProfileUrl(authorHrefRaw) : '',
    text,
    likes,
    time,
    is_reply,
    reply_to,
  };
};
// When limiting, count only top-level comments; their replies are included for free
if (withReplies) {
const limited = [];
Expand All @@ -170,8 +204,8 @@ export const command = cli({
break;
limited.push(c);
}
return limited.map((c, i) => ({ rank: i + 1, ...c }));
return limited.map(enrich);
}
return all.slice(0, limit).map((c, i) => ({ rank: i + 1, ...c }));
return all.slice(0, limit).map(enrich);
},
});
60 changes: 57 additions & 3 deletions clis/xiaohongshu/comments.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { describe, expect, it, vi } from 'vitest';
import { JSDOM } from 'jsdom';
import { getRegistry } from '@jackwener/opencli/registry';
import { buildCommentsExtractJs, parseXhsLikeCountText } from './comments.js';
import { buildCommentsExtractJs, buildXhsProfileUrl, parseXhsLikeCountText } from './comments.js';
function createPageMock(evaluateResult) {
return {
goto: vi.fn().mockResolvedValue(undefined),
Expand Down Expand Up @@ -178,10 +178,34 @@ describe('xiaohongshu comments', () => {
`);

expect(data.results).toEqual([
{ author: 'Alice', text: 'Great note', likes: 21000, time: 'today', is_reply: false, reply_to: '' },
{ author: 'Bob', text: 'Malformed count', likes: 0, time: '', is_reply: false, reply_to: '' },
{ author: 'Alice', authorHrefRaw: '', text: 'Great note', likes: 21000, time: 'today', is_reply: false, reply_to: '' },
{ author: 'Bob', authorHrefRaw: '', text: 'Malformed count', likes: 0, time: '', is_reply: false, reply_to: '' },
]);
});
// Verifies the in-page extractor reports the anchor's href attribute verbatim in
// authorHrefRaw: a relative href keeps its query string, and an absolute href is
// returned unchanged. The second fixture has no .author-wrapper, so the link is
// found through the a.user-name alternative of the selector list.
it('extracts authorHrefRaw from /user/profile/ anchor wrapping the name', async () => {
const data = await runCommentsExtract(`
<main>
<section class="parent-comment">
<div class="comment-item">
<div class="author-wrapper"><a class="name" href="/user/profile/5e8a1b2c3d4e5f6a7b8c9d0e?xsec_token=tok">Alice</a></div>
<div class="content">Hi</div>
<span class="count">1</span>
<span class="date">today</span>
</div>
</section>
<section class="parent-comment">
<div class="comment-item">
<a class="user-name" href="https://www.xiaohongshu.com/user/profile/abc123def456">Bob</a>
<div class="note-text">Hey</div>
</div>
</section>
</main>
`);
expect(data.results[0].author).toBe('Alice');
expect(data.results[0].authorHrefRaw).toBe('/user/profile/5e8a1b2c3d4e5f6a7b8c9d0e?xsec_token=tok');
expect(data.results[1].author).toBe('Bob');
expect(data.results[1].authorHrefRaw).toBe('https://www.xiaohongshu.com/user/profile/abc123def456');
});
it('respects the limit for top-level comments', async () => {
const manyComments = Array.from({ length: 10 }, (_, i) => ({
author: `User${i}`,
Expand All @@ -200,6 +224,36 @@ describe('xiaohongshu comments', () => {
expect(result[0].rank).toBe(1);
expect(result[2].rank).toBe(3);
});
// Runs mocked extractor output through command.func and checks that each row
// gains userId and profileUrl derived from authorHrefRaw (relative and absolute
// hrefs), that a row with an empty href gets empty strings for both, and that
// the raw href field itself is absent from the returned rows.
it('enriches each row with userId and profileUrl derived from authorHrefRaw', async () => {
const page = createPageMock({
loginWall: false,
results: [
{ author: 'Alice', authorHrefRaw: '/user/profile/abc123?xsec_token=tok', text: 'hi', likes: 1, time: 't', is_reply: false, reply_to: '' },
{ author: 'Bob', authorHrefRaw: 'https://www.xiaohongshu.com/user/profile/xyz789', text: 'hey', likes: 0, time: '', is_reply: false, reply_to: '' },
{ author: 'Anon', authorHrefRaw: '', text: 'no link', likes: 0, time: '', is_reply: false, reply_to: '' },
],
});
const result = (await command.func(page, {
'note-id': 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok',
limit: 5,
}));
expect(result).toHaveLength(3);
expect(result[0]).toMatchObject({ rank: 1, author: 'Alice', userId: 'abc123', profileUrl: 'https://www.xiaohongshu.com/user/profile/abc123' });
expect(result[1]).toMatchObject({ rank: 2, author: 'Bob', userId: 'xyz789', profileUrl: 'https://www.xiaohongshu.com/user/profile/xyz789' });
expect(result[2]).toMatchObject({ rank: 3, author: 'Anon', userId: '', profileUrl: '' });
// the raw transport field must not leak into the final row shape
for (const row of result) {
expect(row).not.toHaveProperty('authorHrefRaw');
expect(row).not.toHaveProperty('authorHref');
}
});
// Direct checks of buildXhsProfileUrl: relative and absolute hrefs map to the
// canonical profile URL with the query string dropped, empty or null input
// yields '', and the optional second argument replaces the default host.
it('buildXhsProfileUrl handles relative, absolute, and empty inputs', () => {
expect(buildXhsProfileUrl('/user/profile/abc123')).toBe('https://www.xiaohongshu.com/user/profile/abc123');
expect(buildXhsProfileUrl('https://www.xiaohongshu.com/user/profile/xyz?xsec_token=tok')).toBe('https://www.xiaohongshu.com/user/profile/xyz');
expect(buildXhsProfileUrl('')).toBe('');
expect(buildXhsProfileUrl(null)).toBe('');
expect(buildXhsProfileUrl('/user/profile/zzz', 'www.rednote.com')).toBe('https://www.rednote.com/user/profile/zzz');
});
it('clamps invalid negative limits to a safe minimum', async () => {
const page = createPageMock({
loginWall: false,
Expand Down
Loading