From 8279e9fc82bcb0d479be260ce175dedb11bd6de6 Mon Sep 17 00:00:00 2001 From: slowmande Date: Thu, 14 May 2026 16:42:58 +0800 Subject: [PATCH 1/6] route(naver): add Naver search RSS route MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supports all search categories: all (통합검색), blog (블로그), cafe (카페), news (뉴스), video (동영상) --- lib/routes/naver/namespace.ts | 10 ++ lib/routes/naver/search.ts | 279 ++++++++++++++++++++++++++++++++++ 2 files changed, 289 insertions(+) create mode 100644 lib/routes/naver/namespace.ts create mode 100644 lib/routes/naver/search.ts diff --git a/lib/routes/naver/namespace.ts b/lib/routes/naver/namespace.ts new file mode 100644 index 000000000000..d83d8e06d497 --- /dev/null +++ b/lib/routes/naver/namespace.ts @@ -0,0 +1,10 @@ +import type { Namespace } from '@/types'; + +export const namespace: Namespace = { + name: '네이버', + url: 'naver.com', + lang: 'ko', + zh: { + name: 'Naver', + }, +}; diff --git a/lib/routes/naver/search.ts b/lib/routes/naver/search.ts new file mode 100644 index 000000000000..9d8d19745522 --- /dev/null +++ b/lib/routes/naver/search.ts @@ -0,0 +1,279 @@ +import type { Route } from '@/types'; +import ofetch from '@/utils/ofetch'; + +export const route: Route = { + path: ['/search/:category/:keyword', '/search/:keyword'], + categories: ['other'], + example: '/naver/search/all/송소희', + parameters: { + category: { + description: '검색 카테고리. 기본값: all (통합검색)', + default: 'all', + options: [ + { value: 'all', label: '통합검색' }, + { value: 'blog', label: '블로그' }, + { value: 'cafe', label: '카페' }, + { value: 'news', label: '뉴스' }, + { value: 'video', label: '동영상' }, + ], + }, + keyword: '검색 키워드', + }, + features: { + requireConfig: false, + requirePuppeteer: false, + antiCrawler: false, + supportBT: false, + supportPodcast: false, + supportScihub: false, + }, + radar: [ + { + source: ['m.search.naver.com/search.naver'], + target: '/search/:category?/:keyword', + }, + ], + name: '검색', + maintainers: [''], + handler, +}; + +const CATEGORY_CONFIG: Record string; templateId: string }> = { + all: { + url: (keyword) => `https://m.search.naver.com/search.naver?ssc=tab.m.all&where=m&sm=mtb_opt&query=${encodeURIComponent(keyword)}&nso=so%3Add&nso_open=1`, + templateId: 'webItem', + }, + blog: { + url: (keyword) => `https://m.search.naver.com/search.naver?ssc=tab.m_blog.all&sm=mtb_jum&query=${encodeURIComponent(keyword)}&nso=so%3Add`, + templateId: 'ugcItem', + }, + cafe: { + url: (keyword) => `https://m.search.naver.com/search.naver?where=m_cafe&sm=mtb_jum&query=${encodeURIComponent(keyword)}&nso=so%3Add`, + templateId: 'webItem', + }, + news: { + url: (keyword) => `https://m.search.naver.com/search.naver?ssc=tab.m_news.all&where=m_news&sm=mtb_jum&query=${encodeURIComponent(keyword)}&nso=so%3Add`, + templateId: 'newsItem', + }, + video: { + url: (keyword) => `https://m.search.naver.com/search.naver?ssc=tab.m_video.all&where=m_video&sm=mtb_jum&query=${encodeURIComponent(keyword)}&nso=so%3Add`, + templateId: 'videoItem', + }, +}; + +const CATEGORY_NAMES: Record = { + all: '통합검색', + blog: '블로그', + cafe: '카페', + news: '뉴스', + video: '동영상', +}; + +async function handler(ctx) { + const keyword = ctx.req.param('keyword'); + const category = ctx.req.param('category') || 'all'; + const config = CATEGORY_CONFIG[category] || CATEGORY_CONFIG.all; + const url = config.url(keyword); + + const response = await ofetch(url); + + const segments = response.split(`"templateId":"${config.templateId}"`); + // eslint-disable-next-line no-console + console.log(`[naver-search] category=${category}, templateId=${config.templateId}, segments=${segments.length}`); + + const items = extractItems(response, config.templateId); + + return { + title: `${keyword} - 네이버 ${CATEGORY_NAMES[category] || CATEGORY_NAMES.all}`, + description: `${keyword}의 네이버 ${CATEGORY_NAMES[category] || CATEGORY_NAMES.all} 검색 결과입니다.`, + link: url, + item: items, + }; +} + +function extractItems(response: string, templateId: string) { + const segments = response.split(`"templateId":"${templateId}"`); + + return segments + .slice(0, -1) + .map((segment) => { + // videoItem: different field layout from other templates + if (templateId === 'videoItem') { + const htmlMatch = segment.match(/"html":"((?:[^"\\]|\\.)*)"/); + const title = htmlMatch ? cleanText(htmlMatch[1]) : ''; + const authorMatch = segment.match(/"authorHtml":"((?:[^"\\]|\\.)*)"/); + const author = authorMatch ? cleanText(authorMatch[1]) : ''; + const hrefMatches = [...segment.matchAll(/"href":"((?:[^"\\]|\\.)*)"/g)]; + const links = hrefMatches.map((m) => m[1]); + // Filter for real video links (YouTube, TikTok, Naver TV/blog/cafe) + const mediaDomains = /^(https?:\/\/)?(m\.youtube\.com|youtu\.be|www\.youtube\.com|www\.tiktok\.com|tv\.naver\.com|m\.blog\.naver\.com|m\.cafe\.naver\.com)\//; + const link = links.find((l) => mediaDomains.test(l)) || links[0] || ''; + const dateMatch = segment.match(/"createdAt":"((?:[^"\\]|\\.)*)"/); + const timeText = dateMatch?.[1] || ''; + const viewMatch = segment.match(/"viewCount":"((?:[^"\\]|\\.)*)"/); + const viewCount = viewMatch ? cleanText(viewMatch[1]) : ''; + const durationMatch = segment.match(/"playDuration":(\d+)/); + const duration = durationMatch ? `${durationMatch[1]}초` : ''; + + // Require videoPlayerType or playDuration to ensure it's a real video result + const hasVideoFields = segment.includes('"videoPlayerType"') || segment.includes('"playDuration"'); + if (!title || !link || !timeText || !hasVideoFields) { + return null; + } + + const parts = []; + if (author) { + parts.push(`출처: ${author}`); + } + if (viewCount) { + parts.push(`조회수: ${viewCount}`); + } + if (duration && duration !== '0초') { + parts.push(`재생시간: ${duration}`); + } + const description = parts.map((p) => `

${p}

`).join(''); + + return { + title, + link, + description, + pubDate: parseKoreanRelativeTime(timeText), + }; + } + + // Extract all title fields + const titleMatches = [...segment.matchAll(/"title":"((?:[^"\\]|\\.)*)"/g)]; + const titles = titleMatches.map((m) => cleanText(m[1])); + + // Extract URLs depending on template type + let link = ''; + if (templateId === 'webItem') { + // webItem: first href is the result link + const hrefMatch = segment.match(/"href":"((?:[^"\\]|\\.)*)"/); + link = hrefMatch?.[1] || ''; + } else { + // ugcItem, newsItem: last titleHref is the result link + const hrefMatches = [...segment.matchAll(/"titleHref":"((?:[^"\\]|\\.)*)"/g)]; + link = hrefMatches.length > 0 ? hrefMatches.at(-1)[1] : ''; + // Fallback: try direct href + if (!link) { + const hrefMatch = segment.match(/"href":"((?:[^"\\]|\\.)*)"/); + link = hrefMatch?.[1] || ''; + } + } + + // Title: last title is always the result title + const title = titles.at(-1) || ''; + // Source: second-to-last title + const sourceName = titles.at(-2) || ''; + + // Extract body/description + let bodyText = ''; + const bodyMatch = segment.match(/"bodyText":"((?:[^"\\]|\\.)*)"/) || segment.match(/"content":"((?:[^"\\]|\\.)*)"/); + if (bodyMatch) { + bodyText = cleanText(bodyMatch[1]); + } + + // Extract time + let timeText = ''; + switch (templateId) { + case 'webItem': { + const timeMatch = segment.match(/\[{"text":"([^"]*)"}/); + timeText = timeMatch?.[1] || ''; + + break; + } + case 'newsItem': { + const textMatch = segment.match(/"text":"([^"]*)"/); + timeText = textMatch?.[1] || ''; + + break; + } + case 'ugcItem': { + // blog uses createdDate like "2026-05-13T23:15:00+09:00" + const dateMatch = segment.match(/"createdDate":"([^"]*)"/); + if (dateMatch?.[1]) { + return { + title, + link, + description: sourceName ? `

출처: ${sourceName}

${bodyText}

` : bodyText, + pubDate: new Date(dateMatch[1]), + }; + } + + break; + } + default: + // Do nothing + } + + if (!title || !link) { + return null; + } + + // Skip non-result items + if (title === '더보기' || title === '관련도순' || title === '최신순') { + return null; + } + + const description = sourceName ? `

출처: ${sourceName}

${bodyText}

` : bodyText; + + return { + title, + link, + description, + pubDate: parseKoreanRelativeTime(timeText), + }; + }) + .filter(Boolean); +} + +function cleanText(text: string): string { + return text + .replaceAll(/</g, '<') + .replaceAll(/>/g, '>') + .replaceAll(/&/g, '&') + .replaceAll(/"/g, '"') + .replaceAll(//g, '') + .replaceAll(/<\/mark>/g, ''); +} + +function parseKoreanRelativeTime(timeText: string): Date { + const now = new Date(); + if (!timeText) { + return now; + } + + // Try absolute date formats first (e.g. "2025.12.01.", "2026-05-13T23:15:00+09:00") + const absDate = new Date(timeText); + if (!Number.isNaN(absDate.getTime())) { + return absDate; + } + + const match = timeText.match(/(\d+)\s*(시간|분|일|주) 전|(\d+)분 이내|(\d+)시간 이내|방금/); + if (!match) { + return now; + } + + if (match[0] === '방금') { + return now; + } + + const num = Number.parseInt(match[1] || match[3] || match[4] || '0', 10); + const unit = match[2] || ''; + + if (unit.includes('분')) { + return new Date(now.getTime() - num * 60 * 1000); + } + if (unit.includes('시간')) { + return new Date(now.getTime() - num * 60 * 60 * 1000); + } + if (unit.includes('일')) { + return new Date(now.getTime() - num * 24 * 60 * 60 * 1000); + } + if (unit.includes('주')) { + return new Date(now.getTime() - num * 7 * 24 * 60 * 60 * 1000); + } + + return now; +} From 644a483ec3befd8b028e1c8f2c0ea7b713638814 Mon Sep 17 00:00:00 2001 From: slowmande Date: Thu, 14 May 2026 17:11:14 +0800 Subject: [PATCH 2/6] fix: use string replaceAll instead of regex in cleanText Replace regex patterns with plain strings to satisfy oxlint unicorn/prefer-string-replace-all rule. --- lib/routes/naver/search.ts | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/lib/routes/naver/search.ts b/lib/routes/naver/search.ts index 9d8d19745522..903939c70ddb 100644 --- a/lib/routes/naver/search.ts +++ b/lib/routes/naver/search.ts @@ -229,13 +229,7 @@ function extractItems(response: string, templateId: string) { } function cleanText(text: string): string { - return text - .replaceAll(/</g, '<') - .replaceAll(/>/g, '>') - .replaceAll(/&/g, '&') - .replaceAll(/"/g, '"') - .replaceAll(//g, '') - .replaceAll(/<\/mark>/g, ''); + return text.replaceAll('<', '<').replaceAll('>', '>').replaceAll('&', '&').replaceAll('"', '"').replaceAll('', '').replaceAll('', ''); } function parseKoreanRelativeTime(timeText: string): Date { From 463fc6f05ec8cef72ab1748db095104833de3bc6 Mon Sep 17 00:00:00 2001 From: slowmande Date: Thu, 14 May 2026 17:29:18 +0800 Subject: [PATCH 3/6] fix: remove console.log and fix cleanText double-unescaping - Remove console.log to satisfy oxlint no-console rule - Use single-pass regex replaceAll to prevent HTML entity double-unescaping (CodeQL) --- lib/routes/naver/search.ts | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/lib/routes/naver/search.ts b/lib/routes/naver/search.ts index 903939c70ddb..8c3317d7ec50 100644 --- a/lib/routes/naver/search.ts +++ b/lib/routes/naver/search.ts @@ -77,10 +77,6 @@ async function handler(ctx) { const response = await ofetch(url); - const segments = response.split(`"templateId":"${config.templateId}"`); - // eslint-disable-next-line no-console - console.log(`[naver-search] category=${category}, templateId=${config.templateId}, segments=${segments.length}`); - const items = extractItems(response, config.templateId); return { @@ -229,7 +225,23 @@ function extractItems(response: string, templateId: string) { } function cleanText(text: string): string { - return text.replaceAll('<', '<').replaceAll('>', '>').replaceAll('&', '&').replaceAll('"', '"').replaceAll('', '').replaceAll('', ''); + return text + .replaceAll(/&(amp|lt|gt|quot);/g, (match) => { + switch (match) { + case '&': + return '&'; + case '<': + return '<'; + case '>': + return '>'; + case '"': + return '"'; + default: + return match; + } + }) + .replaceAll('', '') + .replaceAll('', ''); } function parseKoreanRelativeTime(timeText: string): Date { From 6c3bb32a0def4664fbe68188d970acda1315c6f0 Mon Sep 17 00:00:00 2001 From: slowmande Date: Tue, 26 May 2026 16:10:21 +0800 Subject: [PATCH 4/6] fix(naver): address review comments and fix cafe/all category bugs - Fix ALL category: use templateIds array to extract all content types (web, blog, news, video) instead of only webItem - Fix CAFE category: change URL to ssc=tab.m_cafe.all format and use cheerio HTML parsing since cafe page returns pure HTML instead of JSON SSR - Address Rule 11: move author/source metadata from description to dedicated author field - Address Rule 14: return undefined for unparseable dates instead of fake current time - Use single route path instead of array - Add maintainer GitHub ID - Fix cleanText double-unescaping issue with single-pass regex callback --- lib/routes/naver/search.ts | 88 ++++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/lib/routes/naver/search.ts b/lib/routes/naver/search.ts index 8c3317d7ec50..4904b840b769 100644 --- a/lib/routes/naver/search.ts +++ b/lib/routes/naver/search.ts @@ -1,8 +1,10 @@ +import { load } from 'cheerio'; + import type { Route } from '@/types'; import ofetch from '@/utils/ofetch'; export const route: Route = { - path: ['/search/:category/:keyword', '/search/:keyword'], + path: '/search/:category/:keyword', categories: ['other'], example: '/naver/search/all/송소희', parameters: { @@ -30,34 +32,34 @@ export const route: Route = { radar: [ { source: ['m.search.naver.com/search.naver'], - target: '/search/:category?/:keyword', + target: '/search/:category/:keyword', }, ], name: '검색', - maintainers: [''], + maintainers: ['slowmande'], handler, }; -const CATEGORY_CONFIG: Record string; templateId: string }> = { +const CATEGORY_CONFIG: Record string; templateIds: string[] }> = { all: { url: (keyword) => `https://m.search.naver.com/search.naver?ssc=tab.m.all&where=m&sm=mtb_opt&query=${encodeURIComponent(keyword)}&nso=so%3Add&nso_open=1`, - templateId: 'webItem', + templateIds: ['webItem', 'ugcItem', 'newsItem', 'videoItem'], }, blog: { url: (keyword) => `https://m.search.naver.com/search.naver?ssc=tab.m_blog.all&sm=mtb_jum&query=${encodeURIComponent(keyword)}&nso=so%3Add`, - templateId: 'ugcItem', + templateIds: ['ugcItem'], }, cafe: { - url: (keyword) => `https://m.search.naver.com/search.naver?where=m_cafe&sm=mtb_jum&query=${encodeURIComponent(keyword)}&nso=so%3Add`, - templateId: 'webItem', + url: (keyword) => `https://m.search.naver.com/search.naver?ssc=tab.m_cafe.all&sm=mtb_jum&query=${encodeURIComponent(keyword)}&nso=so%3Add`, + templateIds: ['webItem', 'ugcItem', 'newsItem', 'videoItem'], }, news: { url: (keyword) => `https://m.search.naver.com/search.naver?ssc=tab.m_news.all&where=m_news&sm=mtb_jum&query=${encodeURIComponent(keyword)}&nso=so%3Add`, - templateId: 'newsItem', + templateIds: ['newsItem'], }, video: { url: (keyword) => `https://m.search.naver.com/search.naver?ssc=tab.m_video.all&where=m_video&sm=mtb_jum&query=${encodeURIComponent(keyword)}&nso=so%3Add`, - templateId: 'videoItem', + templateIds: ['videoItem'], }, }; @@ -77,7 +79,7 @@ async function handler(ctx) { const response = await ofetch(url); - const items = extractItems(response, config.templateId); + const items = category === 'cafe' ? extractCafeItems(response) : config.templateIds.flatMap((tid) => extractItems(response, tid)); return { title: `${keyword} - 네이버 ${CATEGORY_NAMES[category] || CATEGORY_NAMES.all}`, @@ -118,9 +120,6 @@ function extractItems(response: string, templateId: string) { } const parts = []; - if (author) { - parts.push(`출처: ${author}`); - } if (viewCount) { parts.push(`조회수: ${viewCount}`); } @@ -129,11 +128,13 @@ function extractItems(response: string, templateId: string) { } const description = parts.map((p) => `

${p}

`).join(''); + const pubDate = parseKoreanRelativeTime(timeText); return { title, link, description, - pubDate: parseKoreanRelativeTime(timeText), + author: author || undefined, + ...(pubDate && { pubDate }), }; } @@ -192,7 +193,8 @@ function extractItems(response: string, templateId: string) { return { title, link, - description: sourceName ? `

출처: ${sourceName}

${bodyText}

` : bodyText, + description: bodyText, + author: sourceName || undefined, pubDate: new Date(dateMatch[1]), }; } @@ -212,18 +214,58 @@ function extractItems(response: string, templateId: string) { return null; } - const description = sourceName ? `

출처: ${sourceName}

${bodyText}

` : bodyText; + const description = bodyText; + const pubDate = parseKoreanRelativeTime(timeText); return { title, link, description, - pubDate: parseKoreanRelativeTime(timeText), + author: sourceName || undefined, + ...(pubDate && { pubDate }), }; }) .filter(Boolean); } +function extractCafeItems(html: string) { + const $ = load(html); + const items: Array<{ + title: string; + link: string; + description: string; + author?: string; + pubDate?: Date; + }> = []; + + $('li.bx').each((_i, el) => { + const $el = $(el); + const titleEl = $el.find('.title_link'); + const title = titleEl.text().trim(); + const link = titleEl.attr('href') || ''; + const author = $el.find('.name').first().text().trim(); + const timeText = $el.find('.sub').first().text().trim(); + const descEl = $el.find('.dsc_link'); + const description = descEl.length ? descEl.text().trim() : ''; + + if (!title || !link) { + return; + } + + const pubDate = parseKoreanRelativeTime(timeText); + + items.push({ + title, + link, + description, + author: author || undefined, + ...(pubDate && { pubDate }), + }); + }); + + return items; +} + function cleanText(text: string): string { return text .replaceAll(/&(amp|lt|gt|quot);/g, (match) => { @@ -244,10 +286,10 @@ function cleanText(text: string): string { .replaceAll('
', ''); } -function parseKoreanRelativeTime(timeText: string): Date { +function parseKoreanRelativeTime(timeText: string): Date | undefined { const now = new Date(); if (!timeText) { - return now; + return; } // Try absolute date formats first (e.g. "2025.12.01.", "2026-05-13T23:15:00+09:00") @@ -258,11 +300,11 @@ function parseKoreanRelativeTime(timeText: string): Date { const match = timeText.match(/(\d+)\s*(시간|분|일|주) 전|(\d+)분 이내|(\d+)시간 이내|방금/); if (!match) { - return now; + return; } if (match[0] === '방금') { - return now; + return; } const num = Number.parseInt(match[1] || match[3] || match[4] || '0', 10); @@ -281,5 +323,5 @@ function parseKoreanRelativeTime(timeText: string): Date { return new Date(now.getTime() - num * 7 * 24 * 60 * 60 * 1000); } - return now; + return; } From eda393d75240fd256fdcafcfa796309a0ac47f66 Mon Sep 17 00:00:00 2001 From: slowmande Date: Tue, 26 May 2026 16:32:46 +0800 Subject: [PATCH 5/6] refactor(naver): split extractItems to reduce cyclomatic complexity Split the 37-complexity extractItems into extractVideoItem and extractGenericItem. Fixes CodeFactor "Complex Method" warning. --- lib/routes/naver/search.ts | 230 +++++++++++++++++-------------------- 1 file changed, 106 insertions(+), 124 deletions(-) diff --git a/lib/routes/naver/search.ts b/lib/routes/naver/search.ts index 4904b840b769..66ff2bf0e5fd 100644 --- a/lib/routes/naver/search.ts +++ b/lib/routes/naver/search.ts @@ -91,141 +91,123 @@ async function handler(ctx) { function extractItems(response: string, templateId: string) { const segments = response.split(`"templateId":"${templateId}"`); - return segments .slice(0, -1) - .map((segment) => { - // videoItem: different field layout from other templates - if (templateId === 'videoItem') { - const htmlMatch = segment.match(/"html":"((?:[^"\\]|\\.)*)"/); - const title = htmlMatch ? cleanText(htmlMatch[1]) : ''; - const authorMatch = segment.match(/"authorHtml":"((?:[^"\\]|\\.)*)"/); - const author = authorMatch ? cleanText(authorMatch[1]) : ''; - const hrefMatches = [...segment.matchAll(/"href":"((?:[^"\\]|\\.)*)"/g)]; - const links = hrefMatches.map((m) => m[1]); - // Filter for real video links (YouTube, TikTok, Naver TV/blog/cafe) - const mediaDomains = /^(https?:\/\/)?(m\.youtube\.com|youtu\.be|www\.youtube\.com|www\.tiktok\.com|tv\.naver\.com|m\.blog\.naver\.com|m\.cafe\.naver\.com)\//; - const link = links.find((l) => mediaDomains.test(l)) || links[0] || ''; - const dateMatch = segment.match(/"createdAt":"((?:[^"\\]|\\.)*)"/); - const timeText = dateMatch?.[1] || ''; - const viewMatch = segment.match(/"viewCount":"((?:[^"\\]|\\.)*)"/); - const viewCount = viewMatch ? cleanText(viewMatch[1]) : ''; - const durationMatch = segment.match(/"playDuration":(\d+)/); - const duration = durationMatch ? `${durationMatch[1]}초` : ''; - - // Require videoPlayerType or playDuration to ensure it's a real video result - const hasVideoFields = segment.includes('"videoPlayerType"') || segment.includes('"playDuration"'); - if (!title || !link || !timeText || !hasVideoFields) { - return null; - } - - const parts = []; - if (viewCount) { - parts.push(`조회수: ${viewCount}`); - } - if (duration && duration !== '0초') { - parts.push(`재생시간: ${duration}`); - } - const description = parts.map((p) => `

${p}

`).join(''); - - const pubDate = parseKoreanRelativeTime(timeText); - return { - title, - link, - description, - author: author || undefined, - ...(pubDate && { pubDate }), - }; - } + .map((segment) => (templateId === 'videoItem' ? extractVideoItem(segment) : extractGenericItem(segment, templateId))) + .filter(Boolean); +} - // Extract all title fields - const titleMatches = [...segment.matchAll(/"title":"((?:[^"\\]|\\.)*)"/g)]; - const titles = titleMatches.map((m) => cleanText(m[1])); - - // Extract URLs depending on template type - let link = ''; - if (templateId === 'webItem') { - // webItem: first href is the result link - const hrefMatch = segment.match(/"href":"((?:[^"\\]|\\.)*)"/); - link = hrefMatch?.[1] || ''; - } else { - // ugcItem, newsItem: last titleHref is the result link - const hrefMatches = [...segment.matchAll(/"titleHref":"((?:[^"\\]|\\.)*)"/g)]; - link = hrefMatches.length > 0 ? hrefMatches.at(-1)[1] : ''; - // Fallback: try direct href - if (!link) { - const hrefMatch = segment.match(/"href":"((?:[^"\\]|\\.)*)"/); - link = hrefMatch?.[1] || ''; - } - } +function extractVideoItem(segment: string) { + const htmlMatch = segment.match(/"html":"((?:[^"\\]|\\.)*)"/); + const title = htmlMatch ? cleanText(htmlMatch[1]) : ''; + const authorMatch = segment.match(/"authorHtml":"((?:[^"\\]|\\.)*)"/); + const author = authorMatch ? cleanText(authorMatch[1]) : ''; + const hrefMatches = [...segment.matchAll(/"href":"((?:[^"\\]|\\.)*)"/g)]; + const links = hrefMatches.map((m) => m[1]); + const mediaDomains = /^(https?:\/\/)?(m\.youtube\.com|youtu\.be|www\.youtube\.com|www\.tiktok\.com|tv\.naver\.com|m\.blog\.naver\.com|m\.cafe\.naver\.com)\//; + const link = links.find((l) => mediaDomains.test(l)) || links[0] || ''; + const dateMatch = segment.match(/"createdAt":"((?:[^"\\]|\\.)*)"/); + const timeText = dateMatch?.[1] || ''; + const viewMatch = segment.match(/"viewCount":"((?:[^"\\]|\\.)*)"/); + const viewCount = viewMatch ? cleanText(viewMatch[1]) : ''; + const durationMatch = segment.match(/"playDuration":(\d+)/); + const duration = durationMatch ? `${durationMatch[1]}초` : ''; + + const hasVideoFields = segment.includes('"videoPlayerType"') || segment.includes('"playDuration"'); + if (!title || !link || !timeText || !hasVideoFields) { + return null; + } - // Title: last title is always the result title - const title = titles.at(-1) || ''; - // Source: second-to-last title - const sourceName = titles.at(-2) || ''; + const parts = []; + if (viewCount) { + parts.push(`조회수: ${viewCount}`); + } + if (duration && duration !== '0초') { + parts.push(`재생시간: ${duration}`); + } + const description = parts.map((p) => `

${p}

`).join(''); - // Extract body/description - let bodyText = ''; - const bodyMatch = segment.match(/"bodyText":"((?:[^"\\]|\\.)*)"/) || segment.match(/"content":"((?:[^"\\]|\\.)*)"/); - if (bodyMatch) { - bodyText = cleanText(bodyMatch[1]); - } + const pubDate = parseKoreanRelativeTime(timeText); + return { + title, + link, + description, + author: author || undefined, + ...(pubDate && { pubDate }), + }; +} - // Extract time - let timeText = ''; - switch (templateId) { - case 'webItem': { - const timeMatch = segment.match(/\[{"text":"([^"]*)"}/); - timeText = timeMatch?.[1] || ''; - - break; - } - case 'newsItem': { - const textMatch = segment.match(/"text":"([^"]*)"/); - timeText = textMatch?.[1] || ''; - - break; - } - case 'ugcItem': { - // blog uses createdDate like "2026-05-13T23:15:00+09:00" - const dateMatch = segment.match(/"createdDate":"([^"]*)"/); - if (dateMatch?.[1]) { - return { - title, - link, - description: bodyText, - author: sourceName || undefined, - pubDate: new Date(dateMatch[1]), - }; - } - - break; - } - default: - // Do nothing - } +function extractGenericItem(segment: string, templateId: string) { + const titleMatches = [...segment.matchAll(/"title":"((?:[^"\\]|\\.)*)"/g)]; + const titles = titleMatches.map((m) => cleanText(m[1])); + + let link = ''; + if (templateId === 'webItem') { + const hrefMatch = segment.match(/"href":"((?:[^"\\]|\\.)*)"/); + link = hrefMatch?.[1] || ''; + } else { + const hrefMatches = [...segment.matchAll(/"titleHref":"((?:[^"\\]|\\.)*)"/g)]; + link = hrefMatches.length > 0 ? hrefMatches.at(-1)![1] : ''; + if (!link) { + const hrefMatch = segment.match(/"href":"((?:[^"\\]|\\.)*)"/); + link = hrefMatch?.[1] || ''; + } + } - if (!title || !link) { - return null; - } + const title = titles.at(-1) || ''; + const sourceName = titles.at(-2) || ''; - // Skip non-result items - if (title === '더보기' || title === '관련도순' || title === '최신순') { - return null; + let bodyText = ''; + const bodyMatch = segment.match(/"bodyText":"((?:[^"\\]|\\.)*)"/) || segment.match(/"content":"((?:[^"\\]|\\.)*)"/); + if (bodyMatch) { + bodyText = cleanText(bodyMatch[1]); + } + + let timeText = ''; + switch (templateId) { + case 'webItem': { + const timeMatch = segment.match(/\[{"text":"([^"]*)"}/); + timeText = timeMatch?.[1] || ''; + break; + } + case 'newsItem': { + const textMatch = segment.match(/"text":"([^"]*)"/); + timeText = textMatch?.[1] || ''; + break; + } + case 'ugcItem': { + const dateMatch = segment.match(/"createdDate":"([^"]*)"/); + if (dateMatch?.[1]) { + return { + title, + link, + description: bodyText, + author: sourceName || undefined, + pubDate: new Date(dateMatch[1]), + }; } + break; + } + default: + break; + } - const description = bodyText; - const pubDate = parseKoreanRelativeTime(timeText); + if (!title || !link) { + return null; + } - return { - title, - link, - description, - author: sourceName || undefined, - ...(pubDate && { pubDate }), - }; - }) - .filter(Boolean); + if (title === '더보기' || title === '관련도순' || title === '최신순') { + return null; + } + + const pubDate = parseKoreanRelativeTime(timeText); + return { + title, + link, + description: bodyText, + author: sourceName || undefined, + ...(pubDate && { pubDate }), + }; } function extractCafeItems(html: string) { From 11851fd056528c54bbc87be6b4bb6c4bd18e3921 Mon Sep 17 00:00:00 2001 From: slowmande Date: Tue, 26 May 2026 16:48:51 +0800 Subject: [PATCH 6/6] refactor(naver): split functions to pass CodeFactor complexity check Extract extractLink, buildItemFromTemplate, buildVideoDescription, and buildVideoResult helpers to bring all methods under complexity threshold. --- lib/routes/naver/search.ts | 108 +++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 58 deletions(-) diff --git a/lib/routes/naver/search.ts b/lib/routes/naver/search.ts index 66ff2bf0e5fd..1b53c876d92e 100644 --- a/lib/routes/naver/search.ts +++ b/lib/routes/naver/search.ts @@ -118,44 +118,50 @@ function extractVideoItem(segment: string) { return null; } - const parts = []; - if (viewCount) { - parts.push(`조회수: ${viewCount}`); - } - if (duration && duration !== '0초') { - parts.push(`재생시간: ${duration}`); - } - const description = parts.map((p) => `

${p}

`).join(''); - const pubDate = parseKoreanRelativeTime(timeText); + return buildVideoResult(title, link, viewCount, duration, author, pubDate); +} + +function buildVideoResult(title: string, link: string, viewCount: string, duration: string, author: string, pubDate: Date | undefined) { return { title, link, - description, + description: buildVideoDescription(viewCount, duration), author: author || undefined, ...(pubDate && { pubDate }), }; } -function extractGenericItem(segment: string, templateId: string) { - const titleMatches = [...segment.matchAll(/"title":"((?:[^"\\]|\\.)*)"/g)]; - const titles = titleMatches.map((m) => cleanText(m[1])); +function buildVideoDescription(viewCount: string, duration: string): string { + const parts: string[] = []; + if (viewCount) { + parts.push(`조회수: ${viewCount}`); + } + if (duration && duration !== '0초') { + parts.push(`재생시간: ${duration}`); + } + return parts.map((p) => `

${p}

`).join(''); +} - let link = ''; +function extractLink(segment: string, templateId: string): string { if (templateId === 'webItem') { const hrefMatch = segment.match(/"href":"((?:[^"\\]|\\.)*)"/); - link = hrefMatch?.[1] || ''; - } else { - const hrefMatches = [...segment.matchAll(/"titleHref":"((?:[^"\\]|\\.)*)"/g)]; - link = hrefMatches.length > 0 ? hrefMatches.at(-1)![1] : ''; - if (!link) { - const hrefMatch = segment.match(/"href":"((?:[^"\\]|\\.)*)"/); - link = hrefMatch?.[1] || ''; - } + return hrefMatch?.[1] || ''; + } + const hrefMatches = [...segment.matchAll(/"titleHref":"((?:[^"\\]|\\.)*)"/g)]; + if (hrefMatches.length > 0) { + return hrefMatches.at(-1)![1]; } + const hrefMatch = segment.match(/"href":"((?:[^"\\]|\\.)*)"/); + return hrefMatch?.[1] || ''; +} +function extractGenericItem(segment: string, templateId: string) { + const titleMatches = [...segment.matchAll(/"title":"((?:[^"\\]|\\.)*)"/g)]; + const titles = titleMatches.map((m) => cleanText(m[1])); const title = titles.at(-1) || ''; const sourceName = titles.at(-2) || ''; + const link = extractLink(segment, templateId); let bodyText = ''; const bodyMatch = segment.match(/"bodyText":"((?:[^"\\]|\\.)*)"/) || segment.match(/"content":"((?:[^"\\]|\\.)*)"/); @@ -163,35 +169,6 @@ function extractGenericItem(segment: string, templateId: string) { bodyText = cleanText(bodyMatch[1]); } - let timeText = ''; - switch (templateId) { - case 'webItem': { - const timeMatch = segment.match(/\[{"text":"([^"]*)"}/); - timeText = timeMatch?.[1] || ''; - break; - } - case 'newsItem': { - const textMatch = segment.match(/"text":"([^"]*)"/); - timeText = textMatch?.[1] || ''; - break; - } - case 'ugcItem': { - const dateMatch = segment.match(/"createdDate":"([^"]*)"/); - if (dateMatch?.[1]) { - return { - title, - link, - description: bodyText, - author: sourceName || undefined, - pubDate: new Date(dateMatch[1]), - }; - } - break; - } - default: - break; - } - if (!title || !link) { return null; } @@ -200,14 +177,29 @@ function extractGenericItem(segment: string, templateId: string) { return null; } + return buildItemFromTemplate(segment, templateId, title, link, bodyText, sourceName); +} + +function buildItemFromTemplate(segment: string, templateId: string, title: string, link: string, bodyText: string, sourceName: string) { + if (templateId === 'ugcItem') { + const dateMatch = segment.match(/"createdDate":"([^"]*)"/); + if (dateMatch?.[1]) { + return { title, link, description: bodyText, author: sourceName || undefined, pubDate: new Date(dateMatch[1]) }; + } + return null; + } + + let timeText = ''; + if (templateId === 'webItem') { + const timeMatch = segment.match(/\[{"text":"([^"]*)"}/); + timeText = timeMatch?.[1] || ''; + } else { + const textMatch = segment.match(/"text":"([^"]*)"/); + timeText = textMatch?.[1] || ''; + } + const pubDate = parseKoreanRelativeTime(timeText); - return { - title, - link, - description: bodyText, - author: sourceName || undefined, - ...(pubDate && { pubDate }), - }; + return { title, link, description: bodyText, author: sourceName || undefined, ...(pubDate && { pubDate }) }; } function extractCafeItems(html: string) {