diff --git a/packages/x-markdown/src/XMarkdown/__tests__/Parser.test.ts b/packages/x-markdown/src/XMarkdown/__tests__/Parser.test.ts index 06fb1a2b9..011da556c 100644 --- a/packages/x-markdown/src/XMarkdown/__tests__/Parser.test.ts +++ b/packages/x-markdown/src/XMarkdown/__tests__/Parser.test.ts @@ -81,6 +81,72 @@ describe('Parser', () => { }); describe('protectCustomTagNewlines', () => { + it('should keep markdown syntax inside custom tag children as plain text', () => { + const parser = new Parser({ + components: { custom: 'div' }, + }); + const content = '{"sales":[{"name":"电子产品[202](红)","value":52000}]}'; + const result = parser.parse(content); + expect(result).toContain('电子产品[202](红)'); + expect(result).not.toContain(' { + const parser = new Parser({ + components: { custom: 'div' }, + }); + const content = '{"sales":[{"name":"电子产品[202](红)","value":52000'; + const result = parser.parse(content); + expect(result).toContain('电子产品[202](红)'); + expect(result).not.toContain(' { + const parser = new Parser({ + components: { p: 'p' }, + }); + const result = parser.parse('

[Google](https://google.com)

'); + expect(result).toContain('

[Google](https://google.com)

'); + }); + + it('should keep CommonMark behavior for native HTML tag components by default', () => { + const parser = new Parser({ + components: { span: 'span' }, + }); + const result = parser.parse('*hello*'); + expect(result).toContain('hello'); + }); + + it('should keep native HTML tag component children as plain text when enabled', () => { + const parser = new Parser({ + components: { span: 'span' }, + rawCustomComponents: true, + }); + const result = parser.parse('*hello* world'); + expect(result).toContain('*hello* <i>world</i>'); + expect(result).not.toContain('hello'); + }); + + it('should only keep configured component tag children as plain text when enabled', () => { + const parser = new Parser({ + components: { span: 'span' }, + rawCustomComponents: true, + }); + const result = parser.parse('*hello* *world*'); + expect(result).toContain('*hello*'); + expect(result).toContain('world'); + }); + + it('should keep unclosed native HTML tag component children as plain text when enabled', () => { + const parser = new Parser({ + components: { span: 'span' }, + rawCustomComponents: true, + }); + const result = parser.parse('*hello* world'); + expect(result).toContain('*hello* <i>world</i>'); + expect(result).not.toContain('hello'); + }); + it('should protect newlines inside custom tags when protectCustomTagNewlines is true', () => { const parser = new Parser({ protectCustomTagNewlines: true, @@ -92,14 +158,15 @@ describe('Parser', () => { expect(result).not.toMatch(/First line<\/p>\s*

Second line/); }); - it('should not protect newlines when protectCustomTagNewlines is false', () => { + it('should protect custom tag content when protectCustomTagNewlines is false', () => { const parser = new Parser({ protectCustomTagNewlines: false, components: { CustomComponent: 'div' }, }); const content = 'First line\n\nSecond line'; const result = parser.parse(content); - expect(result).toContain('

'); + expect(result).toContain('First line\n\nSecond line'); + expect(result).not.toMatch(/First line<\/p>\s*

Second line/); }); it('should work normally when protectCustomTagNewlines is true but no custom components', () => { diff --git a/packages/x-markdown/src/XMarkdown/__tests__/index.test.tsx b/packages/x-markdown/src/XMarkdown/__tests__/index.test.tsx index 6ee85e0dc..5cfcc898a 100644 --- a/packages/x-markdown/src/XMarkdown/__tests__/index.test.tsx +++ b/packages/x-markdown/src/XMarkdown/__tests__/index.test.tsx @@ -176,6 +176,75 @@ describe('XMarkdown', () => { expect((container.firstChild as HTMLElement)?.innerHTML).toBe(html); }); + it('passes custom component children as plain text without parsing markdown links', () => { + let receivedChildren: React.ReactNode; + const markdown = + '{"sales":[{"name":"电子产品[202](红)","value":52000}],"majorGroupName":"华南师范大学[202](汕尾校区)"}'; + + const { container } = render( + { + receivedChildren = props.children; + return {props.children}; + }, + }} + />, + ); + + expect(receivedChildren).toBe( + '{"sales":[{"name":"电子产品[202](红)","value":52000}],"majorGroupName":"华南师范大学[202](汕尾校区)"}', + ); + expect(container.querySelector('a')).not.toBeInTheDocument(); + }); + + it('passes streaming custom component children as plain text before the tag is closed', () => { + let receivedChildren: React.ReactNode; + const markdown = '{"sales":[{"name":"电子产品[202](红)","value":52000'; + + const { container } = render( + { + receivedChildren = props.children; + return {props.children}; + }, + }} + />, + ); + + expect(receivedChildren).toBe('{"sales":[{"name":"电子产品[202](红)","value":52000'); + expect(container.querySelector('a')).not.toBeInTheDocument(); + }); + + it('keeps native component children as plain text when rawCustomComponents is true', () => { + let receivedChildren: React.ReactNode; + const markdown = '*hello* world'; + + const { container } = render( + { + receivedChildren = props.children; + return {props.children}; + }, + }} + />, + ); + + expect(receivedChildren).toBe('*hello* world'); + expect(container.querySelector('em')).not.toBeInTheDocument(); + expect(container.querySelector('i')).not.toBeInTheDocument(); + expect(container.querySelector('[data-testid="plain"]')).toHaveTextContent( + '*hello* world', + ); + }); + it('walkToken', () => { const walkTokens = (token: Token) => { if (token.type === 'heading') { diff --git a/packages/x-markdown/src/XMarkdown/core/Parser.ts b/packages/x-markdown/src/XMarkdown/core/Parser.ts index 17aeb32cc..6688a12ad 100644 --- a/packages/x-markdown/src/XMarkdown/core/Parser.ts +++ b/packages/x-markdown/src/XMarkdown/core/Parser.ts @@ -7,6 +7,7 @@ type ParserOptions = { openLinksInNewTab?: boolean; components?: XMarkdownProps['components']; protectCustomTagNewlines?: boolean; + rawCustomComponents?: boolean; escapeRawHtml?: boolean; }; @@ -34,6 +35,121 @@ const escapeReplacements: Record = { }; const getEscapeReplacement = (ch: string) => escapeReplacements[ch]; +const NATIVE_HTML_TAGS = new Set([ + 'a', + 'abbr', + 'address', + 'area', + 'article', + 'aside', + 'audio', + 'b', + 'base', + 'bdi', + 'bdo', + 'blockquote', + 'body', + 'br', + 'button', + 'canvas', + 'caption', + 'cite', + 'code', + 'col', + 'colgroup', + 'data', + 'datalist', + 'dd', + 'del', + 'details', + 'dfn', + 'dialog', + 'div', + 'dl', + 'dt', + 'em', + 'embed', + 'fieldset', + 'figcaption', + 'figure', + 'footer', + 'form', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'head', + 'header', + 'hgroup', + 'hr', + 'html', + 'i', + 'iframe', + 'img', + 'input', + 'ins', + 'kbd', + 'label', + 'legend', + 'li', + 'link', + 'main', + 'map', + 'mark', + 'menu', + 'meta', + 'meter', + 'nav', + 'noscript', + 'object', + 'ol', + 'optgroup', + 'option', + 'output', + 'p', + 'picture', + 'pre', + 'progress', + 'q', + 'rp', + 'rt', + 'ruby', + 's', + 'samp', + 'script', + 'search', + 'section', + 'select', + 'slot', + 'small', + 'source', + 'span', + 'strong', + 'style', + 'sub', + 'summary', + 'sup', + 'table', + 'tbody', + 'td', + 'template', + 'textarea', + 'tfoot', + 'th', + 'thead', + 'time', + 'title', + 'tr', + 'track', + 'u', + 'ul', + 'var', + 'video', + 'wbr', +]); + export function escapeHtml(html: string, encode?: boolean) { if (encode) { if (other.escapeTest.test(html)) { @@ -54,6 +170,10 @@ const TAIL_MARKER = Symbol('tailMarker'); // Type for tokens that can be marked for tail injection type MarkableToken = Token & { [TAIL_MARKER]?: boolean }; +type CustomTagPlaceholder = { + protected: string; +}; + class Parser { options: ParserOptions; markdownInstance: Marked; @@ -173,7 +293,9 @@ class Parser { placeholders: Map; } { const placeholders = new Map(); - const customTagNames = Object.keys(this.options.components || {}); + const customTagNames = Object.keys(this.options.components || {}).filter((name) => { + return this.options.rawCustomComponents || !NATIVE_HTML_TAGS.has(name.toLowerCase()); + }); if (customTagNames.length === 0) { return { protected: content, placeholders }; @@ -223,6 +345,12 @@ class Parser { const result: string[] = []; let lastIndex = 0; + const createPlaceholder = ({ protected: protectedContent }: CustomTagPlaceholder) => { + const ph = `\u0000XMDPLACEHOLDER${placeholderIndex++}\u0000`; + placeholders.set(ph, protectedContent); + return ph; + }; + for (const pos of positions) { if (pos.type === 'open') { // Self-closing tags don't have inner content @@ -246,22 +374,36 @@ class Parser { result.push(content.slice(lastIndex, startPos)); } - if (innerContent.includes('\n\n')) { - const protectedInner = innerContent.replace(/\n\n/g, () => { - const ph = `__X_MD_PLACEHOLDER_${placeholderIndex++}__`; - placeholders.set(ph, '\n\n'); - return ph; - }); - result.push(openTag + protectedInner + closeTag); - } else { - result.push(openTag + innerContent + closeTag); - } + result.push( + createPlaceholder({ + protected: + openTag + + (this.options.rawCustomComponents ? escapeHtml(innerContent, true) : innerContent) + + closeTag, + }), + ); lastIndex = endPos; } } } + if (stack.length > 0) { + const open = stack[0]; + if (lastIndex < open.start) { + result.push(content.slice(lastIndex, open.start)); + } + const unclosedContent = content.slice(open.start); + result.push( + createPlaceholder({ + protected: this.options.rawCustomComponents + ? this.escapeUnclosedTagContent(unclosedContent, open.openTag) + : unclosedContent, + }), + ); + return { protected: result.join(''), placeholders }; + } + if (lastIndex < content.length) { result.push(content.slice(lastIndex)); } @@ -269,14 +411,19 @@ class Parser { return { protected: result.join(''), placeholders }; } + private escapeUnclosedTagContent(content: string, openTag: string): string { + return openTag + escapeHtml(content.slice(openTag.length), true); + } + private restorePlaceholders(content: string, placeholders: Map): string { if (placeholders.size === 0) { return content; } - return content.replace( - /__X_MD_PLACEHOLDER_\d+__/g, - (match) => placeholders.get(match) ?? match, - ); + let restored = content; + placeholders.forEach((value, placeholder) => { + restored = restored.split(placeholder).join(value); + }); + return restored; } /** @@ -337,14 +484,9 @@ class Parser { // Set tail injection flag this.injectTail = parseOptions?.injectTail ?? false; - // Protect custom tags if needed - if (this.options.protectCustomTagNewlines) { - const { protected: protectedContent, placeholders } = this.protectCustomTags(content); - const parsed = this.markdownInstance.parse(protectedContent) as string; - return this.restorePlaceholders(parsed, placeholders); - } - - return this.markdownInstance.parse(content) as string; + const { protected: protectedContent, placeholders } = this.protectCustomTags(content); + const parsed = this.markdownInstance.parse(protectedContent) as string; + return this.restorePlaceholders(parsed, placeholders); } } diff --git a/packages/x-markdown/src/XMarkdown/index.tsx b/packages/x-markdown/src/XMarkdown/index.tsx index 8f005c195..cc3c4bf33 100644 --- a/packages/x-markdown/src/XMarkdown/index.tsx +++ b/packages/x-markdown/src/XMarkdown/index.tsx @@ -21,6 +21,7 @@ const XMarkdown: React.FC = React.memo((props) => { openLinksInNewTab, dompurifyConfig, protectCustomTagNewlines, + rawCustomComponents, escapeRawHtml, debug, } = props; @@ -61,6 +62,7 @@ const XMarkdown: React.FC = React.memo((props) => { openLinksInNewTab, components: mergedComponents, protectCustomTagNewlines, + rawCustomComponents, escapeRawHtml, }), [ @@ -69,6 +71,7 @@ const XMarkdown: React.FC = React.memo((props) => { openLinksInNewTab, mergedComponents, protectCustomTagNewlines, + rawCustomComponents, escapeRawHtml, ], ); diff --git a/packages/x-markdown/src/XMarkdown/interface.ts b/packages/x-markdown/src/XMarkdown/interface.ts index c7d10c52b..1a877fd94 100644 --- a/packages/x-markdown/src/XMarkdown/interface.ts +++ b/packages/x-markdown/src/XMarkdown/interface.ts @@ -179,6 +179,12 @@ interface XMarkdownProps { * @default false */ protectCustomTagNewlines?: boolean; + /** + * @description 是否将 components 命中的 HTML 标签内部内容按原始纯文本处理,不再继续解析其中的 Markdown 或 HTML + * @description Whether to treat children of HTML tags matched by components as raw plain text, without further parsing Markdown or HTML inside + * @default false + */ + rawCustomComponents?: boolean; /** * @description 是否将 Markdown 中的原始 HTML 转义为纯文本展示(不解析为真实 HTML),避免 XSS 同时保留内容 * @description Whether to escape raw HTML in Markdown as plain text (not parsed as real HTML), avoiding XSS while preserving content @@ -194,11 +200,11 @@ interface XMarkdownProps { } export type { - XMarkdownProps, - Token, - Tokens, - StreamStatus, ComponentProps, StreamingOption, + StreamStatus, TailConfig, + Token, + Tokens, + XMarkdownProps, }; diff --git a/packages/x/docs/x-markdown/components.en-US.md b/packages/x/docs/x-markdown/components.en-US.md index c3972ab68..fac2a47a7 100644 --- a/packages/x/docs/x-markdown/components.en-US.md +++ b/packages/x/docs/x-markdown/components.en-US.md @@ -38,6 +38,8 @@ import { Mermaid, Think, XMarkdown } from '@ant-design/x'; 3. If data depends on complete syntax, fetch or parse after `streamStatus === 'done'`. 4. Keep custom tags semantically clear and avoid ambiguous mixed Markdown/HTML blocks. +By default, native HTML tags still follow CommonMark rules, so Markdown inside inline HTML can continue to be parsed. Set `rawCustomComponents` when tags registered in `components` should receive their inner content as raw plain text. + ## FAQ: Custom Tag Closing Issues If block-level custom tags contain unexpected blank lines, Markdown parsers may end the HTML block early and convert trailing content into paragraphs. To avoid this: diff --git a/packages/x/docs/x-markdown/components.zh-CN.md b/packages/x/docs/x-markdown/components.zh-CN.md index 71d128978..c96ee0f5f 100644 --- a/packages/x/docs/x-markdown/components.zh-CN.md +++ b/packages/x/docs/x-markdown/components.zh-CN.md @@ -38,6 +38,8 @@ import { Mermaid, Think, XMarkdown } from '@ant-design/x'; 3. 依赖完整语法的数据解析,尽量在 `streamStatus === 'done'` 后执行。 4. 自定义标签命名尽量语义化,减少 Markdown 与 HTML 混写歧义。 +默认情况下,原生 HTML 标签仍遵循 CommonMark 规则,行内 HTML 内部的 Markdown 可能继续被解析。如果希望 `components` 注册的标签内部内容按原始纯文本传给组件,可以开启 `rawCustomComponents`。 + ## FAQ: 自定义标签闭合异常 如果块级自定义标签内部出现不符合预期的空行,Markdown 解析器可能提前结束 HTML 块,后续内容会被当作普通段落处理。建议: diff --git a/packages/x/docs/x-markdown/examples.en-US.md b/packages/x/docs/x-markdown/examples.en-US.md index 87556c5a2..3a373f654 100644 --- a/packages/x/docs/x-markdown/examples.en-US.md +++ b/packages/x/docs/x-markdown/examples.en-US.md @@ -37,6 +37,7 @@ Use this page to get a minimal setup for rendering LLM Markdown output. | openLinksInNewTab | Add `target="_blank"` to all links so they open in a new tab | `boolean` | `false` | | dompurifyConfig | DOMPurify config for HTML sanitization and XSS protection | [`DOMPurify.Config`](https://github.com/cure53/DOMPurify#can-i-configure-dompurify) | - | | protectCustomTagNewlines | Whether to preserve newlines inside custom tags | `boolean` | `false` | +| rawCustomComponents | Treat children of HTML tags matched by `components` as raw plain text, without parsing Markdown or HTML inside | `boolean` | `false` | | escapeRawHtml | Escape raw HTML in Markdown as plain text (do not parse as real HTML), to prevent XSS while keeping content visible | `boolean` | `false` | | debug | Enable debug mode (performance overlay) | `boolean` | `false` | diff --git a/packages/x/docs/x-markdown/examples.zh-CN.md b/packages/x/docs/x-markdown/examples.zh-CN.md index 5ef292a19..3406f6e69 100644 --- a/packages/x/docs/x-markdown/examples.zh-CN.md +++ b/packages/x/docs/x-markdown/examples.zh-CN.md @@ -37,6 +37,7 @@ packageName: x-markdown | openLinksInNewTab | 是否为所有链接添加 `target="_blank"` 并在新标签页打开 | `boolean` | `false` | | dompurifyConfig | HTML 净化与 XSS 防护的 DOMPurify 配置 | [`DOMPurify.Config`](https://github.com/cure53/DOMPurify#can-i-configure-dompurify) | - | | protectCustomTagNewlines | 是否保留自定义标签内部的换行 | `boolean` | `false` | +| rawCustomComponents | 是否将 `components` 命中的 HTML 标签内部内容按原始纯文本处理,不再继续解析其中的 Markdown 或 HTML | `boolean` | `false` | | escapeRawHtml | 是否将 Markdown 中的原始 HTML 转义为纯文本展示(不解析为真实 HTML),用于防 XSS 同时保留内容 | `boolean` | `false` | | debug | 是否开启调试模式(显示性能监控浮层) | `boolean` | `false` |