diff --git a/package-lock.json b/package-lock.json index 86d5718..c38276a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4218,6 +4218,73 @@ "void-elements": "^2.0.0" } }, + "node_modules/dom-serializer": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-3.1.1.tgz", + "integrity": "sha512-4MEa38/QexBob6gFNwu+EGdWvhJ1OKuNwdYY3Y3NyeWDQfnGeDYQUDfIRzWu5B5gsv03so2Uxd28YC6zrsx3Lw==", + "license": "MIT", + "dependencies": { + "domelementtype": "^3.0.0", + "domhandler": "^6.0.0", + "entities": "^8.0.0" + }, + "engines": { + "node": ">=20.19.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-3.0.0.tgz", + "integrity": "sha512-umCQid3jKbDmVjx8jGaW7uUykm4DEUeyV21hPxNMo2nV955DhUThwqyOIDtreepP31hl84X7G5U9ZfsWvIB3Pg==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause", + "engines": { + "node": ">=20.19.0" + } + }, + "node_modules/domhandler": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-6.0.1.tgz", + "integrity": "sha512-gYzvtM72ZtxQO0T048kd6HWSbbGCNOUwcnfQ01cqIJ4X2IYKFFHZ5mKvrQETcFXxsRObZulDaKmy//R7TPtsBg==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^3.0.0" + }, + "engines": { + "node": ">=20.19.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-4.0.2.tgz", + "integrity": "sha512-qI4JLRKnSzqFqr7hAlS5xQDusBCjKSEG4t4+7aNrIQMHBcsC2TGEhuyABJdYkgSewL57PNLYEiibY2iPKhKpaA==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^3.0.0", + "domelementtype": "^3.0.0", + "domhandler": "^6.0.0" + }, + "engines": { + "node": ">=20.19.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -4341,7 +4408,6 @@ "version": "8.0.0", "resolved": "https://registry.npmjs.org/entities/-/entities-8.0.0.tgz", "integrity": "sha512-zwfzJecQ/Uej6tusMqwAqU/6KL2XaB2VZ2Jg54Je6ahNBGNH6Ek6g3jjNCF0fG9EWQKGZNddNjU5F1ZQn/sBnA==", - "dev": true, "license": "BSD-2-Clause", "engines": { "node": ">=20.19.0" @@ -5517,6 +5583,28 @@ "dev": true, "license": "MIT" }, + "node_modules/htmlparser2": { + "version": "12.0.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-12.0.0.tgz", + "integrity": "sha512-Tz7u1i95/g2x2jz81+x0FBVhBhY5aRTvD3tXXdFaljuNdzDLJ8UGNRrTcj2cgQvAg3iW/h77Fz15nLW0L0CrZw==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^3.0.0", + "domhandler": "^6.0.0", + "domutils": "^4.0.2", + "entities": "^8.0.0" + }, + "engines": { + "node": ">=20.19.0" + } + }, "node_modules/http-errors": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", @@ -10065,6 +10153,9 @@ "packages/linkify-html": { "version": "4.3.2", "license": "MIT", + "dependencies": { + "htmlparser2": "^12.0.0" + }, "devDependencies": { "@nfrasser/simple-html-tokenizer": "==0.5.11-4" }, diff --git a/package.json b/package.json index 104ffe0..66f078f 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,6 @@ "@babel/preset-env": "^7.13.10", "@eslint/eslintrc": "^3.1.0", "@eslint/js": "^9.13.0", - "@nfrasser/simple-html-tokenizer": "==0.5.11-4", "@rollup/plugin-babel": "^7.0.0", "@rollup/plugin-commonjs": "^29.0.2", "@rollup/plugin-node-resolve": "^16.0.1", diff --git a/packages/linkify-html/package.json b/packages/linkify-html/package.json index ea92ad1..14a7550 100644 --- a/packages/linkify-html/package.json +++ b/packages/linkify-html/package.json @@ -8,7 +8,7 @@ "scripts": { "build": "rollup -c rollup.config.js", "clean": "rm -rf lib dist *.tgz *.d.ts", - "copy:license": "copyfiles -f ../../node_modules/@nfrasser/simple-html-tokenizer/LICENSE dist/simple-html-tokenizer", + "copy:license": "copyfiles -f ../../node_modules/htmlparser2/LICENSE dist/htmlparser2", "prepack": "run-s clean build tsc copy:license", "tsc": "tsc", "test": "echo \"Error: no test specified\" && exit 1" @@ -31,7 +31,7 @@ }, "homepage": "https://linkify.js.org", "devDependencies": { - "@nfrasser/simple-html-tokenizer": "==0.5.11-4" + "htmlparser2": "^12.0.0" }, "peerDependencies": { "linkifyjs": "^4.0.0" diff --git a/packages/linkify-html/src/linkify-html.mjs b/packages/linkify-html/src/linkify-html.mjs index 8f05afb..11f3b61 100644 --- a/packages/linkify-html/src/linkify-html.mjs +++ b/packages/linkify-html/src/linkify-html.mjs @@ -1,12 +1,51 @@ -import { tokenize as htmlTokenize } from '@nfrasser/simple-html-tokenizer'; +import { Parser as HTMLParser } from 'htmlparser2'; import { tokenize, Options } from 'linkifyjs'; -const LinkifyResult = 'LinkifyResult'; -const StartTag = 'StartTag'; -const EndTag = 'EndTag'; -const Chars = 'Chars'; -const Comment = 'Comment'; -const Doctype = 'Doctype'; +// Known void elements in HTML5 — these never need a closing tag +const VOID_ELEMENTS = new Set([ + 'area', + 'base', + 'br', + 'col', + 'embed', + 'hr', + 'img', + 'input', + 'link', + 'meta', + 'param', + 'source', + 'track', + 'wbr', +]); + +// Elements with optional end tags in HTML5 — these may be implicitly closed +// by the parser (auto-closed), so an immediate implied close is NOT self-closing +const OPTIONAL_END_TAG = new Set([ + 'body', + 'caption', + 'col', + 'colgroup', + 'dd', + 'dt', + 'head', + 'html', + 'li', + 'optgroup', + 'option', + 'p', + 'rb', + 'rp', + 'rt', + 'rtc', + 'summary', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'tr', +]); /** * @param {string} str html string to link @@ -14,156 +53,158 @@ const Doctype = 'Doctype'; * @returns {string} resulting string */ export default function linkifyHtml(str, opts = {}) { - // `tokens` and `token` in this section refer to tokens generated by the - // HTML parser, not linkify's parser - const tokens = htmlTokenize(str); - const linkifiedTokens = []; - const linkified = []; - const options = new Options(opts, defaultRender); + const output = []; - // Linkify the tokens given by the parser - for (let i = 0; i < tokens.length; i++) { - const token = tokens[i]; + // Stack of uppercase tag names whose content should not be linkified + const ignoreDepth = []; - if (token.type === StartTag) { - linkifiedTokens.push(token); + // Pending open tag: stored until we know if it's self-closing or has content + let pendingTag = null; - // Ignore all the contents of ignored tags - const tagName = token.tagName.toUpperCase(); - const isIgnored = tagName === 'A' || options.ignoreTags.indexOf(tagName) >= 0; - if (!isIgnored) { - continue; - } + // Buffer for text accumulation — htmlparser2 may split text at entity boundaries + let textBuffer = ''; - let preskipLen = linkifiedTokens.length; - skipTagTokens(tagName, tokens, ++i, linkifiedTokens); - i += linkifiedTokens.length - preskipLen - 1; - } else if (token.type !== Chars) { - // Skip this token, it's not important - linkifiedTokens.push(token); - } else { - // Valid text token, linkify it! - const linkifedChars = linkifyChars(token.chars, options); - linkifiedTokens.push.apply(linkifiedTokens, linkifedChars); + function flushPendingTag() { + if (pendingTag) { + output.push(renderOpenTag(pendingTag.name, pendingTag.attrs, false)); + pendingTag = null; } } - // Convert the tokens back into a string - for (let i = 0; i < linkifiedTokens.length; i++) { - const token = linkifiedTokens[i]; - switch (token.type) { - case LinkifyResult: - linkified.push(token.rendered); - break; - case StartTag: { - let link = '<' + token.tagName; - if (token.attributes.length > 0) { - link += ' ' + attributeArrayToStrings(token.attributes).join(' '); - } - if (token.selfClosing) { - link += ' /'; - } - link += '>'; - linkified.push(link); - break; + function flushTextBuffer() { + if (!textBuffer) { + return; + } + const text = textBuffer; + textBuffer = ''; + if (ignoreDepth.length > 0) { + // Inside an ignored tag: output escaped text, do not linkify + output.push(escapeText(text)); + } else { + const items = linkifyChars(text, options); + for (let i = 0; i < items.length; i++) { + output.push(items[i]); } - case EndTag: - linkified.push(`${token.tagName}>`); - break; - case Chars: - linkified.push(escapeText(token.chars)); - break; - case Comment: - linkified.push(``); - break; - case Doctype: { - let doctype = `'; - linkified.push(doctype); + } + } + + function renderOpenTag(name, attrs, selfClosing) { + let tag = `<${name}`; + for (const attr in attrs) { + tag += ` ${attr}="${escapeAttr(String(attrs[attr]))}"`; + } + tag += selfClosing ? ' />' : '>'; + return tag; + } + + function pushIgnoreTag(name) { + ignoreDepth.push(name.toUpperCase()); + } + + function popIgnoreTag(name) { + const upper = name.toUpperCase(); + for (let i = ignoreDepth.length - 1; i >= 0; i--) { + if (ignoreDepth[i] === upper) { + ignoreDepth.splice(i, 1); break; } } } - return linkified.join(''); + const parser = new HTMLParser({ + onprocessinginstruction(name, data) { + flushTextBuffer(); + flushPendingTag(); + // Reconstruct the processing instruction / doctype using raw `data` + output.push(`<${data}>`); + }, + + onopentag(name, attrs) { + flushTextBuffer(); + flushPendingTag(); + const tagNameUpper = name.toUpperCase(); + const isIgnored = tagNameUpper === 'A' || options.ignoreTags.indexOf(tagNameUpper) >= 0; + if (isIgnored) { + pushIgnoreTag(name); + } + pendingTag = { name, attrs }; + }, + + onclosetag(name, isImplied) { + flushTextBuffer(); + if (pendingTag && pendingTag.name === name) { + // Close fired before any content was emitted for this tag. + if (!isImplied) { + // Explicit close tag (e.g. or
) + output.push(renderOpenTag(name, pendingTag.attrs, false)); + output.push(`${name}>`); + } else if (VOID_ELEMENTS.has(name)) { + // Void element — output bare open tag (no slash) + output.push(renderOpenTag(name, pendingTag.attrs, false)); + } else if (OPTIONAL_END_TAG.has(name)) { + // Optional-end-tag element that was auto-closed by the parser — + // treat as a regular open (not self-closing) + output.push(renderOpenTag(name, pendingTag.attrs, false)); + } else { + // Non-void, non-optional element (SVG/MathML/custom) — self-closing + output.push(renderOpenTag(name, pendingTag.attrs, true)); + } + pendingTag = null; + } else { + flushPendingTag(); + if (!isImplied) { + output.push(`${name}>`); + } + } + popIgnoreTag(name); + }, + + ontext(text) { + // Flush the pending open tag before accumulating text + flushPendingTag(); + textBuffer += text; + }, + + oncomment(data) { + flushTextBuffer(); + flushPendingTag(); + output.push(``); + }, + }); + + parser.write(str); + parser.end(); + + // Flush any remaining content after parsing is complete + flushTextBuffer(); + flushPendingTag(); + + return output.join(''); } /** - `tokens` and `token` in this section referes to tokens returned by - `linkify.tokenize`. `linkified` will contain HTML Parser-style tokens - @param {string} - @param {import('linkifyjs').Options} -*/ + * Linkify a plain-text string, returning an array of HTML output strings. + * @param {string} str + * @param {import('linkifyjs').Options} options + * @returns {string[]} + */ function linkifyChars(str, options) { const tokens = tokenize(str); const result = []; - for (let i = 0; i < tokens.length; i++) { const token = tokens[i]; if (token.t === 'nl' && options.get('nl2br')) { - result.push({ - type: StartTag, - tagName: 'br', - attributes: [], - selfClosing: true, - }); + result.push('