diff --git a/package-lock.json b/package-lock.json index 50ecc9d88..d4abbfbbe 100644 --- a/package-lock.json +++ b/package-lock.json @@ -26,6 +26,7 @@ "@uiw/color-convert": "^2.10.1", "@uiw/react-color-block": "^2.10.1", "@uiw/react-color-colorful": "^2.9.2", + "@xenova/transformers": "^2.17.2", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "dnd-timeline": "^2.4.0", @@ -1772,6 +1773,15 @@ "integrity": "sha512-RiB/yIh78pcIxl6lLMG0CgBXAZ2Y0eVHqMPYugu+9U0AeT6YBeiJpf7lbdJNIugFP5SIjwNRgo4DhR1Qxi26Gg==", "license": "MIT" }, + "node_modules/@huggingface/jinja": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.2.2.tgz", + "integrity": "sha512-/KPde26khDUIPkTGU82jdtTW9UAuvUTumCAbFs/7giR0SxsvZC4hru51PBvpijH6BVkHcROcvZM/lpy5h1jRRA==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, "node_modules/@isaacs/fs-minipass": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz", @@ -2104,6 +2114,70 @@ "dev": true, "license": "MIT" }, + "node_modules/@protobufjs/aspromise": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", + "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/base64": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", + "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/codegen": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.5.tgz", + "integrity": "sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/eventemitter": { + "version": "1.1.0", 
+ "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", + "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/fetch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", + "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==", + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.1", + "@protobufjs/inquire": "^1.1.0" + } + }, + "node_modules/@protobufjs/float": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", + "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/inquire": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.1.tgz", + "integrity": "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/path": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", + "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/pool": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", + "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==", + "license": "BSD-3-Clause" + }, + "node_modules/@protobufjs/utf8": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.1.tgz", + "integrity": 
"sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg==", + "license": "BSD-3-Clause" + }, "node_modules/@radix-ui/number": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.1.tgz", @@ -3822,6 +3896,12 @@ "@types/node": "*" } }, + "node_modules/@types/long": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz", + "integrity": "sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA==", + "license": "MIT" + }, "node_modules/@types/ms": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz", @@ -3833,7 +3913,6 @@ "version": "22.19.17", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.17.tgz", "integrity": "sha512-wGdMcf+vPYM6jikpS/qhg6WiqSV/OhG+jeeHT/KlVqxYfD40iYJf9/AE1uQxVWFvU7MipKRkRv8NSHiCGgPr8Q==", - "dev": true, "license": "MIT", "dependencies": { "undici-types": "~6.21.0" @@ -4293,6 +4372,20 @@ "integrity": "sha512-RPmm6kgRbI8e98zSD3RVACvnuktIja5+yLgDAkTmxLr90BEwdTXRQWNLF3ETTTyH/8mKhznZuN5AveXYFEsMGQ==", "license": "BSD-3-Clause" }, + "node_modules/@xenova/transformers": { + "version": "2.17.2", + "resolved": "https://registry.npmjs.org/@xenova/transformers/-/transformers-2.17.2.tgz", + "integrity": "sha512-lZmHqzrVIkSvZdKZEx7IYY51TK0WDrC8eR0c5IMnBsO8di8are1zzw8BlLhyO2TklZKLN5UffNGs1IJwT6oOqQ==", + "license": "Apache-2.0", + "dependencies": { + "@huggingface/jinja": "^0.2.2", + "onnxruntime-web": "1.14.0", + "sharp": "^0.32.0" + }, + "optionalDependencies": { + "onnxruntime-node": "1.14.0" + } + }, "node_modules/@xmldom/xmldom": { "version": "0.8.13", "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.8.13.tgz", @@ -4763,11 +4856,101 @@ "node": "18 || 20 || >=22" } }, + "node_modules/bare-events": { + "version": "2.8.2", + "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.8.2.tgz", + "integrity": 
"sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==", + "license": "Apache-2.0", + "peerDependencies": { + "bare-abort-controller": "*" + }, + "peerDependenciesMeta": { + "bare-abort-controller": { + "optional": true + } + } + }, + "node_modules/bare-fs": { + "version": "4.7.1", + "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.7.1.tgz", + "integrity": "sha512-WDRsyVN52eAx/lBamKD6uyw8H4228h/x0sGGGegOamM2cd7Pag88GfMQalobXI+HaEUxpCkbKQUDOQqt9wawRw==", + "license": "Apache-2.0", + "dependencies": { + "bare-events": "^2.5.4", + "bare-path": "^3.0.0", + "bare-stream": "^2.6.4", + "bare-url": "^2.2.2", + "fast-fifo": "^1.3.2" + }, + "engines": { + "bare": ">=1.16.0" + }, + "peerDependencies": { + "bare-buffer": "*" + }, + "peerDependenciesMeta": { + "bare-buffer": { + "optional": true + } + } + }, + "node_modules/bare-os": { + "version": "3.9.1", + "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.9.1.tgz", + "integrity": "sha512-6M5XjcnsygQNPMCMPXSK379xrJFiZ/AEMNBmFEmQW8d/789VQATvriyi5r0HYTL9TkQ26rn3kgdTG3aisbrXkQ==", + "license": "Apache-2.0", + "engines": { + "bare": ">=1.14.0" + } + }, + "node_modules/bare-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz", + "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==", + "license": "Apache-2.0", + "dependencies": { + "bare-os": "^3.0.1" + } + }, + "node_modules/bare-stream": { + "version": "2.13.1", + "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.13.1.tgz", + "integrity": "sha512-Vp0cnjYyrEC4whYTymQ+YZi6pBpfiICZO3cfRG8sy67ZNWe951urv1x4eW1BKNngw3U+3fPYb5JQvHbCtxH7Ow==", + "license": "Apache-2.0", + "dependencies": { + "streamx": "^2.25.0", + "teex": "^1.0.1" + }, + "peerDependencies": { + "bare-abort-controller": "*", + "bare-buffer": "*", + "bare-events": "*" + }, + "peerDependenciesMeta": { + "bare-abort-controller": { + 
"optional": true + }, + "bare-buffer": { + "optional": true + }, + "bare-events": { + "optional": true + } + } + }, + "node_modules/bare-url": { + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.4.3.tgz", + "integrity": "sha512-Kccpc7ACfXaxfeInfqKcZtW4pT5YBn1mesc4sCsun6sRwtbJ4h+sNOaksUpYEJUKfN65YWC6Bw2OJEFiKxq8nQ==", + "license": "Apache-2.0", + "dependencies": { + "bare-path": "^3.0.0" + } + }, "node_modules/base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", - "dev": true, "funding": [ { "type": "github", @@ -4819,6 +5002,17 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/bl": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", + "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "license": "MIT", + "dependencies": { + "buffer": "^5.5.0", + "inherits": "^2.0.4", + "readable-stream": "^3.4.0" + } + }, "node_modules/boolean": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz", @@ -4891,7 +5085,6 @@ "version": "5.7.1", "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", - "dev": true, "funding": [ { "type": "github", @@ -4907,7 +5100,6 @@ } ], "license": "MIT", - "optional": true, "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.1.13" @@ -5306,11 +5498,23 @@ "node": ">=6" } }, + "node_modules/color": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/color/-/color-4.2.3.tgz", + "integrity": "sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==", + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1", + 
"color-string": "^1.9.0" + }, + "engines": { + "node": ">=12.5.0" + } + }, "node_modules/color-convert": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, "license": "MIT", "dependencies": { "color-name": "~1.1.4" @@ -5323,9 +5527,18 @@ "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true, "license": "MIT" }, + "node_modules/color-string": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/color-string/-/color-string-1.9.1.tgz", + "integrity": "sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==", + "license": "MIT", + "dependencies": { + "color-name": "^1.0.0", + "simple-swizzle": "^0.2.2" + } + }, "node_modules/colorette": { "version": "2.0.20", "resolved": "https://registry.npmjs.org/colorette/-/colorette-2.0.20.tgz", @@ -5529,7 +5742,6 @@ "version": "6.0.0", "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", - "dev": true, "license": "MIT", "dependencies": { "mimic-response": "^3.1.0" @@ -5545,7 +5757,6 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", - "dev": true, "license": "MIT", "engines": { "node": ">=10" @@ -5554,6 +5765,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/deep-extend": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", + "integrity": 
"sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", + "license": "MIT", + "engines": { + "node": ">=4.0.0" + } + }, "node_modules/defer-to-connect": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/defer-to-connect/-/defer-to-connect-2.0.1.tgz", @@ -5622,6 +5842,15 @@ "node": ">=6" } }, + "node_modules/detect-libc": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", + "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=8" + } + }, "node_modules/detect-node": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.1.0.tgz", @@ -6096,7 +6325,6 @@ "version": "1.4.5", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", - "dev": true, "license": "MIT", "dependencies": { "once": "^1.4.0" @@ -6289,6 +6517,24 @@ "license": "MIT", "peer": true }, + "node_modules/events-universal": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/events-universal/-/events-universal-1.0.1.tgz", + "integrity": "sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==", + "license": "Apache-2.0", + "dependencies": { + "bare-events": "^2.7.0" + } + }, + "node_modules/expand-template": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", + "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", + "license": "(MIT OR WTFPL)", + "engines": { + "node": ">=6" + } + }, "node_modules/expect-type": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.3.0.tgz", @@ -6368,6 +6614,12 @@ "dev": true, 
"license": "MIT" }, + "node_modules/fast-fifo": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", + "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==", + "license": "MIT" + }, "node_modules/fast-glob": { "version": "3.3.3", "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", @@ -6503,6 +6755,12 @@ "integrity": "sha512-IKlE+pNvL2R+kVL1kEhUYqRxVqeFnjiIvHWDMLFXNaqyUdFXQM2wte44EfMYJNHkW16X991t2Zg8apKkhv7OBA==", "license": "MIT" }, + "node_modules/flatbuffers": { + "version": "1.12.0", + "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-1.12.0.tgz", + "integrity": "sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ==", + "license": "SEE LICENSE IN LICENSE.txt" + }, "node_modules/form-data": { "version": "4.0.5", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", @@ -6561,6 +6819,12 @@ } } }, + "node_modules/fs-constants": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", + "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", + "license": "MIT" + }, "node_modules/fs-extra": { "version": "8.1.0", "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-8.1.0.tgz", @@ -6716,6 +6980,12 @@ "js-binary-schema-parser": "^2.0.3" } }, + "node_modules/github-from-package": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", + "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", + "license": "MIT" + }, "node_modules/glob": { "version": "7.2.3", "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", @@ -6883,6 +7153,12 @@ "integrity": 
"sha512-dMW4CWBTUK1AEEDeZc1g4xpPGIrSf9fJF960qbTZmN/QwZIWY5wgliS6JWl9/25fpTGJrMRtSjGtOmPnfjZB+A==", "license": "Standard 'no charge' license: https://gsap.com/standard-license." }, + "node_modules/guid-typescript": { + "version": "1.0.9", + "resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz", + "integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==", + "license": "ISC" + }, "node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -7093,7 +7369,6 @@ "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", - "dev": true, "funding": [ { "type": "github", @@ -7108,8 +7383,7 @@ "url": "https://feross.org/support" } ], - "license": "BSD-3-Clause", - "optional": true + "license": "BSD-3-Clause" }, "node_modules/indent-string": { "version": "4.0.0", @@ -7137,9 +7411,20 @@ "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", - "dev": true, "license": "ISC" }, + "node_modules/ini": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", + "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", + "license": "ISC" + }, + "node_modules/is-arrayish": { + "version": "0.3.4", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.4.tgz", + "integrity": "sha512-m6UrgzFVUYawGBh1dUsWR5M2Clqic9RVXC/9f8ceNlv2IcO9j9J/z8UoCLPqtsPBFNzEpfR3xftohbfqDx8EQA==", + "license": "MIT" + }, "node_modules/is-binary-path": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", @@ -7652,6 +7937,12 @@ "url": 
"https://github.com/chalk/slice-ansi?sponsor=1" } }, + "node_modules/long": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz", + "integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==", + "license": "Apache-2.0" + }, "node_modules/loose-envify": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", @@ -7884,7 +8175,6 @@ "version": "1.2.8", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", - "dev": true, "license": "MIT", "funding": { "url": "https://github.com/sponsors/ljharb" @@ -7927,6 +8217,12 @@ "mkdirp": "bin/cmd.js" } }, + "node_modules/mkdirp-classic": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", + "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==", + "license": "MIT" + }, "node_modules/motion": { "version": "12.38.0", "resolved": "https://registry.npmjs.org/motion/-/motion-12.38.0.tgz", @@ -8023,6 +8319,12 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, + "node_modules/napi-build-utils": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz", + "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==", + "license": "MIT" + }, "node_modules/node-abi": { "version": "4.28.0", "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-4.28.0.tgz", @@ -8256,7 +8558,6 @@ "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", - "dev": true, "license": "ISC", "dependencies": { "wrappy": "1" @@ -8278,6 +8579,50 @@ 
"url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/onnx-proto": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/onnx-proto/-/onnx-proto-4.0.4.tgz", + "integrity": "sha512-aldMOB3HRoo6q/phyB6QRQxSt895HNNw82BNyZ2CMh4bjeKv7g/c+VpAFtJuEMVfYLMbRx61hbuqnKceLeDcDA==", + "license": "MIT", + "dependencies": { + "protobufjs": "^6.8.8" + } + }, + "node_modules/onnxruntime-common": { + "version": "1.14.0", + "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.14.0.tgz", + "integrity": "sha512-3LJpegM2iMNRX2wUmtYfeX/ytfOzNwAWKSq1HbRrKc9+uqG/FsEA0bbKZl1btQeZaXhC26l44NWpNUeXPII7Ew==", + "license": "MIT" + }, + "node_modules/onnxruntime-node": { + "version": "1.14.0", + "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.14.0.tgz", + "integrity": "sha512-5ba7TWomIV/9b6NH/1x/8QEeowsb+jBEvFzU6z0T4mNsFwdPqXeFUM7uxC6QeSRkEbWu3qEB0VMjrvzN/0S9+w==", + "license": "MIT", + "optional": true, + "os": [ + "win32", + "darwin", + "linux" + ], + "dependencies": { + "onnxruntime-common": "~1.14.0" + } + }, + "node_modules/onnxruntime-web": { + "version": "1.14.0", + "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.14.0.tgz", + "integrity": "sha512-Kcqf43UMfW8mCydVGcX9OMXI2VN17c0p6XvR7IPSZzBf/6lteBzXHvcEVWDPmCKuGombl997HgLqj91F11DzXw==", + "license": "MIT", + "dependencies": { + "flatbuffers": "^1.12.0", + "guid-typescript": "^1.0.9", + "long": "^4.0.0", + "onnx-proto": "^4.0.4", + "onnxruntime-common": "~1.14.0", + "platform": "^1.3.6" + } + }, "node_modules/p-cancelable": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-2.1.1.tgz", @@ -8470,6 +8815,12 @@ "integrity": "sha512-mlsTRyGaPBjPedk6Bvw+aqbsXDtoAyAzm5MO7JgU+yVRyMQ5O8bD4Kcci7BS85f93veegeCPkL8R4GLClnjLFw==", "license": "MIT" }, + "node_modules/platform": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz", + "integrity": 
"sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==", + "license": "MIT" + }, "node_modules/playwright": { "version": "1.59.1", "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz", @@ -8713,6 +9064,91 @@ "node": "^12.20.0 || >=14" } }, + "node_modules/prebuild-install": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", + "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==", + "deprecated": "No longer maintained. Please contact the author of the relevant native addon; alternatives are available.", + "license": "MIT", + "dependencies": { + "detect-libc": "^2.0.0", + "expand-template": "^2.0.3", + "github-from-package": "0.0.0", + "minimist": "^1.2.3", + "mkdirp-classic": "^0.5.3", + "napi-build-utils": "^2.0.0", + "node-abi": "^3.3.0", + "pump": "^3.0.0", + "rc": "^1.2.7", + "simple-get": "^4.0.0", + "tar-fs": "^2.0.0", + "tunnel-agent": "^0.6.0" + }, + "bin": { + "prebuild-install": "bin.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/prebuild-install/node_modules/chownr": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", + "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", + "license": "ISC" + }, + "node_modules/prebuild-install/node_modules/node-abi": { + "version": "3.92.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.92.0.tgz", + "integrity": "sha512-KdHvFWZjEKDf0cakgFjebl371GPsISX2oZHcuyKqM7DtogIsHrqKeLTo8wBHxaXRAQlY2PsPlZmfo+9ZCxEREQ==", + "license": "MIT", + "dependencies": { + "semver": "^7.3.5" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/prebuild-install/node_modules/semver": { + "version": "7.8.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.8.0.tgz", + "integrity": 
"sha512-AcM7dV/5ul4EekoQ29Agm5vri8JNqRyj39o0qpX6vDF2GZrtutZl5RwgD1XnZjiTAfncsJhMI48QQH3sN87YNA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/prebuild-install/node_modules/tar-fs": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", + "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==", + "license": "MIT", + "dependencies": { + "chownr": "^1.1.1", + "mkdirp-classic": "^0.5.2", + "pump": "^3.0.0", + "tar-stream": "^2.1.4" + } + }, + "node_modules/prebuild-install/node_modules/tar-stream": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", + "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", + "license": "MIT", + "dependencies": { + "bl": "^4.0.3", + "end-of-stream": "^1.4.1", + "fs-constants": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.1.1" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/pretty-format": { "version": "27.5.1", "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-27.5.1.tgz", @@ -8806,11 +9242,36 @@ "signal-exit": "^3.0.2" } }, + "node_modules/protobufjs": { + "version": "6.11.6", + "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.6.tgz", + "integrity": "sha512-k8BHqgPBOtrlougZZqF2uUk5Z7bN8f0wj+3e8M3hvtSv0NBAz4VBy5f6R5Nxq/l+i7mRFTgNZb2trxqTpHNY/A==", + "hasInstallScript": true, + "license": "BSD-3-Clause", + "dependencies": { + "@protobufjs/aspromise": "^1.1.2", + "@protobufjs/base64": "^1.1.2", + "@protobufjs/codegen": "^2.0.4", + "@protobufjs/eventemitter": "^1.1.0", + "@protobufjs/fetch": "^1.1.0", + "@protobufjs/float": "^1.0.2", + "@protobufjs/inquire": "^1.1.0", + "@protobufjs/path": "^1.1.2", + "@protobufjs/pool": "^1.1.0", + "@protobufjs/utf8": "^1.1.0", + "@types/long": "^4.0.1", + 
"@types/node": ">=13.7.0", + "long": "^4.0.0" + }, + "bin": { + "pbjs": "bin/pbjs", + "pbts": "bin/pbts" + } + }, "node_modules/pump": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz", "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==", - "dev": true, "license": "MIT", "dependencies": { "end-of-stream": "^1.1.0", @@ -8893,6 +9354,21 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/rc": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", + "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", + "license": "(BSD-2-Clause OR MIT OR Apache-2.0)", + "dependencies": { + "deep-extend": "^0.6.0", + "ini": "~1.3.0", + "minimist": "^1.2.0", + "strip-json-comments": "~2.0.1" + }, + "bin": { + "rc": "cli.js" + } + }, "node_modules/re-resizable": { "version": "6.11.2", "resolved": "https://registry.npmjs.org/re-resizable/-/re-resizable-6.11.2.tgz", @@ -9091,6 +9567,20 @@ "pify": "^2.3.0" } }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/readdirp": { "version": "3.6.0", "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", @@ -9373,6 +9863,26 @@ "queue-microtask": "^1.2.2" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + 
"url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", @@ -9457,6 +9967,47 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/sharp": { + "version": "0.32.6", + "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.32.6.tgz", + "integrity": "sha512-KyLTWwgcR9Oe4d9HwCwNM2l7+J0dUQwn/yf7S0EnTtb0eVS4RxO0eUSvxPtzT4F3SY+C4K6fqdv/DO27sJ/v/w==", + "hasInstallScript": true, + "license": "Apache-2.0", + "dependencies": { + "color": "^4.2.3", + "detect-libc": "^2.0.2", + "node-addon-api": "^6.1.0", + "prebuild-install": "^7.1.1", + "semver": "^7.5.4", + "simple-get": "^4.0.1", + "tar-fs": "^3.0.4", + "tunnel-agent": "^0.6.0" + }, + "engines": { + "node": ">=14.15.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/sharp/node_modules/node-addon-api": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-6.1.0.tgz", + "integrity": "sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA==", + "license": "MIT" + }, + "node_modules/sharp/node_modules/semver": { + "version": "7.8.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.8.0.tgz", + "integrity": "sha512-AcM7dV/5ul4EekoQ29Agm5vri8JNqRyj39o0qpX6vDF2GZrtutZl5RwgD1XnZjiTAfncsJhMI48QQH3sN87YNA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -9570,6 +10121,60 @@ "dev": true, "license": "ISC" }, + "node_modules/simple-concat": { + "version": "1.0.1", + "resolved": 
"https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/simple-get": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", + "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "decompress-response": "^6.0.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, + "node_modules/simple-swizzle": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.4.tgz", + "integrity": "sha512-nAu1WFPQSMNr2Zn9PGSZK9AGn4t/y97lEm+MXTtUDwfP0ksAIX4nO+6ruD9Jwut4C49SB1Ws+fbXsm/yScWOHw==", + "license": "MIT", + "dependencies": { + "is-arrayish": "^0.3.1" + } + }, "node_modules/simple-update-notifier": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/simple-update-notifier/-/simple-update-notifier-2.0.0.tgz", @@ -9711,6 +10316,26 @@ "dev": true, "license": "MIT" }, + "node_modules/streamx": { + "version": "2.25.0", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.25.0.tgz", + "integrity": "sha512-0nQuG6jf1w+wddNEEXCF4nTg3LtufWINB5eFEN+5TNZW7KWJp6x87+JFL43vaAUPyCfH1wID+mNVyW6OHtFamg==", + "license": "MIT", + "dependencies": { + "events-universal": "^1.0.0", + "fast-fifo": "^1.3.2", + "text-decoder": "^1.1.0" + } + }, + 
"node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, "node_modules/string-argv": { "version": "0.3.2", "resolved": "https://registry.npmjs.org/string-argv/-/string-argv-0.3.2.tgz", @@ -9791,6 +10416,15 @@ "node": ">=8" } }, + "node_modules/strip-json-comments": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", + "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/sucrase": { "version": "3.35.1", "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.1.tgz", @@ -9949,6 +10583,46 @@ "node": ">=18" } }, + "node_modules/tar-fs": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.2.tgz", + "integrity": "sha512-QGxxTxxyleAdyM3kpFs14ymbYmNFrfY+pHj7Z8FgtbZ7w2//VAgLMac7sT6nRpIHjppXO2AwwEOg0bPFVRcmXw==", + "license": "MIT", + "dependencies": { + "pump": "^3.0.0", + "tar-stream": "^3.1.5" + }, + "optionalDependencies": { + "bare-fs": "^4.0.1", + "bare-path": "^3.0.0" + } + }, + "node_modules/tar-stream": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.2.0.tgz", + "integrity": "sha512-ojzvCvVaNp6aOTFmG7jaRD0meowIAuPc3cMMhSgKiVWws1GyHbGd/xvnyuRKcKlMpt3qvxx6r0hreCNITP9hIg==", + "license": "MIT", + "dependencies": { + "b4a": "^1.6.4", + "bare-fs": "^4.5.5", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, + "node_modules/tar-stream/node_modules/b4a": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.8.1.tgz", + "integrity": 
"sha512-aiqre1Nr0B/6DgE2N5vwTc+2/oQZ4Wh1t4NznYY4E00y8LCt6NqdRv81so00oo27D8MVKTpUa/MwUUtBLXCoDw==", + "license": "Apache-2.0", + "peerDependencies": { + "react-native-b4a": "*" + }, + "peerDependenciesMeta": { + "react-native-b4a": { + "optional": true + } + } + }, "node_modules/tar/node_modules/yallist": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz", @@ -9959,6 +10633,15 @@ "node": ">=18" } }, + "node_modules/teex": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/teex/-/teex-1.0.1.tgz", + "integrity": "sha512-eYE6iEI62Ni1H8oIa7KlDU6uQBtqr4Eajni3wX7rpfXD8ysFx8z0+dri+KWEPWpBsxXfxu58x/0jvTVT1ekOSg==", + "license": "MIT", + "dependencies": { + "streamx": "^2.12.5" + } + }, "node_modules/temp": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/temp/-/temp-0.9.4.tgz", @@ -10049,6 +10732,29 @@ "dev": true, "license": "MIT" }, + "node_modules/text-decoder": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz", + "integrity": "sha512-vlLytXkeP4xvEq2otHeJfSQIRyWxo/oZGEbXrtEEF9Hnmrdly59sUbzZ/QgyWuLYHctCHxFF4tRQZNQ9k60ExQ==", + "license": "Apache-2.0", + "dependencies": { + "b4a": "^1.6.4" + } + }, + "node_modules/text-decoder/node_modules/b4a": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.8.1.tgz", + "integrity": "sha512-aiqre1Nr0B/6DgE2N5vwTc+2/oQZ4Wh1t4NznYY4E00y8LCt6NqdRv81so00oo27D8MVKTpUa/MwUUtBLXCoDw==", + "license": "Apache-2.0", + "peerDependencies": { + "react-native-b4a": "*" + }, + "peerDependenciesMeta": { + "react-native-b4a": { + "optional": true + } + } + }, "node_modules/thenify": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz", @@ -10252,6 +10958,18 @@ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "license": "0BSD" }, + "node_modules/tunnel-agent": { + "version": "0.6.0", + "resolved": 
"https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==", + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + }, + "engines": { + "node": "*" + } + }, "node_modules/type-fest": { "version": "0.13.1", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz", @@ -10294,7 +11012,6 @@ "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", - "dev": true, "license": "MIT" }, "node_modules/universalify": { @@ -10806,7 +11523,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", - "dev": true, "license": "ISC" }, "node_modules/ws": { diff --git a/package.json b/package.json index fd0c4cf3d..16a013804 100644 --- a/package.json +++ b/package.json @@ -63,6 +63,7 @@ "@uiw/color-convert": "^2.10.1", "@uiw/react-color-block": "^2.10.1", "@uiw/react-color-colorful": "^2.9.2", + "@xenova/transformers": "^2.17.2", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "dnd-timeline": "^2.4.0", diff --git a/src/App.tsx b/src/App.tsx index 6c36aa8c5..0c8875d04 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -112,7 +112,7 @@ export default function App() { return ( {content} - + ); } diff --git a/src/components/ui/select.tsx b/src/components/ui/select.tsx index d151d164e..bdbf64e9a 100644 --- a/src/components/ui/select.tsx +++ b/src/components/ui/select.tsx @@ -82,7 +82,8 @@ const SelectContent = React.forwardRef< ; -const Toaster = ({ ...props }: ToasterProps) => { +const Toaster = ({ className, ...props }: ToasterProps) => { return ( (null); const annotationOnlyRegions = useMemo( @@ -1260,8 +1288,11 @@ export default 
function VideoEditor() { const handleAnnotationSpanChange = useCallback( (id: string, span: Span) => { - pushState((prev) => ({ - annotationRegions: prev.annotationRegions.map((region) => + pushState((prev) => { + const editedAutoCaption = + prev.annotationRegions.find((region) => region.id === id)?.annotationSource === + "auto-caption"; + const next = prev.annotationRegions.map((region) => region.id === id ? { ...region, @@ -1269,8 +1300,11 @@ export default function VideoEditor() { endMs: Math.round(span.end), } : region, - ), - })); + ); + return { + annotationRegions: editedAutoCaption ? reconcileAutoCaptionTimelineGaps(next) : next, + }; + }); }, [pushState], ); @@ -1283,8 +1317,10 @@ export default function VideoEditor() { const source = prev.annotationRegions.find((region) => region.id === id); if (!source) return {}; + const { annotationSource: _stripCaptionLink, ...sourceWithoutCaptionLink } = source; + const duplicate: AnnotationRegion = { - ...source, + ...sourceWithoutCaptionLink, id: duplicateId, zIndex: duplicateZIndex, position: { x: source.position.x + 4, y: source.position.y + 4 }, @@ -1375,11 +1411,18 @@ export default function VideoEditor() { const handleAnnotationStyleChange = useCallback( (id: string, style: Partial) => { - pushState((prev) => ({ - annotationRegions: prev.annotationRegions.map((region) => - region.id === id ? { ...region, style: { ...region.style, ...style } } : region, - ), - })); + pushState((prev) => { + const touched = prev.annotationRegions.find((r) => r.id === id); + const syncAutoCaptions = touched?.annotationSource === "auto-caption"; + return { + annotationRegions: prev.annotationRegions.map((region) => { + if (syncAutoCaptions && region.annotationSource === "auto-caption") { + return { ...region, style: { ...region.style, ...style } }; + } + return region.id === id ? 
{ ...region, style: { ...region.style, ...style } } : region; + }), + }; + }); }, [pushState], ); @@ -1442,22 +1485,36 @@ export default function VideoEditor() { const handleAnnotationPositionChange = useCallback( (id: string, position: { x: number; y: number }) => { - pushState((prev) => ({ - annotationRegions: prev.annotationRegions.map((region) => - region.id === id ? { ...region, position } : region, - ), - })); + pushState((prev) => { + const moved = prev.annotationRegions.find((r) => r.id === id); + const syncAutoCaptions = moved?.annotationSource === "auto-caption"; + return { + annotationRegions: prev.annotationRegions.map((region) => { + if (syncAutoCaptions && region.annotationSource === "auto-caption") { + return { ...region, position }; + } + return region.id === id ? { ...region, position } : region; + }), + }; + }); }, [pushState], ); const handleAnnotationSizeChange = useCallback( (id: string, size: { width: number; height: number }) => { - pushState((prev) => ({ - annotationRegions: prev.annotationRegions.map((region) => - region.id === id ? { ...region, size } : region, - ), - })); + pushState((prev) => { + const resized = prev.annotationRegions.find((r) => r.id === id); + const syncAutoCaptions = resized?.annotationSource === "auto-caption"; + return { + annotationRegions: prev.annotationRegions.map((region) => { + if (syncAutoCaptions && region.annotationSource === "auto-caption") { + return { ...region, size }; + } + return region.id === id ? 
{ ...region, size } : region; + }), + }; + }); }, [pushState], ); @@ -2018,6 +2075,139 @@ export default function VideoEditor() { } }, []); + const generateAutoCaptions = useCallback( + async (minWords: number, maxWords: number) => { + if (!videoPath) { + toast.error(t("errors.noVideoLoaded")); + return; + } + if (isAutoCaptioningRef.current) { + toast.error(t("autoCaptions.busy")); + return; + } + const minW = Math.max(1, Math.min(minWords, maxWords)); + const maxW = Math.max(minW, maxWords); + + isAutoCaptioningRef.current = true; + setIsAutoCaptioning(true); + toast.loading(t("autoCaptions.generating"), { id: AUTO_CAPTION_PROGRESS_TOAST_ID }); + try { + const { samples, truncated, durationSec } = await extractMono16kFromVideoUrl(videoPath); + if (!Number.isFinite(durationSec) || durationSec <= 0 || samples.length < 800) { + toast.dismiss(AUTO_CAPTION_PROGRESS_TOAST_ID); + toast.error(t("autoCaptions.noAudio")); + return; + } + + const { samples: speechSamples, trimSec } = trimLeadingSilenceMono16k(samples); + if (speechSamples.length < 800) { + toast.dismiss(AUTO_CAPTION_PROGRESS_TOAST_ID); + toast.error(t("autoCaptions.noAudio")); + return; + } + + const trimMs = Math.round(trimSec * 1000); + const trimRegionsForTranscribe = shiftTrimRegionsMsForCaptionBuffer(trimRegions, trimMs); + + const transcribeOptions = { + onStatus: (phase: "model" | "transcribe") => { + if (phase === "model") { + toast.loading(t("autoCaptions.loadingModel"), { + id: AUTO_CAPTION_PROGRESS_TOAST_ID, + }); + } else { + toast.loading(t("autoCaptions.transcribing"), { + id: AUTO_CAPTION_PROGRESS_TOAST_ID, + }); + } + }, + }; + + let { segments: segmentsRaw, granularity } = await transcribeMono16kToSegments( + speechSamples, + { + trimRegions: trimRegionsForTranscribe, + ...transcribeOptions, + }, + ); + let transcribedFromTrimmedBuffer = true; + + // Some recordings come back empty after leading-silence trimming even though the full + // source has recognizable speech. 
Retry once against the untouched audio buffer before + // giving up so we do not show "no speech detected" for a spoken clip. + if (segmentsRaw.length === 0 && trimSec > 0) { + ({ segments: segmentsRaw, granularity } = await transcribeMono16kToSegments(samples, { + trimRegions, + ...transcribeOptions, + })); + transcribedFromTrimmedBuffer = false; + } + + const segments = + transcribedFromTrimmedBuffer && trimSec > 0 + ? segmentsRaw.map((s) => ({ + ...s, + startSec: s.startSec + trimSec, + endSec: s.endSec + trimSec, + })) + : segmentsRaw; + + let { regions, nextNumericId, nextZIndex } = captionSegmentsToAnnotationRegions( + segments, + nextAnnotationIdRef.current, + nextAnnotationZIndexRef.current, + { + minWordsPerCaption: minW, + maxWordsPerCaption: maxW, + timestampGranularity: granularity, + }, + ); + + if (regions.length === 0 && segments.length > 0) { + ({ regions, nextNumericId, nextZIndex } = captionSegmentsToAnnotationRegions( + segments, + nextAnnotationIdRef.current, + nextAnnotationZIndexRef.current, + { + minWordsPerCaption: 1, + maxWordsPerCaption: Number.MAX_SAFE_INTEGER, + timestampGranularity: granularity, + }, + )); + } + + if (regions.length === 0) { + toast.dismiss(AUTO_CAPTION_PROGRESS_TOAST_ID); + toast.info(t("autoCaptions.noneHeard")); + return; + } + + pushState((prev) => ({ annotationRegions: [...prev.annotationRegions, ...regions] })); + nextAnnotationIdRef.current = nextNumericId; + nextAnnotationZIndexRef.current = nextZIndex; + + toast.dismiss(AUTO_CAPTION_PROGRESS_TOAST_ID); + const minutesTrunc = String(Math.round(MAX_CAPTION_AUDIO_SEC / 60)); + if (truncated) { + toast.success(t("autoCaptions.done", { count: String(regions.length) }), { + description: t("autoCaptions.truncated", { minutes: minutesTrunc }), + }); + } else { + toast.success(t("autoCaptions.done", { count: String(regions.length) })); + } + } catch (e) { + console.error(e); + toast.dismiss(AUTO_CAPTION_PROGRESS_TOAST_ID); + const detail = e instanceof Error ? 
e.message : String(e); + toast.error(t("autoCaptions.failed"), { description: detail }); + } finally { + isAutoCaptioningRef.current = false; + setIsAutoCaptioning(false); + } + }, + [videoPath, trimRegions, pushState, t], + ); + const handleSaveDiagnostic = useCallback(async () => { const result = await window.electronAPI.saveDiagnostic({ error: exportError ?? "Manual diagnostic export", @@ -2060,7 +2250,7 @@ export default function VideoEditor() { {t("newRecording.title")} @@ -2085,13 +2275,92 @@ export default function VideoEditor() { + + + + {t("autoCaptions.dialogTitle")} + {t("autoCaptions.dialogDescription")} + +
+
+ + +
+
+ + +
+
+ + + + +
+
+
{ + if (!videoPath) { + toast.error(t("errors.noVideoLoaded")); + return; + } + if (isAutoCaptioningRef.current) { + toast.error(t("autoCaptions.busy")); + return; + } + setShowAutoCaptionsDialog(true); + }} />
diff --git a/src/components/video-editor/projectPersistence.ts b/src/components/video-editor/projectPersistence.ts index ff59427f2..1fefa43e9 100644 --- a/src/components/video-editor/projectPersistence.ts +++ b/src/components/video-editor/projectPersistence.ts @@ -333,6 +333,8 @@ export function normalizeProjectEditor(editor: Partial): Pro content: typeof region.content === "string" ? region.content : "", textContent: typeof region.textContent === "string" ? region.textContent : undefined, imageContent: typeof region.imageContent === "string" ? region.imageContent : undefined, + annotationSource: + region.annotationSource === "auto-caption" ? ("auto-caption" as const) : undefined, position: { x: clamp( isFiniteNumber(region.position?.x) diff --git a/src/components/video-editor/timeline/TimelineEditor.tsx b/src/components/video-editor/timeline/TimelineEditor.tsx index f84d038a9..65ebd8bdb 100644 --- a/src/components/video-editor/timeline/TimelineEditor.tsx +++ b/src/components/video-editor/timeline/TimelineEditor.tsx @@ -1,6 +1,7 @@ import type { Range, Span } from "dnd-timeline"; import { useTimelineContext } from "dnd-timeline"; import { + Captions, Check, ChevronDown, Gauge, @@ -92,6 +93,11 @@ interface TimelineEditorProps { onAspectRatioChange: (aspectRatio: AspectRatio) => void; videoUrl?: string; showTrimWaveform?: boolean; + /** Opens the auto-captions flow. When omitted, the captions button is hidden. */ + onGenerateCaptions?: () => void; + isGeneratingCaptions?: boolean; + /** Localized label for the auto-captions button (lives in the `editor` namespace). 
*/ + captionsLabel?: string; } interface TimelineScaleConfig { @@ -924,6 +930,9 @@ export default function TimelineEditor({ onAspectRatioChange, videoUrl, showTrimWaveform = false, + onGenerateCaptions, + isGeneratingCaptions = false, + captionsLabel, }: TimelineEditorProps) { const t = useScopedT("timeline"); const totalMs = useMemo(() => Math.max(0, Math.round(videoDuration * 1000)), [videoDuration]); @@ -1659,6 +1668,18 @@ export default function TimelineEditor({ > + {onGenerateCaptions && ( + + )}
diff --git a/src/components/video-editor/types.ts b/src/components/video-editor/types.ts index 0f2267cca..1aca90af1 100644 --- a/src/components/video-editor/types.ts +++ b/src/components/video-editor/types.ts @@ -288,6 +288,8 @@ export interface AnnotationRegion { size: AnnotationSize; style: AnnotationTextStyle; zIndex: number; + /** When set, layout/style edits on one region can sync to all auto-caption siblings. */ + annotationSource?: "auto-caption"; figureData?: FigureData; blurData?: BlurData; } diff --git a/src/i18n/locales/ar/editor.json b/src/i18n/locales/ar/editor.json index b3e122280..39750e5eb 100644 --- a/src/i18n/locales/ar/editor.json +++ b/src/i18n/locales/ar/editor.json @@ -44,6 +44,25 @@ "permissionDenied": "تم رفض إذن التسجيل. يرجى السماح بتسجيل الشاشة.", "accessibilityAllowAndRetry": "اسمح بوصول تسهيلات الاستخدام لـ OpenScreen، ثم اضغط على التسجيل مرة أخرى لبدء العد التنازلي." }, + "autoCaptions": { + "button": "التسميات التوضيحية التلقائية", + "dialogTitle": "التسميات التوضيحية التلقائية", + "dialogDescription": "اختر تقريبا كم عدد الكلمات التي تظهر في كل تسمية توضيحية. يتم توزيع التوقيت عبر الكلمات في تلك العبارة.", + "minWords": "الحد الأدنى من الكلمات لكل تسمية", + "maxWords": "الحد الأقصى من الكلمات لكل تسمية", + "wordsCount": "{{count}} كلمة", + "generate": "توليد", + "dialogCancel": "إلغاء", + "generating": "جارٍ توليد التسميات من الصوت…", + "loadingModel": "جارٍ تحميل نموذج الكلام (سيتم تنزيل ~75 ميغابايت عند الاستخدام الأول)…", + "transcribing": "جارٍ نسخ الكلام إلى نص…", + "busy": "توليد التسميات قيد التنفيذ بالفعل.", + "done": "تمت إضافة {{count}} تسمية.", + "noneHeard": "لم يتم الكشف عن أي كلام.", + "noAudio": "لا يحتوي هذا الفيديو على صوت صالح للنسخ.", + "failed": "تعذّر توليد التسميات.", + "truncated": "تم نسخ الدقائق الأولى فقط: {{minutes}} دقيقة." 
+ }, "emptyState": { "title": "لا يوجد مشروع مفتوح", "description": "استورد مقطع فيديو للبدء في التحرير، أو حمّل مشروع OpenScreen موجود.", diff --git a/src/i18n/locales/en/editor.json b/src/i18n/locales/en/editor.json index ebd9a5d5f..d6a56f033 100644 --- a/src/i18n/locales/en/editor.json +++ b/src/i18n/locales/en/editor.json @@ -44,6 +44,25 @@ "permissionDenied": "Recording permission denied. Please allow screen recording.", "accessibilityAllowAndRetry": "Allow Accessibility access for OpenScreen, then press record again to start the countdown." }, + "autoCaptions": { + "button": "Auto captions", + "dialogTitle": "Auto captions", + "dialogDescription": "Choose roughly how many words each caption shows at once. Timing is spread across the words in that phrase.", + "minWords": "Minimum words per caption", + "maxWords": "Maximum words per caption", + "wordsCount": "{{count}} words", + "generate": "Generate", + "dialogCancel": "Cancel", + "generating": "Generating captions from audio…", + "loadingModel": "Loading speech model (first use downloads ~75 MB)…", + "transcribing": "Transcribing speech…", + "busy": "Caption generation is already in progress.", + "done": "Added {{count}} captions.", + "noneHeard": "No speech was detected.", + "noAudio": "This video has no usable audio to transcribe.", + "failed": "Could not generate captions.", + "truncated": "Only the first {{minutes}} minutes were transcribed." 
+ }, "emptyState": { "title": "No project open", "description": "Import a video to start editing, or load an existing OpenScreen project.", diff --git a/src/i18n/locales/es/editor.json b/src/i18n/locales/es/editor.json index 16a2c8547..277ce40ff 100644 --- a/src/i18n/locales/es/editor.json +++ b/src/i18n/locales/es/editor.json @@ -44,6 +44,25 @@ "cancel": "Cancelar", "confirm": "Confirmar" }, + "autoCaptions": { + "button": "Subtítulos automáticos", + "dialogTitle": "Subtítulos automáticos", + "dialogDescription": "Elige aproximadamente cuántas palabras muestra cada subtítulo a la vez. El tiempo se reparte entre las palabras de esa frase.", + "minWords": "Número mínimo de palabras por subtítulo", + "maxWords": "Número máximo de palabras por subtítulo", + "wordsCount": "{{count}} palabras", + "generate": "Generar", + "dialogCancel": "Cancelar", + "generating": "Generando subtítulos a partir del audio…", + "loadingModel": "Cargando el modelo de voz (el primer uso descarga ~75 MB)…", + "transcribing": "Transcribiendo el habla…", + "busy": "La generación de subtítulos ya está en curso.", + "done": "Se añadieron {{count}} subtítulos.", + "noneHeard": "No se detectó voz.", + "noAudio": "Este video no tiene audio utilizable para transcribir.", + "failed": "No se pudieron generar los subtítulos.", + "truncated": "Solo se transcribieron los primeros {{minutes}} minutos." 
+ }, "emptyState": { "title": "No hay proyecto abierto", "description": "Importa un video para empezar a editar o carga un proyecto de OpenScreen existente.", diff --git a/src/i18n/locales/fr/editor.json b/src/i18n/locales/fr/editor.json index 4eb57a9cc..40dc24fd7 100644 --- a/src/i18n/locales/fr/editor.json +++ b/src/i18n/locales/fr/editor.json @@ -44,6 +44,25 @@ }, "loadingVideo": "Chargement de la vidéo...", "loadingEditor": "Chargement de l'éditeur...", + "autoCaptions": { + "button": "Sous-titres automatiques", + "dialogTitle": "Sous-titres automatiques", + "dialogDescription": "Choisissez approximativement combien de mots chaque sous-titre affiche à la fois. Le timing est réparti entre les mots de cette phrase.", + "minWords": "Nombre minimum de mots par sous-titre", + "maxWords": "Nombre maximum de mots par sous-titre", + "wordsCount": "{{count}} mots", + "generate": "Générer", + "dialogCancel": "Annuler", + "generating": "Génération des sous-titres à partir de l'audio…", + "loadingModel": "Chargement du modèle vocal (le premier usage télécharge ~75 MB)…", + "transcribing": "Transcription de la parole…", + "busy": "La génération des sous-titres est déjà en cours.", + "done": "{{count}} sous-titres ajoutés.", + "noneHeard": "Aucune parole n'a été détectée.", + "noAudio": "Cette vidéo ne contient pas d'audio exploitable pour la transcription.", + "failed": "Impossible de générer les sous-titres.", + "truncated": "Seules les {{minutes}} premières minutes ont été transcrites." + }, "emptyState": { "title": "Aucun projet ouvert", "description": "Importez une vidéo pour commencer à éditer, ou chargez un projet OpenScreen existant.", diff --git a/src/i18n/locales/it/editor.json b/src/i18n/locales/it/editor.json index 336d3e6ba..0e94b9a9f 100644 --- a/src/i18n/locales/it/editor.json +++ b/src/i18n/locales/it/editor.json @@ -42,5 +42,24 @@ "cameraNotFound": "Fotocamera non trovata.", "permissionDenied": "Autorizzazione di registrazione negata. 
Consenti la registrazione dello schermo.", "accessibilityAllowAndRetry": "Consenti l'accesso all'accessibilità per OpenScreen, poi premi di nuovo registra per avviare il conto alla rovescia." + }, + "autoCaptions": { + "button": "Sottotitoli automatici", + "dialogTitle": "Sottotitoli automatici", + "dialogDescription": "Scegli all'incirca quante parole mostrare per ogni sottotitolo. La temporizzazione viene distribuita tra le parole della frase.", + "minWords": "Numero minimo di parole per sottotitolo", + "maxWords": "Numero massimo di parole per sottotitolo", + "wordsCount": "{{count}} parole", + "generate": "Genera", + "dialogCancel": "Annulla", + "generating": "Generazione dei sottotitoli dall'audio…", + "loadingModel": "Caricamento del modello vocale (al primo utilizzo vengono scaricati ~75 MB)…", + "transcribing": "Trascrizione del parlato…", + "busy": "La generazione dei sottotitoli è già in corso.", + "done": "Aggiunti {{count}} sottotitoli.", + "noneHeard": "Nessun parlato rilevato.", + "noAudio": "Questo video non contiene audio utilizzabile per la trascrizione.", + "failed": "Impossibile generare i sottotitoli.", + "truncated": "Sono stati trascritti solo i primi {{minutes}} minuti." 
} } diff --git a/src/i18n/locales/ja-JP/editor.json b/src/i18n/locales/ja-JP/editor.json index 5151d1054..8e0da42e1 100644 --- a/src/i18n/locales/ja-JP/editor.json +++ b/src/i18n/locales/ja-JP/editor.json @@ -44,6 +44,25 @@ "cameraNotFound": "カメラが見つかりません。", "accessibilityAllowAndRetry": "OpenScreenにアクセシビリティアクセスを許可してから、もう一度録画を押してカウントダウンを開始してください。" }, + "autoCaptions": { + "button": "自動キャプション", + "dialogTitle": "自動キャプション", + "dialogDescription": "各キャプションに一度に表示する語数の目安を選びます。タイミングはそのフレーズ内の語に分配されます。", + "minWords": "キャプションあたりの最小語数", + "maxWords": "キャプションあたりの最大語数", + "wordsCount": "{{count}} 語", + "generate": "生成", + "dialogCancel": "キャンセル", + "generating": "音声からキャプションを生成しています…", + "loadingModel": "音声モデルを読み込んでいます(初回利用時は約 75 MB をダウンロードします)…", + "transcribing": "音声を文字起こししています…", + "busy": "キャプションの生成はすでに実行中です。", + "done": "{{count}} 件のキャプションを追加しました。", + "noneHeard": "音声が検出されませんでした。", + "noAudio": "この動画には書き起こしに使える音声がありません。", + "failed": "キャプションを生成できませんでした。", + "truncated": "最初の {{minutes}} 分のみが書き起こされました。" + }, "emptyState": { "title": "プロジェクトが開かれていません", "description": "動画をインポートして編集を開始するか、既存の OpenScreen プロジェクトを読み込んでください。", diff --git a/src/i18n/locales/ko-KR/editor.json b/src/i18n/locales/ko-KR/editor.json index 23990c386..a63a22a57 100644 --- a/src/i18n/locales/ko-KR/editor.json +++ b/src/i18n/locales/ko-KR/editor.json @@ -44,6 +44,25 @@ "cameraNotFound": "카메라를 찾을 수 없습니다.", "accessibilityAllowAndRetry": "OpenScreen의 손쉬운 사용 접근을 허용한 다음, 카운트다운을 시작하려면 다시 녹화를 누르세요." }, + "autoCaptions": { + "button": "자동 자막", + "dialogTitle": "자동 자막", + "dialogDescription": "각 자막에 한 번에 표시할 단어 수의 대략적인 값을 선택하세요. 
타이밍은 해당 구문의 단어들에 나뉩니다.", + "minWords": "자막당 최소 단어 수", + "maxWords": "자막당 최대 단어 수", + "wordsCount": "{{count}}개 단어", + "generate": "생성", + "dialogCancel": "취소", + "generating": "오디오에서 자막을 생성하는 중…", + "loadingModel": "음성 모델을 불러오는 중(첫 사용 시 약 75MB 다운로드)…", + "transcribing": "음성을 전사하는 중…", + "busy": "자막 생성이 이미 진행 중입니다.", + "done": "자막 {{count}}개를 추가했습니다.", + "noneHeard": "음성이 감지되지 않았습니다.", + "noAudio": "이 동영상에는 전사에 사용할 수 있는 음성이 없습니다.", + "failed": "자막을 생성할 수 없습니다.", + "truncated": "처음 {{minutes}}분만 전사되었습니다." + }, "emptyState": { "title": "열린 프로젝트 없음", "description": "동영상을 가져와 편집을 시작하거나 기존 OpenScreen 프로젝트를 불러오세요.", diff --git a/src/i18n/locales/pt-BR/editor.json b/src/i18n/locales/pt-BR/editor.json index 7e3f69531..b0e9ab8c9 100644 --- a/src/i18n/locales/pt-BR/editor.json +++ b/src/i18n/locales/pt-BR/editor.json @@ -41,5 +41,24 @@ "cameraDisconnected": "Webcam desconectada.", "cameraNotFound": "Câmera não encontrada.", "permissionDenied": "Permissão de gravação negada. Por favor, permita a gravação de tela." + }, + "autoCaptions": { + "button": "Legendas automáticas", + "dialogTitle": "Legendas automáticas", + "dialogDescription": "Escolha aproximadamente quantas palavras cada legenda mostra de cada vez. O tempo é distribuído entre as palavras da frase.", + "minWords": "Mínimo de palavras por legenda", + "maxWords": "Máximo de palavras por legenda", + "wordsCount": "{{count}} palavras", + "generate": "Gerar", + "dialogCancel": "Cancelar", + "generating": "Gerando legendas a partir do áudio…", + "loadingModel": "Carregando o modelo de fala (o primeiro uso baixa ~75 MB)…", + "transcribing": "Transcrevendo a fala…", + "busy": "A geração de legendas já está em andamento.", + "done": "{{count}} legendas adicionadas.", + "noneHeard": "Nenhuma fala foi detectada.", + "noAudio": "Este vídeo não tem áudio utilizável para transcrição.", + "failed": "Não foi possível gerar as legendas.", + "truncated": "Apenas os primeiros {{minutes}} minutos foram transcritos." 
} } diff --git a/src/i18n/locales/ru/editor.json b/src/i18n/locales/ru/editor.json index ff0c80b8b..78fa129a1 100644 --- a/src/i18n/locales/ru/editor.json +++ b/src/i18n/locales/ru/editor.json @@ -44,6 +44,25 @@ "permissionDenied": "Разрешение на запись запрещено. Пожалуйста, разрешите запись экрана.", "accessibilityAllowAndRetry": "Разрешите OpenScreen доступ к Универсальному доступу, затем снова нажмите запись, чтобы начать обратный отсчет." }, + "autoCaptions": { + "button": "Автосубтитры", + "dialogTitle": "Автосубтитры", + "dialogDescription": "Выберите, сколько примерно слов показывать в одном субтитре. Время распределяется между словами фразы.", + "minWords": "Минимум слов в субтитре", + "maxWords": "Максимум слов в субтитре", + "wordsCount": "{{count}} слов", + "generate": "Создать", + "dialogCancel": "Отмена", + "generating": "Создание субтитров из звука…", + "loadingModel": "Загрузка речевой модели (при первом запуске скачивается ~75 МБ)…", + "transcribing": "Распознавание речи…", + "busy": "Создание субтитров уже выполняется.", + "done": "Добавлено субтитров: {{count}}.", + "noneHeard": "Речь не обнаружена.", + "noAudio": "В этом видео нет звука, пригодного для расшифровки.", + "failed": "Не удалось создать субтитры.", + "truncated": "Расшифрованы только первые {{minutes}} мин." + }, "emptyState": { "title": "Нет открытых проектов", "description": "Импортируйте видео для начала редактирования или загрузите существующий проект OpenScreen.", diff --git a/src/i18n/locales/tr/editor.json b/src/i18n/locales/tr/editor.json index de45a180f..89203e719 100644 --- a/src/i18n/locales/tr/editor.json +++ b/src/i18n/locales/tr/editor.json @@ -44,6 +44,25 @@ "cancel": "İptal", "confirm": "Onayla" }, + "autoCaptions": { + "button": "Otomatik altyazılar", + "dialogTitle": "Otomatik altyazılar", + "dialogDescription": "Her altyazının aynı anda yaklaşık kaç kelime göstermesini istediğinizi seçin. 
Zamanlama, o ifadedeki kelimelere dağıtılır.", + "minWords": "Altyazı başına en az kelime", + "maxWords": "Altyazı başına en fazla kelime", + "wordsCount": "{{count}} kelime", + "generate": "Oluştur", + "dialogCancel": "İptal", + "generating": "Sesten altyazılar oluşturuluyor…", + "loadingModel": "Konuşma modeli yükleniyor (ilk kullanımda ~75 MB indirilir)…", + "transcribing": "Konuşma yazıya dökülüyor…", + "busy": "Altyazı oluşturma zaten devam ediyor.", + "done": "{{count}} altyazı eklendi.", + "noneHeard": "Konuşma algılanmadı.", + "noAudio": "Bu videoda yazıya dökülebilecek kullanılabilir bir ses yok.", + "failed": "Altyazılar oluşturulamadı.", + "truncated": "Yalnızca ilk {{minutes}} dakika yazıya döküldü." + }, "emptyState": { "title": "Açık proje yok", "description": "Düzenlemeye başlamak için bir video içe aktarın veya mevcut bir OpenScreen projesi yükleyin.", diff --git a/src/i18n/locales/vi/editor.json b/src/i18n/locales/vi/editor.json index 1875bb559..90004091e 100644 --- a/src/i18n/locales/vi/editor.json +++ b/src/i18n/locales/vi/editor.json @@ -44,6 +44,25 @@ "permissionDenied": "Quyền ghi hình bị từ chối. Vui lòng cho phép ghi màn hình.", "accessibilityAllowAndRetry": "Cho phép OpenScreen truy cập Trợ năng, sau đó nhấn ghi lại để bắt đầu đếm ngược." }, + "autoCaptions": { + "button": "Phụ đề tự động", + "dialogTitle": "Phụ đề tự động", + "dialogDescription": "Chọn khoảng bao nhiêu từ mỗi phụ đề hiển thị cùng lúc. 
Thời gian được phân bổ cho các từ trong cụm từ đó.", + "minWords": "Số từ tối thiểu mỗi phụ đề", + "maxWords": "Số từ tối đa mỗi phụ đề", + "wordsCount": "{{count}} từ", + "generate": "Tạo", + "dialogCancel": "Hủy", + "generating": "Đang tạo phụ đề từ âm thanh…", + "loadingModel": "Đang tải mô hình giọng nói (lần đầu sử dụng sẽ tải ~75 MB)…", + "transcribing": "Đang chuyển lời nói thành văn bản…", + "busy": "Việc tạo phụ đề đang được tiến hành.", + "done": "Đã thêm {{count}} phụ đề.", + "noneHeard": "Không phát hiện thấy lời nói.", + "noAudio": "Video này không có âm thanh dùng được để chuyển thành văn bản.", + "failed": "Không thể tạo phụ đề.", + "truncated": "Chỉ {{minutes}} phút đầu tiên được chuyển thành văn bản." + }, "emptyState": { "title": "Không có dự án nào được mở", "description": "Nhập video để bắt đầu chỉnh sửa hoặc tải một dự án OpenScreen hiện có.", diff --git a/src/i18n/locales/zh-CN/editor.json b/src/i18n/locales/zh-CN/editor.json index d11f1dd95..58f6ae27b 100644 --- a/src/i18n/locales/zh-CN/editor.json +++ b/src/i18n/locales/zh-CN/editor.json @@ -44,6 +44,25 @@ "permissionDenied": "录屏权限被拒绝。请允许屏幕录制。", "accessibilityAllowAndRetry": "允许 OpenScreen 使用辅助功能权限,然后再次按录制以开始倒计时。" }, + "autoCaptions": { + "button": "自动字幕", + "dialogTitle": "自动字幕", + "dialogDescription": "大致选择每条字幕一次显示多少个字词。时间会在该语句内的字词之间分配。", + "minWords": "每条字幕的最少字数", + "maxWords": "每条字幕的最多字数", + "wordsCount": "{{count}} 个词", + "generate": "生成", + "dialogCancel": "取消", + "generating": "正在从音频生成字幕…", + "loadingModel": "正在加载语音模型(首次使用将下载约 75 MB)…", + "transcribing": "正在转写语音…", + "busy": "字幕生成已在进行中。", + "done": "已添加 {{count}} 条字幕。", + "noneHeard": "未检测到语音。", + "noAudio": "此视频没有可用于转写的音频。", + "failed": "无法生成字幕。", + "truncated": "仅转写了最前 {{minutes}} 分钟。" + }, "emptyState": { "title": "未打开任何项目", "description": "导入视频开始编辑,或加载已有的 OpenScreen 项目。", diff --git a/src/i18n/locales/zh-TW/editor.json b/src/i18n/locales/zh-TW/editor.json index 131518713..8a6485409 100644 --- a/src/i18n/locales/zh-TW/editor.json 
+++ b/src/i18n/locales/zh-TW/editor.json @@ -44,6 +44,25 @@ "cameraNotFound": "找不到攝影機。", "accessibilityAllowAndRetry": "允許 OpenScreen 使用輔助使用權限,然後再次按下錄製以開始倒數。" }, + "autoCaptions": { + "button": "自動字幕", + "dialogTitle": "自動字幕", + "dialogDescription": "大致選擇每條字幕一次顯示多少字詞。時間會在該語句內的字詞之間分配。", + "minWords": "每條字幕的最少字數", + "maxWords": "每條字幕的最多字數", + "wordsCount": "{{count}} 個詞", + "generate": "產生", + "dialogCancel": "取消", + "generating": "正在從音訊產生字幕…", + "loadingModel": "正在載入語音模型(首次使用將下載約 75 MB)…", + "transcribing": "正在轉錄語音…", + "busy": "字幕產生已在進行中。", + "done": "已新增 {{count}} 條字幕。", + "noneHeard": "未偵測到語音。", + "noAudio": "此影片沒有可用於轉寫的音訊。", + "failed": "無法產生字幕。", + "truncated": "僅轉寫了最前 {{minutes}} 分鐘。" + }, "emptyState": { "title": "未開啟任何專案", "description": "匯入影片以開始編輯,或載入現有的 OpenScreen 專案。", diff --git a/src/lib/captioning/annotationsFromCaptions.test.ts b/src/lib/captioning/annotationsFromCaptions.test.ts new file mode 100644 index 000000000..bbf26fed2 --- /dev/null +++ b/src/lib/captioning/annotationsFromCaptions.test.ts @@ -0,0 +1,178 @@ +import { describe, expect, it } from "vitest"; + +import { + captionSegmentsToAnnotationRegions, + groupPhraseCaptionSegmentsIntoLines, + groupTimedCaptionWordsIntoLines, + reconcileAutoCaptionTimelineGaps, +} from "./annotationsFromCaptions"; + +describe("groupPhraseCaptionSegmentsIntoLines", () => { + it("preserves phrase boundaries when formatting phrase-timestamp captions", () => { + const lines = groupPhraseCaptionSegmentsIntoLines( + [ + { startSec: 0, endSec: 0.5, text: "alpha beta" }, + { startSec: 0.62, endSec: 1.6, text: "gamma delta" }, + ], + 2, + 2, + ); + + expect(lines).toHaveLength(2); + expect(lines[0]).toMatchObject({ text: "alpha beta", startSec: 0 }); + expect(lines[1]).toMatchObject({ text: "gamma delta", startSec: 0.62 }); + expect(lines[0]!.endSec).toBeLessThanOrEqual(0.62); + }); + + it("slices a single merged phrase into timed caption lines by word bounds", () => { + const lines = 
groupPhraseCaptionSegmentsIntoLines( + [{ startSec: 0, endSec: 1, text: "alpha beta gamma delta" }], + 2, + 2, + ); + + expect(lines).toHaveLength(2); + expect(lines[0]).toMatchObject({ + startSec: 0, + endSec: 0.5, + text: "alpha beta", + }); + expect(lines[1]).toMatchObject({ + startSec: 0.5, + endSec: 1, + text: "gamma delta", + }); + }); +}); + +describe("captionSegmentsToAnnotationRegions", () => { + it("uses raw phrase timing instead of shifting caption boundaries", () => { + const { regions } = captionSegmentsToAnnotationRegions( + [ + { startSec: 0, endSec: 0.5, text: "first second" }, + { startSec: 0.62, endSec: 1.2, text: "third fourth" }, + ], + 1, + 1, + { minWordsPerCaption: 2, maxWordsPerCaption: 2, timestampGranularity: "phrase" }, + ); + + expect(regions).toHaveLength(2); + expect(regions[0]).toMatchObject({ startMs: 0, endMs: 500 }); + expect(regions[1]).toMatchObject({ startMs: 620, endMs: 1200 }); + }); + + it("preserves empty timeline space when word timestamps contain a real pause", () => { + const lines = groupTimedCaptionWordsIntoLines( + [ + { startSec: 0, endSec: 0.12, text: "first" }, + { startSec: 0.13, endSec: 0.28, text: "caption" }, + { startSec: 0.7, endSec: 0.83, text: "second" }, + { startSec: 0.84, endSec: 0.98, text: "caption" }, + ], + 2, + 2, + ); + + expect(lines).toHaveLength(2); + expect(lines[0]).toMatchObject({ startSec: 0, endSec: 0.28, text: "first caption" }); + expect(lines[1]).toMatchObject({ startSec: 0.7, endSec: 0.98, text: "second caption" }); + }); + + it("preserves repeated words before grouping in word mode", () => { + const { regions } = captionSegmentsToAnnotationRegions( + [ + { startSec: 0, endSec: 0.12, text: "I" }, + { startSec: 0.13, endSec: 0.25, text: "I" }, + ], + 1, + 1, + { minWordsPerCaption: 2, maxWordsPerCaption: 2, timestampGranularity: "word" }, + ); + + expect(regions).toHaveLength(1); + expect(regions[0]).toMatchObject({ content: "I I" }); + }); +}); + 
+describe("reconcileAutoCaptionTimelineGaps", () => { + it("does not change regions when the minimum enforced gap is zero", () => { + const regions = reconcileAutoCaptionTimelineGaps([ + { + id: "annotation-1", + startMs: 0, + endMs: 120, + type: "text", + content: "one", + annotationSource: "auto-caption", + position: { x: 0, y: 0 }, + size: { width: 10, height: 10 }, + style: { + color: "#fff", + backgroundColor: "transparent", + fontSize: 24, + fontFamily: "Inter", + fontWeight: "normal", + fontStyle: "normal", + textDecoration: "none", + textAlign: "center", + }, + zIndex: 1, + }, + { + id: "manual-1", + startMs: 50, + endMs: 1000, + type: "text", + content: "manual", + position: { x: 10, y: 10 }, + size: { width: 10, height: 10 }, + style: { + color: "#fff", + backgroundColor: "transparent", + fontSize: 24, + fontFamily: "Inter", + fontWeight: "normal", + fontStyle: "normal", + textDecoration: "none", + textAlign: "center", + }, + zIndex: 2, + }, + { + id: "annotation-2", + startMs: 130, + endMs: 300, + type: "text", + content: "two", + annotationSource: "auto-caption", + position: { x: 0, y: 0 }, + size: { width: 10, height: 10 }, + style: { + color: "#fff", + backgroundColor: "transparent", + fontSize: 24, + fontFamily: "Inter", + fontWeight: "normal", + fontStyle: "normal", + textDecoration: "none", + textAlign: "center", + }, + zIndex: 3, + }, + ]); + + expect(regions.find((r) => r.id === "manual-1")).toMatchObject({ + startMs: 50, + endMs: 1000, + }); + expect(regions.find((r) => r.id === "annotation-1")).toMatchObject({ + startMs: 0, + endMs: 120, + }); + expect(regions.find((r) => r.id === "annotation-2")).toMatchObject({ + startMs: 130, + endMs: 300, + }); + }); +}); diff --git a/src/lib/captioning/annotationsFromCaptions.ts b/src/lib/captioning/annotationsFromCaptions.ts new file mode 100644 index 000000000..0f6dc2af4 --- /dev/null +++ b/src/lib/captioning/annotationsFromCaptions.ts @@ -0,0 +1,618 @@ +import type { AnnotationRegion, 
AnnotationTextStyle } from "@/components/video-editor/types"; + +import type { CaptionSegment } from "./transcribe"; + +/** Wide lower-third bar; `position.x` is top-left as % of container, so center with (100 − width) / 2. */ +const CAPTION_WIDTH = 92; +const CAPTION_HEIGHT = 12; +const CAPTION_BOTTOM_MARGIN = 2; + +const CAPTION_POSITION = { + x: (100 - CAPTION_WIDTH) / 2, + y: 100 - CAPTION_HEIGHT - CAPTION_BOTTOM_MARGIN, +}; + +const CAPTION_SIZE = { width: CAPTION_WIDTH, height: CAPTION_HEIGHT }; + +const CAPTION_STYLE: AnnotationTextStyle = { + color: "#ffffff", + backgroundColor: "rgba(255, 255, 255, 0)", + fontSize: 24, + fontFamily: "Inter", + fontWeight: "normal", + fontStyle: "normal", + textDecoration: "none", + textAlign: "center", +}; + +/** + * Nudge caption **starts** earlier (seconds). Whisper onsets are often slightly late vs. what you + * hear; do **not** apply the same offset to ends — that pulls lines off-screen too early. + */ +const AUTO_CAPTION_START_BIAS_SEC = 0; + +/** + * Extra time held after Whisper’s segment **end** (seconds). Model end times are often early vs. + * trailing vowels / room tone; this is separate from `AUTO_CAPTION_START_BIAS_SEC`. + */ +const AUTO_CAPTION_END_HOLD_SEC = 0; + +/** Inside one Whisper phrase, sub-lines can be shorter (do not steal time from neighbors). */ +const WORD_SPLIT_MIN_SPAN_SEC = 0.02; + +/** Brief linger after the last word in a line (seconds); trimmed if it would overlap the next line. */ +const CAPTION_LINE_END_TAIL_SEC = 0; + +/** A real silence between word-level timestamps should start a new caption run. */ +const WORD_RUN_BREAK_GAP_SEC = 0.24; + +/** + * Minimum time between consecutive caption regions on the timeline (seconds). Keeps a visible gap + * so blocks do not read as one clip; kept small so we do not erase natural short pauses between phrases. + */ +const MIN_CAPTION_TIMELINE_GAP_SEC = 0; + +/** Same text again with almost no gap or overlap — common Whisper / chunk artifact. 
/**
 * Same text again with almost no gap or overlap — common Whisper / chunk artifact.
 * Adjacent identical lines that start less than this many seconds after the previous one ends
 * are merged by `dedupeAdjacentCaptionRepeats`.
 */
const DEDUPE_SAME_TEXT_MAX_GAP_SEC = 0.55;

/**
 * Window (seconds) past the end of an earlier line inside which a later line with the same
 * normalized text is folded into it by `collapseSameContentEchoes`.
 */
export const SAME_CONTENT_ECHO_MAX_GAP_SEC = 1.15;

/**
 * Builds the key used to decide whether two caption texts are "the same".
 *
 * The text is trimmed, whitespace runs are collapsed to one space, curly single/double quotes are
 * mapped to their ASCII forms, the result is lower-cased, and a trailing run of `.!?,;:` is
 * removed.
 *
 * @param text Raw caption text.
 * @returns The normalized comparison key (may be empty, e.g. for punctuation-only text).
 */
function normalizeCaptionKey(text: string): string {
  return text
    .trim()
    .replace(/\s+/g, " ")
    .replace(/[\u2018\u2019]/g, "'")
    .replace(/[\u201C\u201D]/g, '"')
    .toLowerCase()
    .replace(/[.!?,;:]+$/g, "");
}

/**
 * Legacy echo-collapse helper kept for reference while phrase timing uses raw model spans.
 *
 * Segments are processed in start-time order. A segment whose normalized text matches an earlier
 * kept line, and which starts before that line's end plus `SAME_CONTENT_ECHO_MAX_GAP_SEC`, is
 * dropped and the earlier line's span is widened to cover it. The earlier line's text is kept.
 * Whitespace-only segments are ignored. Neither the input array nor its objects are mutated.
 *
 * @param segments Caption segments in any order.
 * @returns New segment objects, sorted by start time, with echoes folded away.
 */
export function collapseSameContentEchoes(segments: CaptionSegment[]): CaptionSegment[] {
  const sorted = [...segments]
    .filter((s) => s.text.trim())
    .sort((a, b) => a.startSec - b.startSec || a.endSec - b.endSec);
  const out: CaptionSegment[] = [];
  // Normalized text -> index in `out` of the most recent line carrying that text.
  const lastIndexByKey = new Map<string, number>();

  for (const seg of sorted) {
    const key = normalizeCaptionKey(seg.text);
    const hit = lastIndexByKey.get(key);
    const prev = hit === undefined ? undefined : out[hit];
    if (prev !== undefined && seg.startSec < prev.endSec + SAME_CONTENT_ECHO_MAX_GAP_SEC) {
      prev.startSec = Math.min(prev.startSec, seg.startSec);
      prev.endSec = Math.max(prev.endSec, seg.endSec);
      continue;
    }
    out.push({
      startSec: seg.startSec,
      endSec: seg.endSec,
      text: seg.text.trim(),
    });
    lastIndexByKey.set(key, out.length - 1);
  }
  return out;
}

/**
 * Only merge segments that are almost back-to-back (Whisper often splits mid-phrase with a tiny gap).
 * Wider gaps are usually silence or missed audio — merging those stretches word timing across dead air.
 */
/**
 * Collapse adjacent duplicate lines (overlapping or tiny gap). Does not merge the same phrase
 * repeated later in the video when separated by real silence.
 */
/**
 * Collapse adjacent duplicate lines (overlapping or tiny gap). Does not merge the same phrase
 * repeated later in the video when separated by real silence.
 *
 * Lines are compared through `normalizeCaptionKey`. A repeat is folded into the previously kept
 * line (whose span is widened) when it overlaps that line by more than 15 ms or starts less than
 * `DEDUPE_SAME_TEXT_MAX_GAP_SEC` after it ends. Returned objects are fresh copies; the input is
 * not mutated.
 */
function dedupeAdjacentCaptionRepeats(segments: CaptionSegment[]): CaptionSegment[] {
  const ordered = segments
    .filter((segment) => segment.text.trim())
    .sort((left, right) => left.startSec - right.startSec || left.endSec - right.endSec);

  const kept: CaptionSegment[] = [];
  for (const segment of ordered) {
    const text = segment.text.trim();
    const last = kept.length > 0 ? kept[kept.length - 1] : undefined;

    if (last && normalizeCaptionKey(last.text) === normalizeCaptionKey(text)) {
      const overlapSec = last.endSec - segment.startSec;
      const gapSec = segment.startSec - last.endSec;
      const isAdjacentRepeat = overlapSec > 0.015 || gapSec < DEDUPE_SAME_TEXT_MAX_GAP_SEC;
      if (isAdjacentRepeat) {
        // Widen the line we already kept instead of emitting the repeat.
        last.startSec = Math.min(last.startSec, segment.startSec);
        last.endSec = Math.max(last.endSec, segment.endSec);
        continue;
      }
    }

    kept.push({ startSec: segment.startSec, endSec: segment.endSec, text });
  }
  return kept;
}

/**
 * Trim only real overlaps. Avoid synthetic lead/lag so caption timing matches model output.
 *
 * Each segment gets `AUTO_CAPTION_START_BIAS_SEC` added to its start (clamped at 0) and
 * `AUTO_CAPTION_END_HOLD_SEC` added to its end; an end that is not after the start is pushed to
 * start + 0.02 s. Afterwards, a line that runs more than 2 ms into the next line is cut back to
 * the next line's start (but never shorter than 0.1 ms).
 */
function finalizeCaptionSegmentsForPlayback(segments: CaptionSegment[]): CaptionSegment[] {
  const OVERLAP_TRIM_SEC = 0.002;

  const adjusted = segments
    .filter((segment) => segment.text.trim())
    .sort((left, right) => left.startSec - right.startSec || left.endSec - right.endSec)
    .map((segment) => {
      const startSec = Math.max(0, segment.startSec + AUTO_CAPTION_START_BIAS_SEC);
      const heldEndSec = segment.endSec + AUTO_CAPTION_END_HOLD_SEC;
      const endSec = heldEndSec <= startSec ? startSec + 0.02 : heldEndSec;
      return { startSec, endSec, text: segment.text.trim() };
    });

  adjusted.forEach((current, index) => {
    if (index === 0) return;
    const previous = adjusted[index - 1];
    if (previous && current.startSec < previous.endSec - OVERLAP_TRIM_SEC) {
      previous.endSec = Math.max(previous.startSec + 1e-4, current.startSec);
    }
  });

  return adjusted;
}

/** Default min gap between auto-caption blocks on the timeline (ms); matches `MIN_CAPTION_TIMELINE_GAP_SEC`. */
export const DEFAULT_AUTO_CAPTION_MIN_GAP_MS = Math.round(MIN_CAPTION_TIMELINE_GAP_SEC * 1000);

/** Enforces a minimum gap between consecutive `auto-caption` regions (by start time). */
/**
 * Enforces a minimum gap between consecutive `auto-caption` regions (by start time). Shortens the
 * previous region's end when possible; otherwise shifts the following region later so edits on
 * the timeline cannot squeeze caption blocks completely flush.
 *
 * Only regions whose `annotationSource` is `"auto-caption"` are adjusted; all other regions pass
 * through untouched and the input order is preserved. Adjusted regions are copies. When the
 * rounded gap is 0, or there are fewer than two auto-caption regions, the input array itself is
 * returned.
 *
 * @param regions All annotation regions on the timeline.
 * @param minGapMs Minimum gap in milliseconds (rounded; negative values behave like 0).
 * @returns Regions in input order with auto-caption timings reconciled.
 */
export function reconcileAutoCaptionTimelineGaps(
  regions: AnnotationRegion[],
  minGapMs: number = DEFAULT_AUTO_CAPTION_MIN_GAP_MS,
): AnnotationRegion[] {
  const gap = Math.max(0, Math.round(minGapMs));
  if (regions.length === 0 || gap === 0) return regions;

  const autoCandidates = regions.filter((r) => r.annotationSource === "auto-caption");
  if (autoCandidates.length <= 1) return regions;

  const sorted = [...autoCandidates].sort((a, b) => a.startMs - b.startMs || a.endMs - b.endMs);
  const fixed: AnnotationRegion[] = [];
  let prev = { ...sorted[0]! };
  fixed.push(prev);

  for (let i = 1; i < sorted.length; i++) {
    let cur = { ...sorted[i]! };
    const minStart = prev.endMs + gap;

    if (cur.startMs < minStart) {
      const newPrevEnd = cur.startMs - gap;
      if (newPrevEnd >= prev.startMs + 1) {
        // Preferred: shorten the previous caption, keeping it at least 1 ms long.
        prev = { ...prev, endMs: newPrevEnd };
        fixed[fixed.length - 1] = prev;
      } else {
        // Previous caption cannot shrink enough: slide the current one later, keeping its length.
        const dur = Math.max(1, cur.endMs - cur.startMs);
        cur = { ...cur, startMs: minStart, endMs: minStart + dur };
      }
    }

    fixed.push(cur);
    prev = cur;
  }

  const fixedById = new Map(fixed.map((r) => [r.id, r]));
  return regions.map((r) => fixedById.get(r.id) ?? r);
}

/**
 * Join phrases that are close in time so the editor does not create dozens of separate overlays.
 *
 * A segment is appended to the previous output block when the gap to it is at most `maxGapSec`
 * (default 1.35), the space-joined text is at most `maxChars` long (default 320) and the merged
 * block spans at most `maxBlockDurationSec` seconds (default 12). Whitespace-only segments are
 * skipped. The input is not mutated.
 *
 * @returns New segments sorted by start time.
 */
export function mergeAdjacentCaptionSegments(
  segments: CaptionSegment[],
  options?: { maxGapSec?: number; maxChars?: number; maxBlockDurationSec?: number },
): CaptionSegment[] {
  const maxGapSec = options?.maxGapSec ?? 1.35;
  const maxChars = options?.maxChars ?? 320;
  const maxBlockDurationSec = options?.maxBlockDurationSec ?? 12;

  const sorted = [...segments].sort((a, b) => a.startSec - b.startSec);
  const out: CaptionSegment[] = [];

  for (const seg of sorted) {
    const text = seg.text.trim();
    if (!text) continue;

    const prev = out[out.length - 1];
    if (!prev) {
      out.push({ startSec: seg.startSec, endSec: seg.endSec, text });
      continue;
    }

    const gap = seg.startSec - prev.endSec;
    const mergedText = `${prev.text} ${text}`.trim();
    const mergedEnd = Math.max(prev.endSec, seg.endSec);
    const wouldSpan = mergedEnd - prev.startSec;
    if (gap <= maxGapSec && mergedText.length <= maxChars && wouldSpan <= maxBlockDurationSec) {
      prev.endSec = mergedEnd;
      prev.text = mergedText;
    } else {
      out.push({ startSec: seg.startSec, endSec: seg.endSec, text });
    }
  }

  return out;
}

/**
 * Partitions phrase segments into groups of consecutive segments without merging their text.
 *
 * A segment joins the current group when the gap to the group's last segment is at most
 * `maxGapSec` (default 0), the group's texts joined with single spaces would be at most
 * `maxChars` characters (default unlimited) and the span from the group's first start to the
 * later of the last two ends is at most `maxBlockDurationSec` (default unlimited).
 *
 * The character budget counts the joining spaces, so it agrees with the `maxChars` check in
 * `mergeAdjacentCaptionSegments`; the joined length is tracked incrementally rather than
 * re-summed for every segment.
 *
 * @returns Groups in start-time order; each segment is a copy with trimmed text.
 */
function partitionPhraseCaptionSegments(
  segments: CaptionSegment[],
  options?: { maxGapSec?: number; maxChars?: number; maxBlockDurationSec?: number },
): CaptionSegment[][] {
  const maxGapSec = options?.maxGapSec ?? 0;
  const maxChars = options?.maxChars ?? Number.POSITIVE_INFINITY;
  const maxBlockDurationSec = options?.maxBlockDurationSec ?? Number.POSITIVE_INFINITY;

  const sorted = [...segments]
    .filter((s) => s.text.trim())
    .sort((a, b) => a.startSec - b.startSec || a.endSec - b.endSec);
  if (sorted.length === 0) return [];

  const groups: CaptionSegment[][] = [];
  let current: CaptionSegment[] = [];
  // Length of the current group's texts joined with single spaces.
  let currentJoinedChars = 0;

  for (const seg of sorted) {
    const text = seg.text.trim();
    const first = current[0];
    const prev = current[current.length - 1];

    if (first && prev) {
      const gap = seg.startSec - prev.endSec;
      const wouldChars = currentJoinedChars + 1 + text.length;
      const wouldSpan = Math.max(prev.endSec, seg.endSec) - first.startSec;

      if (gap <= maxGapSec && wouldChars <= maxChars && wouldSpan <= maxBlockDurationSec) {
        current.push({ ...seg, text });
        currentJoinedChars = wouldChars;
        continue;
      }
      groups.push(current);
    }

    current = [{ ...seg, text }];
    currentJoinedChars = text.length;
  }

  if (current.length > 0) {
    groups.push(current);
  }

  return groups;
}

export interface CaptionSegmentLayoutOptions {
  /** Lower bound on words per on-screen caption (default 2). */
  minWordsPerCaption?: number;
  /** Upper bound on words per on-screen caption (default 7). */
  maxWordsPerCaption?: number;
  /**
   * `word`: each `CaptionSegment` is a single token with Whisper word timestamps (default).
   * `phrase`: merged phrase spans; use proportional line splitting inside each span.
   */
  timestampGranularity?: "word" | "phrase";
}

/**
 * Splits `wordCount` consecutive word indices into half-open `[from, to)` ranges, one per
 * caption line.
 *
 * Lines take `maxWords` words each, except that the last full line is shortened when that keeps
 * the final line from dropping below `minWords`. When both bounds cannot be met, the remaining
 * words go into a single line that may exceed `maxWords` (e.g. 3 words with min = max = 2 yields
 * one range of 3). `minWords` is clamped to `[1, maxWords]` and both bounds are floored.
 *
 * @returns Ranges covering `0..wordCount` in order; empty when `wordCount` is 0.
 */
function computeCaptionLineIndexRanges(
  wordCount: number,
  minWords: number,
  maxWords: number,
): Array<{ from: number; to: number }> {
  const minW = Math.max(1, Math.min(Math.floor(minWords), Math.floor(maxWords)));
  const maxW = Math.max(minW, Math.floor(maxWords));
  const sliceRanges: Array<{ from: number; to: number }> = [];
  let i = 0;
  while (i < wordCount) {
    const remaining = wordCount - i;
    if (remaining <= maxW) {
      if (sliceRanges.length > 0 && remaining < minW) {
        sliceRanges[sliceRanges.length - 1]!.to = wordCount;
      } else {
        sliceRanges.push({ from: i, to: wordCount });
      }
      break;
    }

    let take = maxW;
    const after = remaining - take;
    if (after > 0 && after < minW) {
      // Leave exactly `minW` words for the final line instead of a too-short tail.
      take = remaining - minW;
      if (take < minW) {
        sliceRanges.push({ from: i, to: wordCount });
        break;
      }
      if (take > maxW) {
        take = maxW;
      }
    }
    sliceRanges.push({ from: i, to: i + take });
    i += take;
  }
  return sliceRanges;
}

/**
 * Groups per-word segments into on-screen lines using each token's Whisper timestamps
 * (no proportional stretching across a long phrase span).
 */
/**
 * Groups per-word segments into on-screen lines using each token's Whisper timestamps
 * (no proportional stretching across a long phrase span).
 *
 * Words are ordered by time and cut into runs wherever the silence between two neighbours is at
 * least `WORD_RUN_BREAK_GAP_SEC`. Each run is divided into lines by
 * `computeCaptionLineIndexRanges`. A line starts at its first word's start and ends at its last
 * word's end plus `CAPTION_LINE_END_TAIL_SEC`, but lasts at least `WORD_SPLIT_MIN_SPAN_SEC`.
 * Finally, a line that runs more than 1 ms into the next one is trimmed to end just before it.
 *
 * @param segments Word-level segments in any order; whitespace-only entries are ignored.
 * @param minWords Lower bound on words per line (clamped to `[1, maxWords]`, floored).
 * @param maxWords Upper bound on words per line (floored).
 * @returns New line segments in time order; empty when there are no usable words.
 */
export function groupTimedCaptionWordsIntoLines(
  segments: CaptionSegment[],
  minWords: number,
  maxWords: number,
): CaptionSegment[] {
  const timedWords = segments
    .filter((segment) => segment.text.trim())
    .sort((left, right) => left.startSec - right.startSec || left.endSec - right.endSec);
  if (timedWords.length === 0) return [];

  const lowerBound = Math.max(1, Math.min(Math.floor(minWords), Math.floor(maxWords)));
  const upperBound = Math.max(lowerBound, Math.floor(maxWords));

  // Pass 1: cut the word stream into runs at every real pause.
  const runs: CaptionSegment[][] = [];
  let pendingRun: CaptionSegment[] = [];
  timedWords.forEach((word, index) => {
    const before = index > 0 ? timedWords[index - 1] : undefined;
    if (before && word.startSec - before.endSec >= WORD_RUN_BREAK_GAP_SEC) {
      runs.push(pendingRun);
      pendingRun = [];
    }
    pendingRun.push(word);
  });
  runs.push(pendingRun);

  // Pass 2: turn each run into caption lines timed by their first and last word.
  const lines: CaptionSegment[] = [];
  for (const run of runs) {
    for (const range of computeCaptionLineIndexRanges(run.length, lowerBound, upperBound)) {
      const lineWords = run.slice(range.from, range.to);
      const firstWord = lineWords[0]!;
      const lastWord = lineWords[lineWords.length - 1]!;
      lines.push({
        startSec: firstWord.startSec,
        endSec: Math.max(
          firstWord.startSec + WORD_SPLIT_MIN_SPAN_SEC,
          lastWord.endSec + CAPTION_LINE_END_TAIL_SEC,
        ),
        text: lineWords.map((word) => word.text.trim()).join(" "),
      });
    }
  }

  // Pass 3: pull back any line that still runs into its successor.
  for (let index = 1; index < lines.length; index++) {
    const line = lines[index - 1]!;
    const next = lines[index]!;
    if (line.endSec > next.startSec + 1e-3) {
      line.endSec = Math.max(line.startSec + WORD_SPLIT_MIN_SPAN_SEC, next.startSec - 1e-4);
    }
  }
  return lines;
}

/**
 * Splits each merged transcription span into shorter captions with about
 * `minWords`–`maxWords` words. Times are interpolated by character weight inside the span.
 */
+ */ +export function splitMergedCaptionsByWordBounds( + merged: CaptionSegment[], + minWords: number, + maxWords: number, +): CaptionSegment[] { + const minW = Math.max(1, Math.min(Math.floor(minWords), Math.floor(maxWords))); + const maxW = Math.max(minW, Math.floor(maxWords)); + const out: CaptionSegment[] = []; + + for (const seg of merged) { + const words = seg.text.trim().split(/\s+/).filter(Boolean); + if (words.length === 0) continue; + + if (words.length <= maxW) { + out.push({ + startSec: seg.startSec, + endSec: seg.endSec, + text: words.join(" "), + }); + continue; + } + + out.push(...splitOneSegmentByWordBounds(seg.startSec, seg.endSec, words, minW, maxW)); + } + + return out; +} + +function wrapCaptionTextByWordBounds(text: string, minWords: number, maxWords: number): string { + const words = text.trim().split(/\s+/).filter(Boolean); + if (words.length === 0) return ""; + const minW = Math.max(1, Math.min(Math.floor(minWords), Math.floor(maxWords))); + const maxW = Math.max(minW, Math.floor(maxWords)); + const ranges = computeCaptionLineIndexRanges(words.length, minW, maxW); + return ranges.map(({ from, to }) => words.slice(from, to).join(" ")).join("\n"); +} + +function expandPhraseSegmentToPseudoWords(segment: CaptionSegment): CaptionSegment[] { + const words = segment.text.trim().split(/\s+/).filter(Boolean); + if (words.length === 0) return []; + if (words.length === 1) { + return [ + { + startSec: segment.startSec, + endSec: segment.endSec, + text: words[0]!, + }, + ]; + } + + return splitOneSegmentByWordBounds(segment.startSec, segment.endSec, words, 1, 1); +} + +export function groupPhraseCaptionSegmentsIntoLines( + segments: CaptionSegment[], + minWords: number, + maxWords: number, + options?: { maxGapSec?: number; maxChars?: number; maxBlockDurationSec?: number }, +): CaptionSegment[] { + const groups = partitionPhraseCaptionSegments(segments, options); + const out: CaptionSegment[] = []; + + for (const group of groups) { + if (group.length 
=== 1) { + const only = group[0]!; + const wrapped = wrapCaptionTextByWordBounds(only.text, minWords, maxWords).trim(); + if (!wrapped) continue; + const lineTexts = wrapped + .split("\n") + .map((t) => t.trim()) + .filter(Boolean); + const n = lineTexts.length; + const rawDur = only.endSec - only.startSec; + if (n > 1 && rawDur < n * WORD_SPLIT_MIN_SPAN_SEC) { + out.push({ + startSec: only.startSec, + endSec: only.endSec, + text: lineTexts.join(" "), + }); + continue; + } + const dur = Math.max(rawDur, WORD_SPLIT_MIN_SPAN_SEC * n); + if (n <= 1) { + out.push({ + startSec: only.startSec, + endSec: only.endSec, + text: lineTexts[0] ?? wrapped, + }); + continue; + } + for (let i = 0; i < n; i++) { + const startSec = only.startSec + (dur * i) / n; + const boundary = only.startSec + (dur * (i + 1)) / n; + const endSec = + i === n - 1 ? only.endSec : Math.max(startSec + WORD_SPLIT_MIN_SPAN_SEC, boundary); + out.push({ + startSec, + endSec, + text: lineTexts[i]!, + }); + } + continue; + } + + const pseudoWords = group.flatMap(expandPhraseSegmentToPseudoWords); + out.push(...groupTimedCaptionWordsIntoLines(pseudoWords, minWords, maxWords)); + } + + return out; +} + +function splitOneSegmentByWordBounds( + startSec: number, + endSec: number, + words: string[], + minWords: number, + maxWords: number, +): CaptionSegment[] { + const sliceRanges = computeCaptionLineIndexRanges(words.length, minWords, maxWords); + + const dur = Math.max(endSec - startSec, 0.05); + const weights = words.map((w) => Math.max(1, w.length)); + const totalW = weights.reduce((a, b) => a + b, 0); + + const weightSum = (from: number, to: number) => { + let s = 0; + for (let k = from; k < to; k++) s += weights[k] ?? 
0; + return s; + }; + + const result: CaptionSegment[] = []; + let prevEnd = startSec; + for (const { from, to } of sliceRanges) { + const wb = weightSum(0, from); + const ws = weightSum(from, to); + let s = startSec + (wb / totalW) * dur; + let e = startSec + ((wb + ws) / totalW) * dur; + s = Math.max(s, prevEnd); + e = Math.max(s + WORD_SPLIT_MIN_SPAN_SEC, e); + e = Math.min(e, endSec); + if (e <= s) { + e = Math.min(endSec, s + WORD_SPLIT_MIN_SPAN_SEC); + } + prevEnd = e; + result.push({ + startSec: s, + endSec: e, + text: words.slice(from, to).join(" "), + }); + } + if (result.length > 0) { + result[result.length - 1].endSec = endSec; + for (let i = 0; i < result.length - 1; i++) { + if (result[i].endSec > result[i + 1].startSec + 0.002) { + result[i].endSec = Math.max(result[i].startSec + 1e-4, result[i + 1].startSec); + } + } + } + return result; +} + +export function captionSegmentsToAnnotationRegions( + segments: CaptionSegment[], + startNumericId: number, + startZIndex: number, + layout?: CaptionSegmentLayoutOptions, +): { regions: AnnotationRegion[]; nextNumericId: number; nextZIndex: number } { + // Do not echo-collapse raw word tokens before grouping: repeated words ("I … I") share a + // normalized key and would merge spans while keeping only the first token's text. + const minW = layout?.minWordsPerCaption ?? 2; + const maxW = layout?.maxWordsPerCaption ?? 7; + const granularity = layout?.timestampGranularity ?? "word"; + + const grouped = + granularity === "phrase" + ? 
groupPhraseCaptionSegmentsIntoLines(segments, minW, maxW) + : groupTimedCaptionWordsIntoLines(segments, minW, maxW); + + const dedupedOut = dedupeAdjacentCaptionRepeats(grouped); + const finalized = finalizeCaptionSegmentsForPlayback(dedupedOut); + + let nid = startNumericId; + let z = startZIndex; + const regions: AnnotationRegion[] = []; + + for (const seg of finalized) { + const startMs = Math.round(seg.startSec * 1000); + const endMs = Math.max(Math.round(seg.endSec * 1000), startMs + 1); + regions.push({ + id: `annotation-${nid++}`, + startMs, + endMs, + type: "text", + content: seg.text, + annotationSource: "auto-caption", + position: { ...CAPTION_POSITION }, + size: { ...CAPTION_SIZE }, + style: { ...CAPTION_STYLE }, + zIndex: z++, + }); + } + + return { + regions: reconcileAutoCaptionTimelineGaps(regions), + nextNumericId: nid, + nextZIndex: z, + }; +} + +export function maxAnnotationNumericId(regions: AnnotationRegion[]): number { + let max = 0; + for (const r of regions) { + const m = /^annotation-(\d+)$/.exec(r.id); + if (m) max = Math.max(max, Number.parseInt(m[1], 10)); + } + return max; +} + +export function maxAnnotationZIndex(regions: AnnotationRegion[]): number { + if (regions.length === 0) return 0; + return Math.max(...regions.map((r) => r.zIndex)); +} diff --git a/src/lib/captioning/captionConstants.ts b/src/lib/captioning/captionConstants.ts new file mode 100644 index 000000000..1bacb7cc7 --- /dev/null +++ b/src/lib/captioning/captionConstants.ts @@ -0,0 +1,2 @@ +/** Max audio length for auto-captions (decode + transcribe); keep demuxer read aligned with this. 
*/ +export const MAX_CAPTION_AUDIO_SEC = 4 * 60 * 60; diff --git a/src/lib/captioning/extractMono16k.ts b/src/lib/captioning/extractMono16k.ts new file mode 100644 index 000000000..53258567c --- /dev/null +++ b/src/lib/captioning/extractMono16k.ts @@ -0,0 +1,159 @@ +import { MAX_CAPTION_AUDIO_SEC } from "./captionConstants"; +import { extractMonoPcmViaWebDemuxer } from "./extractMono16kWebDemuxer"; + +export { MAX_CAPTION_AUDIO_SEC }; + +const FETCH_TIMEOUT_MS = 120_000; + +async function fetchWithTimeout(url: string, signal?: AbortSignal): Promise { + const ctrl = new AbortController(); + const timer = window.setTimeout(() => ctrl.abort(), FETCH_TIMEOUT_MS); + const onAbort = () => ctrl.abort(); + if (signal) { + if (signal.aborted) ctrl.abort(); + else signal.addEventListener("abort", onAbort, { once: true }); + } + try { + return await fetch(url, { signal: ctrl.signal }); + } finally { + window.clearTimeout(timer); + if (signal) signal.removeEventListener("abort", onAbort); + } +} + +/** + * Load the editor video the same way as `StreamingVideoDecoder`: + * Electron `readBinaryFile` for local paths (fetch(file://) is unreliable in the renderer), + * otherwise HTTP / blob / data URLs via fetch. 
+ */ +async function loadSourceVideoFile(videoUrl: string, signal?: AbortSignal): Promise { + const isRemoteUrl = /^(https?:|blob:|data:)/i.test(videoUrl); + + if (!isRemoteUrl && window.electronAPI?.readBinaryFile) { + const result = await window.electronAPI.readBinaryFile(videoUrl); + if (!result.success || !result.data) { + throw new Error(result.message || result.error || "Failed to read source video"); + } + const filename = (result.path || videoUrl).split(/[\\/]/).pop() || "video"; + return new File([result.data], filename, { type: "video/webm" }); + } + + const response = await fetchWithTimeout(videoUrl, signal); + if (!response.ok) { + throw new Error(`Failed to load video for captions: ${response.status} ${response.statusText}`); + } + const blob = await response.blob(); + if (signal?.aborted) throw new DOMException("Aborted", "AbortError"); + const filename = videoUrl.split("/").pop() || "video"; + return new File([blob], filename, { type: blob.type || "video/webm" }); +} + +function mixToMono(audioBuffer: AudioBuffer): Float32Array { + const { length, numberOfChannels } = audioBuffer; + const out = new Float32Array(length); + if (numberOfChannels === 0) return out; + for (let i = 0; i < length; i++) { + let sum = 0; + for (let c = 0; c < numberOfChannels; c++) { + sum += audioBuffer.getChannelData(c)[i]; + } + out[i] = sum / numberOfChannels; + } + return out; +} + +async function resampleMono( + mono: Float32Array, + fromRate: number, + toRate: number, + signal?: AbortSignal, +): Promise { + if (signal?.aborted) throw new DOMException("Aborted", "AbortError"); + if (fromRate === toRate) return mono; + const durationSec = mono.length / fromRate; + const outLength = Math.max(1, Math.ceil(durationSec * toRate)); + const offline = new OfflineAudioContext(1, outLength, toRate); + const buf = offline.createBuffer(1, mono.length, fromRate); + buf.copyToChannel(Float32Array.from(mono), 0); + const src = offline.createBufferSource(); + src.buffer = buf; + 
src.connect(offline.destination); + src.start(0); + const rendered = await offline.startRendering(); + if (signal?.aborted) throw new DOMException("Aborted", "AbortError"); + return rendered.getChannelData(0).slice(); +} + +async function truncateAndResampleTo16k( + mono: Float32Array, + fromRate: number, + durationSec: number, + signal?: AbortSignal, +): Promise<{ samples: Float32Array; truncated: boolean; durationSec: number }> { + let truncated = false; + let work = mono; + if (durationSec > MAX_CAPTION_AUDIO_SEC) { + const maxSamples = Math.floor(MAX_CAPTION_AUDIO_SEC * fromRate); + work = mono.subarray(0, Math.min(mono.length, maxSamples)); + truncated = true; + } + + const samples = await resampleMono(work, fromRate, 16_000, signal); + return { samples, truncated, durationSec: samples.length / 16_000 }; +} + +/** + * Decode the video's audio track to mono 16 kHz float samples (Whisper input). + * Prefers `decodeAudioData` when the container is supported; otherwise uses the same + * web-demuxer + AudioDecoder path as export. + */ +export async function extractMono16kFromVideoUrl( + videoUrl: string, + options?: { signal?: AbortSignal }, +): Promise<{ samples: Float32Array; truncated: boolean; durationSec: number }> { + const file = await loadSourceVideoFile(videoUrl, options?.signal); + + /** When this returns null, use web-demuxer + AudioDecoder (same as export). 
*/ + const tryDecodeAudioDataPath = async (): Promise<{ + samples: Float32Array; + truncated: boolean; + durationSec: number; + } | null> => { + const audioContext = new AudioContext(); + try { + const ab = await file.arrayBuffer(); + if (options?.signal?.aborted) throw new DOMException("Aborted", "AbortError"); + const audioBuffer = await audioContext.decodeAudioData(ab.slice(0)); + if ( + audioBuffer.numberOfChannels === 0 || + audioBuffer.length === 0 || + !Number.isFinite(audioBuffer.duration) || + audioBuffer.duration <= 0 + ) { + return null; + } + const durationSec = audioBuffer.duration; + const mono = mixToMono(audioBuffer); + const fromRate = audioBuffer.sampleRate; + const out = await truncateAndResampleTo16k(mono, fromRate, durationSec, options?.signal); + // decodeAudioData can resolve for some WebM/Matroska inputs yet yield almost no usable + // PCM; captions only run the demuxer path on throw today, so we never recover. + if (out.samples.length < 800) { + return null; + } + return out; + } catch { + return null; + } finally { + await audioContext.close().catch(() => undefined); + } + }; + + const primary = await tryDecodeAudioDataPath(); + if (primary) { + return primary; + } + + const pcm = await extractMonoPcmViaWebDemuxer(file, options?.signal); + return truncateAndResampleTo16k(pcm.mono, pcm.sampleRate, pcm.durationSec, options?.signal); +} diff --git a/src/lib/captioning/extractMono16kWebDemuxer.ts b/src/lib/captioning/extractMono16kWebDemuxer.ts new file mode 100644 index 000000000..fd85f5703 --- /dev/null +++ b/src/lib/captioning/extractMono16kWebDemuxer.ts @@ -0,0 +1,187 @@ +import { WebDemuxer } from "web-demuxer"; + +import { MAX_CAPTION_AUDIO_SEC } from "./captionConstants"; + +const DECODE_QUEUE_BACKPRESSURE = 20; +const SOURCE_LOAD_TIMEOUT_MS = 60_000; +const READ_END_PADDING_SEC = 0.5; + +function webDemuxerWasmUrl(): string { + return new URL("../exporter/wasm/web-demuxer.wasm", window.location.href).href; +} + +function 
audioDataFrameToMono(frame: AudioData): Float32Array { + const frames = frame.numberOfFrames; + const ch = frame.numberOfChannels; + const out = new Float32Array(frames); + const fmt = frame.format || ""; + const planar = fmt.includes("planar"); + + if (planar) { + const plane = new Float32Array(frames); + for (let c = 0; c < ch; c++) { + frame.copyTo(plane, { planeIndex: c }); + for (let i = 0; i < frames; i++) { + out[i] += plane[i]; + } + } + for (let i = 0; i < frames; i++) { + out[i] /= ch; + } + } else { + const interleaved = new Float32Array(frames * ch); + frame.copyTo(interleaved, { planeIndex: 0 }); + for (let i = 0; i < frames; i++) { + let sum = 0; + for (let c = 0; c < ch; c++) { + sum += interleaved[i * ch + c]; + } + out[i] = sum / ch; + } + } + return out; +} + +function mergeAndConsumeDecodedAudioToMonoLinear( + frames: AudioData[], + sampleRate: number, + durationSec: number, +): Float32Array { + const sorted = [...frames].sort((a, b) => a.timestamp - b.timestamp); + const totalSamples = Math.max(1, Math.ceil(durationSec * sampleRate)); + const acc = new Float32Array(totalSamples); + const weight = new Float32Array(totalSamples); + + for (const frame of sorted) { + const startSample = Math.round((frame.timestamp / 1e6) * sampleRate); + const slice = audioDataFrameToMono(frame); + for (let i = 0; i < slice.length; i++) { + const pos = startSample + i; + if (pos >= 0 && pos < totalSamples) { + acc[pos] += slice[i]; + weight[pos] += 1; + } + } + frame.close(); + } + + for (let i = 0; i < totalSamples; i++) { + if (weight[i] > 0) { + acc[i] /= weight[i]; + } + } + return acc; +} + +function withTimeout(promise: Promise, ms: number, message: string): Promise { + return new Promise((resolve, reject) => { + const id = window.setTimeout(() => reject(new Error(message)), ms); + promise + .then((v) => { + window.clearTimeout(id); + resolve(v); + }) + .catch((e) => { + window.clearTimeout(id); + reject(e instanceof Error ? 
/**
 * Demux + WebCodecs audio decode (same stack as export). Use when
 * `decodeAudioData` cannot handle the container (e.g. WebM with video).
 *
 * Reads audio packets from the start of the file up to
 * `MAX_CAPTION_AUDIO_SEC + READ_END_PADDING_SEC`, decodes them with
 * `AudioDecoder`, and merges the decoded frames into one mono buffer.
 *
 * The decoder, the demuxer and every decoded frame are released on all exit
 * paths (success, abort, failure).
 *
 * @param file Source media file.
 * @param signal Optional abort signal, checked between reads and before/after flush.
 * @returns Mono PCM, its sample rate, and the duration covered by the buffer.
 * @throws DOMException `AbortError` when `signal` is aborted.
 * @throws Error on load/metadata timeout, missing audio track, unsupported
 *   codec, decoder failure, or when no frames were decoded.
 */
export async function extractMonoPcmViaWebDemuxer(
  file: File,
  signal?: AbortSignal,
): Promise<{ mono: Float32Array; sampleRate: number; durationSec: number }> {
  const demuxer = new WebDemuxer({ wasmFilePath: webDemuxerWasmUrl() });
  const decodedFrames: AudioData[] = [];
  // Collected in an array because the error callback runs outside this function's control flow.
  const decoderErrors: DOMException[] = [];
  let decoder: AudioDecoder | undefined;

  try {
    await withTimeout(
      demuxer.load(file),
      SOURCE_LOAD_TIMEOUT_MS,
      "Timed out while parsing the source video for captions.",
    );

    if (signal?.aborted) throw new DOMException("Aborted", "AbortError");

    const mediaInfo = await withTimeout(
      demuxer.getMediaInfo(),
      SOURCE_LOAD_TIMEOUT_MS,
      "Timed out while reading media info for captions.",
    );

    const reportedDurationSec =
      Number.isFinite(mediaInfo.duration) && mediaInfo.duration > 0 ? mediaInfo.duration : 0;

    let audioConfig: AudioDecoderConfig;
    try {
      audioConfig = await demuxer.getDecoderConfig("audio");
    } catch {
      throw new Error("No audio track found in this video.");
    }

    const codecCheck = await AudioDecoder.isConfigSupported(audioConfig);
    if (!codecCheck.supported) {
      throw new Error(`Audio codec not supported for captions: ${audioConfig.codec}`);
    }

    // Many WebM/Matroska files report a too-short duration; capping the read at the reported
    // time would stop demuxing early. Read up to the caption ceiling instead (the demuxer
    // stops when the track ends).
    const readEndSec = MAX_CAPTION_AUDIO_SEC + READ_END_PADDING_SEC;

    const activeDecoder = new AudioDecoder({
      output: (data: AudioData) => {
        decodedFrames.push(data);
      },
      error: (e: DOMException) => {
        decoderErrors.push(e);
        console.error("[captioning] AudioDecoder error:", e);
      },
    });
    decoder = activeDecoder;
    activeDecoder.configure(audioConfig);
    // Re-read `state` through a function so checks after an `await` are not narrowed away.
    const decoderIsUsable = (): boolean => activeDecoder.state === "configured";

    const reader = demuxer.read("audio", 0, readEndSec).getReader();
    try {
      while (!signal?.aborted && decoderIsUsable()) {
        const { done, value: chunk } = await reader.read();
        if (done || !chunk) break;
        // The decoder may have errored while the read was pending; decode() would then throw.
        if (!decoderIsUsable()) break;
        activeDecoder.decode(chunk);
        while (
          activeDecoder.decodeQueueSize > DECODE_QUEUE_BACKPRESSURE &&
          decoderIsUsable() &&
          !signal?.aborted
        ) {
          await new Promise<void>((resolve) => setTimeout(resolve, 1));
        }
      }
    } finally {
      try {
        await reader.cancel();
      } catch {
        /* already closed */
      }
    }

    // Skip the flush on abort: it would decode everything still queued for nothing.
    if (signal?.aborted) throw new DOMException("Aborted", "AbortError");
    if (decoderIsUsable()) {
      await activeDecoder.flush();
    }
    if (signal?.aborted) throw new DOMException("Aborted", "AbortError");

    const decodeFailure = decoderErrors[0];
    if (decodeFailure) {
      throw new Error(`Audio decoding failed for captions: ${decodeFailure.message}`);
    }

    if (decodedFrames.length === 0) {
      throw new Error("Decoded zero audio frames from this video.");
    }

    // Position samples with the rate the decoder actually produced; fall back to the
    // container-declared rate, then to 48 kHz.
    const sampleRate = decodedFrames[0]?.sampleRate || audioConfig.sampleRate || 48_000;

    let maxEndUs = 0;
    for (const f of decodedFrames) {
      const end = f.timestamp + (f.duration ?? 0);
      if (end > maxEndUs) maxEndUs = end;
    }
    const inferredDurationSec = maxEndUs / 1e6;
    // Prefer extent implied by decoded frames (fixes bad container duration). If frames lack
    // duration, fall back to reported metadata.
    const durationSec = inferredDurationSec > 0.02 ? inferredDurationSec : reportedDurationSec;

    const mono = mergeAndConsumeDecodedAudioToMonoLinear(decodedFrames, sampleRate, durationSec);
    return { mono, sampleRate, durationSec };
  } finally {
    // Closing an already-closed AudioData is a no-op, so this is safe after a successful merge.
    for (const frame of decodedFrames) {
      frame.close();
    }
    if (decoder && decoder.state !== "closed") {
      decoder.close();
    }
    try {
      demuxer.destroy();
    } catch {
      /* best-effort cleanup */
    }
  }
}
inferredDurationSec : reportedDurationSec; + + const mono = mergeAndConsumeDecodedAudioToMonoLinear(decodedFrames, sampleRate, durationSec); + return { mono, sampleRate, durationSec }; +} diff --git a/src/lib/captioning/index.ts b/src/lib/captioning/index.ts new file mode 100644 index 000000000..cc2e2a3a6 --- /dev/null +++ b/src/lib/captioning/index.ts @@ -0,0 +1,17 @@ +export type { CaptionSegmentLayoutOptions } from "./annotationsFromCaptions"; +export { + captionSegmentsToAnnotationRegions, + DEFAULT_AUTO_CAPTION_MIN_GAP_MS, + groupTimedCaptionWordsIntoLines, + mergeAdjacentCaptionSegments, + reconcileAutoCaptionTimelineGaps, + splitMergedCaptionsByWordBounds, +} from "./annotationsFromCaptions"; +export { extractMono16kFromVideoUrl, MAX_CAPTION_AUDIO_SEC } from "./extractMono16k"; +export { shiftTrimRegionsMsForCaptionBuffer, trimLeadingSilenceMono16k } from "./leadingSilence"; +export type { + CaptionSegment, + CaptionTimestampGranularity, + TranscribeMono16kResult, +} from "./transcribe"; +export { transcribeMono16kToSegments } from "./transcribe"; diff --git a/src/lib/captioning/leadingSilence.ts b/src/lib/captioning/leadingSilence.ts new file mode 100644 index 000000000..4bd6a11aa --- /dev/null +++ b/src/lib/captioning/leadingSilence.ts @@ -0,0 +1,78 @@ +/** Caption path is always mono 16 kHz after `extractMono16kFromVideoUrl`. */ +import type { TrimRegion } from "@/components/video-editor/types"; + +const SAMPLE_RATE = 16_000; + +/** Window length for peak detection (~50 ms). */ +const WINDOW_SAMPLES = 800; + +/** Coarse hop so long intros scan quickly (~50 ms steps). */ +const HOP_SAMPLES = 800; + +/** Max |sample| in a window below this counts as silence (float PCM ~[-1, 1]). */ +const PEAK_THRESHOLD = 0.012; + +/** Keep a little audio before the first peak so word onsets are not clipped. */ +const PRE_ROLL_SEC = 0.12; + +/** Do not scan more than this much audio for leading silence (performance + pathological files). 
/** Do not scan more than this much audio for leading silence (performance + pathological files). */
const MAX_LEADING_SCAN_SEC = 15 * 60;

/**
 * Reports whether any sample in the `WINDOW_SAMPLES`-long window starting at
 * `windowStart` exceeds `PEAK_THRESHOLD` in magnitude. Stops at the first hit.
 * NaN samples never compare greater than the threshold, so they are skipped
 * instead of masking louder samples later in the same window.
 */
function windowHasSignal(samples: Float32Array, windowStart: number): boolean {
  const windowEnd = windowStart + WINDOW_SAMPLES;
  for (let i = windowStart; i < windowEnd; i++) {
    if (Math.abs(samples[i]!) > PEAK_THRESHOLD) {
      return true;
    }
  }
  return false;
}

/**
 * Drops quiet audio at the beginning so Whisper is not fed a long silent prefix (which can skew
 * the first phrase and wastes work). Returned `trimSec` must be added back to every segment time.
 *
 * Windows of `WINDOW_SAMPLES` are tested every `HOP_SAMPLES`, for at most
 * `MAX_LEADING_SCAN_SEC` of audio. The cut is placed `PRE_ROLL_SEC` before the
 * first non-silent window.
 *
 * @param samples Mono PCM; positions are converted to seconds with `SAMPLE_RATE`.
 * @returns The input unchanged with `trimSec: 0` when it is shorter than one
 *   window, starts with signal, or no signal is found in the scanned range;
 *   otherwise a `subarray` view (no copy) and the number of seconds removed.
 */
export function trimLeadingSilenceMono16k(samples: Float32Array): {
  samples: Float32Array;
  trimSec: number;
} {
  if (samples.length < WINDOW_SAMPLES) {
    return { samples, trimSec: 0 };
  }

  const lastWindowStart = Math.min(
    samples.length - WINDOW_SAMPLES,
    Math.floor(MAX_LEADING_SCAN_SEC * SAMPLE_RATE),
  );

  let firstSpeechSample = -1;
  for (let windowStart = 0; windowStart <= lastWindowStart; windowStart += HOP_SAMPLES) {
    if (windowHasSignal(samples, windowStart)) {
      firstSpeechSample = windowStart;
      break;
    }
  }

  // -1: nothing found in the scanned range; 0: audio already starts with signal.
  if (firstSpeechSample <= 0) {
    return { samples, trimSec: 0 };
  }

  const preRollSamples = Math.round(PRE_ROLL_SEC * SAMPLE_RATE);
  const start = Math.max(0, firstSpeechSample - preRollSamples);
  return {
    samples: samples.subarray(start),
    trimSec: start / SAMPLE_RATE,
  };
}
/**
 * When audio is trimmed from the front, Whisper times are relative to the shortened buffer.
 * Shift trim regions by the same offset so `segmentOverlapsTrim` still uses consistent coordinates.
 *
 * @param regions Trim regions in original-timeline milliseconds. Not mutated.
 * @param trimMs Milliseconds removed from the front of the audio buffer.
 * @returns `regions` itself when `trimMs` is not a positive number (including
 *   NaN); otherwise shifted copies clamped at 0, with regions that collapse to
 *   zero length removed.
 */
export function shiftTrimRegionsMsForCaptionBuffer(
  regions: TrimRegion[],
  trimMs: number,
): TrimRegion[] {
  // `!(trimMs > 0)` rather than `trimMs <= 0`: a NaN offset must not fall through and
  // turn every region into NaN bounds that the filter below would silently drop.
  if (!(trimMs > 0)) return regions;

  const shifted: TrimRegion[] = [];
  for (const region of regions) {
    const startMs = Math.max(0, region.startMs - trimMs);
    const endMs = Math.max(0, region.endMs - trimMs);
    if (endMs > startMs) {
      shifted.push({ ...region, startMs, endMs });
    }
  }
  return shifted;
}
/**
 * Transcribes mono 16 kHz audio into timed caption segments using in-browser Whisper.
 *
 * The model load and inference run inside a dedicated Web Worker so the editor's
 * main thread stays responsive. Aborting (via `options.signal`) terminates the
 * worker immediately, since model load / inference cannot be cooperatively cancelled.
 * The worker is always terminated once the promise settles.
 *
 * @param samples Mono 16 kHz PCM. Posted without a transfer list, so the caller's
 *   buffer stays usable afterwards.
 * @param options.trimRegions Trim regions forwarded to the worker (default `[]`).
 * @param options.onStatus Called for each `"status"` message from the worker.
 * @param options.signal Abort signal; rejects with an `AbortError` DOMException.
 * @returns Segments and their timestamp granularity.
 * @throws Error when the worker reports an error, fails, sends an undecodable
 *   message, or the request cannot be posted.
 */
export function transcribeMono16kToSegments(
  samples: Float32Array,
  options?: {
    trimRegions?: TrimRegion[];
    onStatus?: (phase: "model" | "transcribe") => void;
    signal?: AbortSignal;
  },
): Promise<TranscribeMono16kResult> {
  const signal = options?.signal;
  if (signal?.aborted) {
    return Promise.reject(new DOMException("Aborted", "AbortError"));
  }

  return new Promise<TranscribeMono16kResult>((resolve, reject) => {
    // Keep `new URL(..., import.meta.url)` inline inside `new Worker(...)`: bundlers detect
    // worker entry points from this exact shape.
    const worker = new Worker(new URL("./transcribe.worker.ts", import.meta.url), {
      type: "module",
    });

    let settled = false;

    /** Settles the promise once, after detaching the abort listener and killing the worker. */
    function finish(settle: () => void): void {
      if (settled) return;
      settled = true;
      signal?.removeEventListener("abort", onAbort);
      worker.terminate();
      settle();
    }

    function onAbort(): void {
      finish(() => reject(new DOMException("Aborted", "AbortError")));
    }
    signal?.addEventListener("abort", onAbort, { once: true });

    worker.onmessage = (e: MessageEvent<TranscribeWorkerResponse>) => {
      const msg = e.data;
      switch (msg.type) {
        case "status":
          options?.onStatus?.(msg.phase);
          return;
        case "result":
          finish(() => resolve({ segments: msg.segments, granularity: msg.granularity }));
          return;
        default:
          finish(() => reject(new Error(msg.message)));
      }
    };

    worker.onerror = (e) => {
      finish(() => reject(new Error(e.message || "Caption transcription worker failed")));
    };

    // A message that cannot be deserialized would otherwise leave the promise pending forever.
    worker.onmessageerror = () => {
      finish(() => reject(new Error("Caption transcription worker sent an unreadable message")));
    };

    // Structured-clone copy (not a transfer): the caller may reuse `samples`
    // for the full-buffer retry pass, so the buffer must stay valid here.
    const request: TranscribeWorkerRequest = {
      samples,
      trimRegions: options?.trimRegions ?? [],
    };
    try {
      worker.postMessage(request);
    } catch (e) {
      // Route through finish() so the worker is terminated and the abort listener removed.
      finish(() => reject(e instanceof Error ? e : new Error(String(e))));
    }
  });
}
/** Posts one typed response message from the worker scope to the thread that created it. */
function post(message: TranscribeWorkerResponse): void {
  (self as unknown as Worker).postMessage(message);
}

/**
 * ONNX Runtime's wasm bundle treats `process.versions.node` (which can leak into
 * an Electron worker) as Node and tries `require("fs")`, which Vite does not
 * support. Mask it only while Transformers / ORT run. No-op when `process` is
 * undefined (the usual case in a Web Worker) or has no `node` entry.
 *
 * Masking first tries to delete the property and, if that is refused, to
 * overwrite it with `undefined`; both report failure through their return value.
 * When neither works `fn` still runs with the value left in place. The original
 * property descriptor is put back after `fn` settles or throws synchronously.
 *
 * @param fn Async work to run while the Node version is hidden.
 * @returns Whatever `fn` resolves to; rejections from `fn` pass through.
 */
function withoutNodeVersion<T>(fn: () => Promise<T>): Promise<T> {
  const versions =
    typeof process !== "undefined" && process.versions && typeof process.versions === "object"
      ? (process.versions as { node?: string })
      : null;
  if (versions === null || !("node" in versions)) {
    return fn();
  }

  const savedValue = versions.node;
  const savedDescriptor = Object.getOwnPropertyDescriptor(versions, "node");
  // Reflect.deleteProperty / Reflect.set return false on refusal instead of throwing,
  // so the fallback has to be chained on the result rather than placed in a catch block.
  const masked =
    Reflect.deleteProperty(versions, "node") || Reflect.set(versions, "node", undefined);

  const restore = (): void => {
    if (!masked) return;
    if (savedDescriptor) {
      Reflect.defineProperty(versions, "node", savedDescriptor);
    } else if (savedValue !== undefined) {
      // `node` was inherited rather than an own property; restore by plain assignment.
      Reflect.set(versions, "node", savedValue);
    }
  };

  let pending: Promise<T>;
  try {
    pending = fn();
  } catch (e) {
    restore();
    throw e;
  }
  return pending.finally(restore);
}
/**
 * Loads the Whisper speech-recognition pipeline from `@xenova/transformers`
 * while the Node version is masked (see `withoutNodeVersion`).
 *
 * @returns The pipeline, typed as the call signature the transcription core expects.
 */
async function loadTranscriber(): Promise<TranscriberFn> {
  return withoutNodeVersion(async () => {
    const { pipeline, env } = await import("@xenova/transformers");
    env.allowLocalModels = false;
    // Default tiny weights only: the `output_attentions` revision has regressed inference for
    // some environments (empty chunks / thrown errors) while phrase mode works on this model.
    const transcriber = (await pipeline(
      "automatic-speech-recognition",
      "Xenova/whisper-tiny",
    )) as unknown as TranscriberFn;
    return transcriber;
  });
}

/**
 * Worker entry point: handles one transcription request.
 *
 * Posts `status: "model"` before loading the pipeline, `status: "transcribe"`
 * before inference, then either `result` or `error`. Failures are never thrown
 * out of the handler; they are reported as an `error` message.
 */
self.onmessage = async (event: MessageEvent<TranscribeWorkerRequest>): Promise<void> => {
  const { samples, trimRegions } = event.data;
  try {
    post({ type: "status", phase: "model" });
    const transcriber = await loadTranscriber();

    post({ type: "status", phase: "transcribe" });
    const { segments, granularity } = await runTranscription(
      transcriber,
      samples,
      trimRegions ?? [],
    );

    post({ type: "result", segments, granularity });
  } catch (e) {
    post({ type: "error", message: e instanceof Error ? e.message : String(e) });
  }
};
/**
 * A Transformers.js automatic-speech-recognition pipeline call: takes mono PCM
 * plus an options bag and resolves to a result whose shape is inspected at runtime.
 */
export type TranscriberFn = (
  audio: Float32Array,
  opts: Record<string, unknown>,
) => Promise<unknown>;

/**
 * True when the span `startMs..endMs` intersects at least one trim region.
 * Spans that only touch a region's edge do not count as overlapping.
 */
function segmentOverlapsTrim(startMs: number, endMs: number, trims: TrimRegion[]): boolean {
  return trims.some((t) => startMs < t.endMs && endMs > t.startMs);
}

/**
 * Same trim-out rule as {@link segmentsFromTranscriberChunks}; for retry passes that used empty trims.
 *
 * @returns `segments` itself when there are no trim regions, otherwise a new
 *   array without the segments whose millisecond-rounded span overlaps a region.
 */
function dropSegmentsOverlappingTrimRegions(
  segments: CaptionSegment[],
  trimRegions: TrimRegion[],
): CaptionSegment[] {
  if (trimRegions.length === 0) return segments;
  return segments.filter((s) => {
    const startMs = Math.round(s.startSec * 1000);
    const endMs = Math.round(s.endSec * 1000);
    return !segmentOverlapsTrim(startMs, endMs, trimRegions);
  });
}

/**
 * Upper bound on samples per transcriber call: 12 minutes at 16 kHz.
 * Whisper runs with internal 30s chunks; keep each forward pass bounded for WASM memory.
 */
const TRANSCRIBE_SLICE_SAMPLES = 12 * 60 * 16_000;

/** Very short slices are skipped in the multi-slice loop unless padded (see `padTailSliceForTranscribe`). */
const MIN_TRANSCRIBE_SLICE_SAMPLES = 800;

/**
 * Pad a short tail slice so Whisper still runs; timestamps are clamped with `realDurationSec` so
 * padding does not extend perceived audio on the timeline.
 *
 * @param samples Mono 16 kHz PCM slice.
 * @returns The input itself when it already has `MIN_TRANSCRIBE_SLICE_SAMPLES`
 *   samples or more, otherwise a zero-padded copy of that length. In both cases
 *   `realDurationSec` is the unpadded length in seconds.
 */
function padTailSliceForTranscribe(samples: Float32Array): {
  slice: Float32Array;
  realDurationSec: number;
} {
  const realDurationSec = samples.length / 16_000;
  if (samples.length >= MIN_TRANSCRIBE_SLICE_SAMPLES) {
    return { slice: samples, realDurationSec };
  }
  const padded = new Float32Array(MIN_TRANSCRIBE_SLICE_SAMPLES);
  padded.set(samples);
  return { slice: padded, realDurationSec };
}
/**
 * Converts raw Whisper chunk output into sorted, deduped, trim-filtered caption segments.
 *
 * Per chunk: a missing start becomes 0; a missing end becomes the next chunk's
 * start (or the slice end); an end that does not follow the start is replaced
 * by start + 0.25 s; everything is clamped to the slice's real duration and a
 * segment is at least 0.08 s long where the slice allows. A chunk whose start
 * lies at or beyond the real audio end (e.g. inside padding) is pulled back so
 * its segment still ends after it starts, instead of producing an inverted span.
 *
 * @param chunks Chunks with slice-relative `[start, end]` seconds and text.
 * @param timeOffsetSec Position of the slice on the full timeline.
 * @param trims Trim regions in full-timeline milliseconds; overlapping segments are dropped.
 * @param audioDurationSec Real (unpadded) duration of the slice.
 * @returns Segments on the full timeline, ordered by start then end, with
 *   overlapping neighbours that carry identical text merged.
 */
function segmentsFromTranscriberChunks(
  chunks: Array<{ timestamp?: [number | null, number | null]; text?: unknown }>,
  timeOffsetSec: number,
  trims: TrimRegion[],
  audioDurationSec: number,
): CaptionSegment[] {
  const MIN_SEGMENT_SEC = 0.08;

  // Chunks without a numeric start sort first.
  const startKey = (c: { timestamp?: [number | null, number | null] }): number => {
    const s = c.timestamp?.[0];
    return typeof s === "number" ? s : -1;
  };
  const sorted = [...chunks].sort((x, y) => startKey(x) - startKey(y));

  const sliceEndSec = timeOffsetSec + audioDurationSec;
  const segments: CaptionSegment[] = [];

  for (let idx = 0; idx < sorted.length; idx++) {
    const chunk = sorted[idx]!;
    const ts = chunk.timestamp;
    if (!ts) continue;

    let a = Math.max(0, ts[0] ?? 0);
    if (a >= audioDurationSec) {
      // Start reported at/after the real audio end: keep the text, anchored to the tail.
      a = Math.max(0, audioDurationSec - MIN_SEGMENT_SEC);
    }

    let b = ts[1];
    if (b == null) {
      b = audioDurationSec;
      for (let j = idx + 1; j < sorted.length; j++) {
        const nextStart = sorted[j]?.timestamp?.[0];
        if (typeof nextStart === "number") {
          b = nextStart;
          break;
        }
      }
    }
    if (b <= a) {
      b = Math.min(a + 0.25, audioDurationSec);
    }
    b = Math.min(b, audioDurationSec);

    const text = String(chunk.text ?? "")
      .replace(/\s+/g, " ")
      .trim();
    if (!text) continue;

    const startSec = a + timeOffsetSec;
    const endSec = Math.min(
      Math.max(startSec + MIN_SEGMENT_SEC, b + timeOffsetSec),
      sliceEndSec,
    );
    // Only possible for an empty or invalid slice duration; never emit an empty span.
    if (!(endSec > startSec)) continue;

    const startMs = Math.round(startSec * 1000);
    const endMs = Math.round(endSec * 1000);
    if (trims.length > 0 && segmentOverlapsTrim(startMs, endMs, trims)) continue;

    segments.push({ startSec, endSec, text });
  }

  segments.sort((u, v) => u.startSec - v.startSec || u.endSec - v.endSec);

  const deduped: CaptionSegment[] = [];
  for (const seg of segments) {
    const prev = deduped[deduped.length - 1];
    if (prev && prev.text === seg.text && seg.startSec <= prev.endSec) {
      prev.endSec = Math.max(prev.endSec, seg.endSec);
      prev.startSec = Math.min(prev.startSec, seg.startSec);
      continue;
    }
    deduped.push(seg);
  }
  return deduped;
}

/**
 * Runs the transcriber on one audio slice, chunking only long clips.
 *
 * @param transcriber ASR pipeline call.
 * @param samples Mono 16 kHz PCM for this slice.
 * @param opts.forceFullSequences Forwarded as `force_full_sequences`.
 * @param opts.timestampMode `"word"` requests word timestamps; `"phrase"` passes
 *   `return_timestamps: true`.
 * @returns The raw pipeline result.
 */
async function runTranscriberOnSlice(
  transcriber: TranscriberFn,
  samples: Float32Array,
  opts: { forceFullSequences: boolean; timestampMode: "word" | "phrase" },
): Promise<unknown> {
  const durationSec = samples.length / 16_000;
  // Only chunk long clips; short-audio chunking regressed some Whisper.js runs (empty chunks).
  const chunking = durationSec > 30 ? { chunk_length_s: 30, stride_length_s: 5 } : {};
  return transcriber(samples, {
    return_timestamps: opts.timestampMode === "word" ? "word" : true,
    force_full_sequences: opts.forceFullSequences,
    ...chunking,
  });
}
/** One timed chunk as found in a Transformers.js ASR result. */
type AsrChunk = { timestamp?: [number | null, number | null]; text?: unknown };

/**
 * Flattens the various shapes a Transformers.js ASR result can take into a chunk list.
 *
 * Accepts a single result object or an array of them; items without an array
 * `chunks` property contribute nothing.
 */
function getChunksFromTranscriberResult(result: unknown): AsrChunk[] {
  if (result == null) return [];
  const items: unknown[] = Array.isArray(result) ? result : [result];
  return items.flatMap((item): AsrChunk[] => {
    const chunks = (item as { chunks?: unknown } | null | undefined)?.chunks;
    return Array.isArray(chunks) ? chunks : [];
  });
}

/**
 * Prefer `chunks`; if the model only returned top-level `text`, synthesize one span for timing.
 *
 * The synthesized span is `[0, null]`; for an array result only the first
 * item's text is used. Returns `[]` when there are neither chunks nor text.
 */
function extractChunksFromAsrResult(result: unknown): AsrChunk[] {
  const fromChunks = getChunksFromTranscriberResult(result);
  if (fromChunks.length > 0) return fromChunks;

  const single: unknown = Array.isArray(result) ? result[0] : result;
  const rawText = (single as { text?: unknown } | null | undefined)?.text;
  const text = typeof rawText === "string" ? rawText.trim() : "";
  return text ? [{ timestamp: [0, null], text }] : [];
}

/**
 * Drives Whisper over (possibly sliced) mono 16 kHz audio and returns timed segments.
 * Long audio is split into `TRANSCRIBE_SLICE_SAMPLES`-sized slices so one forward pass
 * does not exhaust WASM memory; timestamps are shifted back onto the full timeline.
 * Tries word- then phrase-level timestamps, with a trim-ignoring retry, before giving up.
 *
 * @param transcriber ASR pipeline call.
 * @param samples Mono 16 kHz PCM.
 * @param trims Trim regions (full-timeline ms); segments overlapping them are dropped.
 * @returns The first non-empty result together with the timestamp mode that
 *   produced it, or `{ segments: [], granularity: "phrase" }` when every attempt
 *   came back empty. Transcriber failures are logged and count as empty.
 */
export async function runTranscription(
  transcriber: TranscriberFn,
  samples: Float32Array,
  trims: TrimRegion[],
): Promise<TranscribeMono16kResult> {
  /** One full pass over all slices; any failure discards the pass and yields []. */
  const transcribePass = async (
    ignoreTrims: boolean,
    forceFullSequences: boolean,
    timestampMode: "word" | "phrase",
  ): Promise<CaptionSegment[]> => {
    const activeTrims = ignoreTrims ? [] : trims;
    const collected: CaptionSegment[] = [];
    try {
      let offset = 0;
      // do/while so that empty input still gets one (zero-padded) pass.
      do {
        const end = Math.min(offset + TRANSCRIBE_SLICE_SAMPLES, samples.length);
        // Only the final slice can be shorter than the minimum; padding is a no-op otherwise.
        const { slice, realDurationSec } = padTailSliceForTranscribe(
          samples.subarray(offset, end),
        );
        const result = await runTranscriberOnSlice(transcriber, slice, {
          forceFullSequences,
          timestampMode,
        });
        collected.push(
          ...segmentsFromTranscriberChunks(
            extractChunksFromAsrResult(result),
            offset / 16_000,
            activeTrims,
            realDurationSec,
          ),
        );
        offset = end;
      } while (offset < samples.length);
      return collected;
    } catch (e) {
      console.warn("[captioning] Whisper pass failed:", e);
      return [];
    }
  };

  const attemptModes: Array<"word" | "phrase"> = ["word", "phrase"];
  for (const timestampMode of attemptModes) {
    let segments = await transcribePass(false, true, timestampMode);
    if (segments.length === 0) {
      segments = await transcribePass(false, false, timestampMode);
    }
    // NOTE(review): these retries transcribe without trims and then drop trim-overlapping
    // segments; with a transcriber that returns the same output for the same input they look
    // unable to recover anything the trim-aware passes missed. Confirm before changing.
    if (segments.length === 0 && trims.length > 0) {
      segments = dropSegmentsOverlappingTrimRegions(
        await transcribePass(true, true, timestampMode),
        trims,
      );
      if (segments.length === 0) {
        segments = dropSegmentsOverlappingTrimRegions(
          await transcribePass(true, false, timestampMode),
          trims,
        );
      }
    }
    if (segments.length > 0) {
      return { segments, granularity: timestampMode };
    }
  }

  return { segments: [], granularity: "phrase" };
}
if (segments.length > 0) { + return { segments, granularity: timestampMode }; + } + } + + return { segments: [], granularity: "phrase" }; +} diff --git a/src/lib/vite-stubs/empty-node-module.ts b/src/lib/vite-stubs/empty-node-module.ts new file mode 100644 index 000000000..16ee52688 --- /dev/null +++ b/src/lib/vite-stubs/empty-node-module.ts @@ -0,0 +1,7 @@ +/** + * Default export with no enumerable keys. Used as a Vite alias target for Node + * builtins that `@xenova/transformers` imports; `env.js` treats an empty object + * as “no filesystem” so it stays on browser / remote paths. + */ +const empty = Object.create(null) as Record; +export default empty; diff --git a/src/lib/vite-stubs/onnxruntime-node-stub.ts b/src/lib/vite-stubs/onnxruntime-node-stub.ts new file mode 100644 index 000000000..a70b3dd60 --- /dev/null +++ b/src/lib/vite-stubs/onnxruntime-node-stub.ts @@ -0,0 +1,10 @@ +/** + * Transformers always imports `onnxruntime-node`, then picks web vs node from `process.release.name`. + * In Electron's renderer that name is often `"node"` while we still must use the WASM build — the real + * `onnxruntime-node` package is aliased away (it pulls `fs`). Re-export `onnxruntime-web` here so the + * "node" branch still receives a working ORT with `registerBackend` etc. + */ +import * as ortWeb from "onnxruntime-web"; + +const ort = (ortWeb as { default?: typeof ortWeb }).default ?? ortWeb; +export default ort; diff --git a/vite.config.ts b/vite.config.ts index 0779e1358..213e44711 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -28,8 +28,22 @@ export default defineConfig({ resolve: { alias: { "@": path.resolve(__dirname, "src"), + // @xenova/transformers: env.js statically imports fs/path/url; onnx.js imports + // onnxruntime-node (must not be bundled in the renderer — it requires fs). 
+ fs: path.resolve(__dirname, "src/lib/vite-stubs/empty-node-module.ts"), + path: path.resolve(__dirname, "src/lib/vite-stubs/empty-node-module.ts"), + url: path.resolve(__dirname, "src/lib/vite-stubs/empty-node-module.ts"), + "onnxruntime-node": path.resolve(__dirname, "src/lib/vite-stubs/onnxruntime-node-stub.ts"), // re-exports web ORT }, }, + optimizeDeps: { + exclude: ["@xenova/transformers"], + }, + // The captioning worker dynamically imports @xenova/transformers, which makes the + // worker bundle code-split — unsupported by the default "iife" worker format. + worker: { + format: "es", + }, build: { target: "esnext", minify: "terser",