From 0c2d0500a47782b4400be6d9795f5ef3cbe0dff4 Mon Sep 17 00:00:00 2001 From: Dmytro Shyryaiev Date: Fri, 30 Jan 2026 17:31:13 +0200 Subject: [PATCH 1/9] Initial empty commit From e7f6d1657c01b9443246739c97b84dc685cb47e6 Mon Sep 17 00:00:00 2001 From: Turone Date: Mon, 30 Mar 2026 14:30:55 +0300 Subject: [PATCH 2/9] Static files via SharedArrayBuffers --- impress.js | 20 ++++- lib/application.js | 4 +- lib/cache.js | 150 +++++++++++++++++++++++++++++++++++++ lib/static.js | 112 ++++++++++++++++++++++++++-- lib/worker.js | 15 ++++ schemas/config/cache.js | 1 + test/cache-shared.js | 160 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 452 insertions(+), 10 deletions(-) create mode 100644 lib/cache.js create mode 100644 test/cache-shared.js diff --git a/impress.js b/impress.js index f3db1d7fa..885fbc6f8 100644 --- a/impress.js +++ b/impress.js @@ -11,6 +11,7 @@ const { Pool, isError } = require('metautil'); const { loadSchema } = require('metaschema'); const { Logger } = require('metalog'); const { Planner } = require('./lib/planner.js'); +const { StaticCache } = require('./lib/cache.js'); const CONFIG_SECTIONS = ['log', 'scale', 'server', 'sessions']; const PATH = process.cwd(); @@ -69,6 +70,17 @@ const startWorker = async (app, kind, port, id = ++impress.lastWorkerId) => { } app.threads.set(id, worker); + if (app.cache) { + for (const place of ['static', 'resources']) { + const entries = StaticCache.getPlaceEntries(app.cache, place); + worker.postMessage({ + name: 'cache-init', + place, + entries, + }); + } + } + worker.on('error', (error) => { impress.console.error(error.message); }); @@ -174,14 +186,20 @@ const loadApplication = async (root, dir, master) => { impress.planner = await new Planner(tasksPath, tasksConfig, impress); impress.config = config; } + const cache = new StaticCache(dir, config); + await cache.loadPlace('static'); + await cache.loadPlace('resources'); + const { balancer, ports = [], workers = {} } = config.server; const threads = new 
Map(); const pool = new Pool({ timeout: workers.wait }); - const app = { root, path: dir, config, threads, pool, ready: 0 }; + const app = { root, path: dir, config, threads, pool, ready: 0, cache }; if (balancer) await startWorker(app, 'balancer', balancer); for (const port of ports) await startWorker(app, 'server', port); const poolSize = workers.pool || 0; for (let i = 0; i < poolSize; i++) await startWorker(app, 'worker'); + const watchTimeout = config.server.timeouts.watch; + cache.startWatch(watchTimeout, (data) => broadcast(app, data)); impress.applications.set(dir, app); }; diff --git a/lib/application.js b/lib/application.js index 6efc01a04..872ca2601 100644 --- a/lib/application.js +++ b/lib/application.js @@ -76,8 +76,6 @@ class Application extends EventEmitter { this.sandbox.application.invoke = invoke; this.sandbox.application.emit('loading'); await this.parallel([ - this.static.load(), - this.resources.load(), this.cert.load(), (async () => { await this.schemas.load(); @@ -203,6 +201,7 @@ class Application extends EventEmitter { const relPath = filePath.substring(this.path.length + 1); const sepIndex = relPath.indexOf(node.path.sep); const place = relPath.substring(0, sepIndex); + if (place === 'static' || place === 'resources') return; node.fs.stat(filePath, (error, stat) => { if (error) return; if (stat.isDirectory()) return void this[place].load(filePath); @@ -215,6 +214,7 @@ class Application extends EventEmitter { const relPath = filePath.substring(this.path.length + 1); const sepIndex = relPath.indexOf(node.path.sep); const place = relPath.substring(0, sepIndex); + if (place === 'static' || place === 'resources') return; this[place].delete(filePath); if (threadId === 1) this.console.debug('Deleted: /' + relPath); }); diff --git a/lib/cache.js b/lib/cache.js new file mode 100644 index 000000000..52e5e1552 --- /dev/null +++ b/lib/cache.js @@ -0,0 +1,150 @@ +'use strict'; + +const { node, metarhia } = require('./deps.js'); + +const WIN = 
process.platform === 'win32'; +const MAX_FILE_SIZE = '10 mb'; +const STREAM_THRESHOLD = '1 mb'; + +class StaticCache { + constructor(appPath, config) { + this.appPath = appPath; + this.places = new Map(); + this.directories = []; + this.version = 0; + this.watcher = null; + const cacheConfig = config?.cache || {}; + const { sizeToBytes } = metarhia.metautil; + this.maxFileSize = sizeToBytes(cacheConfig.maxFileSize || MAX_FILE_SIZE); + this.streamThreshold = sizeToBytes( + cacheConfig.streamThreshold || STREAM_THRESHOLD, + ); + } + + static getKey(filePath, basePath) { + const key = filePath.substring(basePath.length); + if (WIN) return metarhia.metautil.replace(key, node.path.sep, '/'); + return key; + } + + async loadPlace(name) { + const dirPath = node.path.join(this.appPath, name); + await metarhia.metautil.ensureDirectory(dirPath); + const files = new Map(); + this.places.set(name, { files, path: dirPath }); + await this._loadDir(name, dirPath); + return StaticCache.getPlaceEntries(this, name); + } + + async _loadDir(placeName, dirPath) { + this.directories.push(dirPath); + try { + const items = await node.fsp.readdir(dirPath, { withFileTypes: true }); + for (const item of items) { + if (item.name.startsWith('.eslint')) continue; + const filePath = node.path.join(dirPath, item.name); + if (item.isDirectory()) await this._loadDir(placeName, filePath); + else await this._loadFile(placeName, filePath); + } + } catch { + // Directory may not exist yet + } + } + + async _loadFile(placeName, filePath) { + const place = this.places.get(placeName); + try { + const stat = await node.fsp.stat(filePath); + const key = StaticCache.getKey(filePath, place.path); + const ver = ++this.version; + if (stat.size > this.maxFileSize) { + const entry = { + key, + sab: null, + byteLength: 0, + size: stat.size, + version: ver, + }; + place.files.set(key, entry); + return; + } + const data = await node.fsp.readFile(filePath); + const sab = new SharedArrayBuffer(data.byteLength); + new 
Uint8Array(sab).set(data); + const entry = { + key, + sab, + byteLength: data.byteLength, + size: stat.size, + version: ver, + }; + place.files.set(key, entry); + } catch { + // File may have been removed between readdir and stat + } + } + + static getPlaceEntries(cache, name) { + const place = cache.places.get(name); + if (!place) return []; + return Array.from(place.files.values()); + } + + startWatch(timeout, broadcast) { + const { DirectoryWatcher } = metarhia.metawatch; + this.watcher = new DirectoryWatcher({ timeout }); + for (const dir of this.directories) { + this.watcher.watch(dir); + } + + this.watcher.on('change', (filePath) => { + const resolved = this._resolve(filePath); + if (!resolved.name) return; + const { name } = resolved; + node.fs.stat(filePath, async (err, stat) => { + if (err) return; + if (stat.isDirectory()) { + this.watcher.watch(filePath); + await this._loadDir(name, filePath); + const entries = StaticCache.getPlaceEntries(this, name); + broadcast({ name: 'cache-init', place: name, entries }); + return; + } + await this._loadFile(name, filePath); + const place = this.places.get(name); + const key = StaticCache.getKey(filePath, place.path); + const entry = place.files.get(key); + if (entry) { + broadcast({ name: 'cache-update', place: name, entry }); + } + }); + }); + + this.watcher.on('delete', (filePath) => { + const resolved = this._resolve(filePath); + if (!resolved.name) return; + const { name, key } = resolved; + const place = this.places.get(name); + if (place) { + place.files.delete(key); + broadcast({ name: 'cache-delete', place: name, key }); + } + }); + } + + _resolve(filePath) { + for (const [name, place] of this.places) { + if (filePath.startsWith(place.path)) { + const key = StaticCache.getKey(filePath, place.path); + return { name, key }; + } + } + return {}; + } + + close() { + if (this.watcher) this.watcher.close(); + } +} + +module.exports = { StaticCache }; diff --git a/lib/static.js b/lib/static.js index 
e074afdd6..1cf33bed6 100644 --- a/lib/static.js +++ b/lib/static.js @@ -3,9 +3,12 @@ const { node, metarhia } = require('./deps.js'); const { Place } = require('./place.js'); const { join } = node.path.posix; +const { Readable } = node.stream; const WIN = process.platform === 'win32'; const MAX_FILE_SIZE = '10 mb'; +const STREAM_THRESHOLD = '1 mb'; +const CHUNK_SIZE = 65536; const STATUS_CACHE = new Map(); @@ -21,12 +24,27 @@ const status = (code) => { return file; }; +const createSABStream = (sab, byteLength, options = {}) => { + const start = options.start || 0; + const end = options.end ?? byteLength - 1; + let offset = start; + return new Readable({ + read() { + if (offset > end) return void this.push(null); + const chunkEnd = Math.min(offset + CHUNK_SIZE, end + 1); + this.push(Buffer.from(sab, offset, chunkEnd - offset)); + offset = chunkEnd; + }, + }); +}; + class Static extends Place { constructor(name, application, options = {}) { super(name, application); this.files = new Map(); this.ext = options.ext; this.maxFileSize = -1; + this.streamThreshold = -1; } get(key) { @@ -39,6 +57,31 @@ class Static extends Place { return key; } + static withData(entry) { + if (entry.sab) { + entry.data = Buffer.from(entry.sab, 0, entry.byteLength); + } else { + entry.data = null; + } + return entry; + } + + initCache(entries) { + this.files.clear(); + for (const entry of entries) { + this.files.set(entry.key, Static.withData(entry)); + } + } + + updateEntry(entry) { + this.files.set(entry.key, Static.withData(entry)); + } + + // Called from worker message handler: remove entry by key + deleteEntry(key) { + this.files.delete(key); + } + delete(filePath) { const key = this.getKey(filePath); this.files.delete(key); @@ -92,20 +135,68 @@ class Static extends Place { return this.find(filePath, code, true); } + _initThreshold() { + if (this.streamThreshold !== -1) return; + const threshold = this.application.config?.cache?.streamThreshold; + this.streamThreshold = 
metarhia.metautil.sizeToBytes( + threshold || STREAM_THRESHOLD, + ); + } + async serve(url, transport) { const [filePath] = metarhia.metautil.split(url, '?'); const fileExt = metarhia.metautil.fileExt(filePath); let file = this.find(filePath); + + // SAB-backed cached file (shared memory from main process) + if (file.sab) { + this._initThreshold(); + const { sab, byteLength, code } = file; + if (code === -1) { + const data = Buffer.from(sab, 0, byteLength); + return void transport.write(data, 200, 'html'); + } + const { headers } = transport.req; + if (headers.range) { + const range = metarhia.metautil.parseRange(headers.range); + const { start, end = byteLength - 1 } = range; + if (start >= end || start >= byteLength || end >= byteLength) { + file = this.find(filePath, 416); + return void transport.write(file.data, 416, fileExt); + } + if (byteLength > this.streamThreshold) { + const readable = createSABStream(sab, byteLength, { start, end }); + const options = { start, end, size: byteLength }; + return void transport.write(readable, 206, fileExt, options); + } + const data = Buffer.from(sab, start, end - start + 1); + const options = { start, end, size: byteLength }; + return void transport.write(data, 206, fileExt, options); + } + if (byteLength > this.streamThreshold) { + const readable = createSABStream(sab, byteLength); + const options = { size: byteLength }; + return void transport.write(readable, code, fileExt, options); + } + const data = Buffer.from(sab, 0, byteLength); + return void transport.write(data, code, fileExt); + } + + // Legacy path: Buffer-backed cached file (used by cert or local cache) if (file.data && file.stat) { if (file.code === -1) return void transport.write(file.data, 200, 'html'); return void transport.write(file.data, file.code, fileExt); } + + // Uncached large file: stream from disk const absPath = join(this.path, url); - if (absPath.startsWith(this.path)) { - let { stat } = file; - if (!stat) stat = await 
node.fsp.stat(absPath).catch(() => null); - if (stat && stat.isFile()) { - const { size } = stat; + if (file.size > 0 || (file.stat && absPath.startsWith(this.path))) { + let size = file.size || file.stat?.size; + if (!size) { + const stat = await node.fsp.stat(absPath).catch(() => null); + if (stat && stat.isFile()) size = stat.size; + } + if (size) { const options = { size }; let code = 200; const { headers } = transport.req; @@ -124,8 +215,15 @@ class Static extends Place { return void transport.write(readable, code, fileExt, options); } } - if (file.code === -1) return void transport.write(file.data, 200, 'html'); - return void transport.write(file.data, 404); + + // Status page or virtual file fallback + if (file.data) { + const code = file.code === -1 ? 200 : file.code || 404; + const ext = file.code === -1 ? 'html' : fileExt; + return void transport.write(file.data, code, ext); + } + const errFile = this.find(filePath, 404); + return void transport.write(errFile.data, 404); } } diff --git a/lib/worker.js b/lib/worker.js index 4f15e98d2..dc5dd157f 100644 --- a/lib/worker.js +++ b/lib/worker.js @@ -41,6 +41,21 @@ const invoke = async ({ method, args, exclusive = false }) => { }; const handlers = { + 'cache-init': ({ place, entries }) => { + const target = application[place]; + if (target) target.initCache(entries); + }, + + 'cache-update': ({ place, entry }) => { + const target = application[place]; + if (target) target.updateEntry(entry); + }, + + 'cache-delete': ({ place, key }) => { + const target = application[place]; + if (target) target.deleteEntry(key); + }, + ready: async () => { application.emit('ready'); }, diff --git a/schemas/config/cache.js b/schemas/config/cache.js index 4d44a550a..b1ce22a16 100644 --- a/schemas/config/cache.js +++ b/schemas/config/cache.js @@ -1,5 +1,6 @@ ({ size: 'size', maxFileSize: 'size', + streamThreshold: { type: 'size', required: false }, avoid: { array: 'string', required: false }, }); diff --git a/test/cache-shared.js 
b/test/cache-shared.js new file mode 100644 index 000000000..3fcea0185 --- /dev/null +++ b/test/cache-shared.js @@ -0,0 +1,160 @@ +'use strict'; + +const { test } = require('node:test'); +const assert = require('node:assert'); +const path = require('node:path'); +const { Static } = require('../lib/static.js'); +const { StaticCache } = require('../lib/cache.js'); + +const root = process.cwd(); + +const application = { + path: path.join(root, 'test'), + watcher: { watch() {} }, + absolute(relative) { + return path.join(this.path, relative); + }, +}; + +// --- StaticCache (main process) --- + +test('StaticCache - should load files into SAB', async () => { + const appPath = path.join(root, 'test'); + const cache = new StaticCache(appPath, {}); + const entries = await cache.loadPlace('lib'); + assert.ok(entries.length > 0); + const entry = entries.find((e) => e.key.includes('add.js')); + assert.ok(entry); + assert.ok(entry.sab instanceof SharedArrayBuffer); + assert.strictEqual(entry.byteLength, entry.sab.byteLength); + assert.ok(entry.version > 0); + const data = Buffer.from(entry.sab, 0, entry.byteLength); + assert.ok(data.length > 0); +}); + +test('StaticCache - entries have correct structure', async () => { + const appPath = path.join(root, 'test'); + const cache = new StaticCache(appPath, {}); + const entries = await cache.loadPlace('lib'); + for (const entry of entries) { + assert.strictEqual(typeof entry.key, 'string'); + assert.ok(entry.key.startsWith('/')); + assert.strictEqual(typeof entry.byteLength, 'number'); + assert.strictEqual(typeof entry.size, 'number'); + assert.strictEqual(typeof entry.version, 'number'); + if (entry.sab) { + assert.ok(entry.sab instanceof SharedArrayBuffer); + } + } +}); + +test('StaticCache.getPlaceEntries - returns entries', async () => { + const appPath = path.join(root, 'test'); + const cache = new StaticCache(appPath, {}); + await cache.loadPlace('lib'); + const entries = StaticCache.getPlaceEntries(cache, 'lib'); + 
assert.ok(entries.length > 0); + const missing = StaticCache.getPlaceEntries(cache, 'none'); + assert.strictEqual(missing.length, 0); +}); + +// --- Static (worker side) --- + +test('Static initCache - populate from SAB entries', () => { + const cache = new Static('lib', application); + const content = Buffer.from('hello world'); + const sab = new SharedArrayBuffer(content.byteLength); + new Uint8Array(sab).set(content); + cache.initCache([ + { + key: '/index.html', + sab, + byteLength: content.byteLength, + size: content.byteLength, + version: 1, + }, + ]); + assert.strictEqual(cache.files.size, 1); + const file = cache.get('/index.html'); + assert.ok(file.data instanceof Buffer); + assert.strictEqual(file.data.length, content.byteLength); + assert.deepStrictEqual(file.data, content); + assert.ok(file.sab instanceof SharedArrayBuffer); +}); + +test('Static updateEntry - updates SAB entry', () => { + const cache = new Static('lib', application); + const sab1 = new SharedArrayBuffer(9); + new Uint8Array(sab1).set(Buffer.from('version 1')); + cache.initCache([ + { key: '/f.js', sab: sab1, byteLength: 9, size: 9, version: 1 }, + ]); + const content2 = Buffer.from('version 2 updated'); + const sab2 = new SharedArrayBuffer(content2.byteLength); + new Uint8Array(sab2).set(content2); + cache.updateEntry({ + key: '/f.js', + sab: sab2, + byteLength: content2.byteLength, + size: content2.byteLength, + version: 2, + }); + const file = cache.get('/f.js'); + assert.deepStrictEqual(file.data, content2); + assert.strictEqual(file.version, 2); +}); + +test('Static deleteEntry - removes entry by key', () => { + const cache = new Static('lib', application); + const sab = new SharedArrayBuffer(4); + new Uint8Array(sab).set([1, 2, 3, 4]); + cache.initCache([ + { key: '/a.js', sab, byteLength: 4, size: 4, version: 1 }, + { key: '/b.js', sab, byteLength: 4, size: 4, version: 2 }, + ]); + assert.strictEqual(cache.files.size, 2); + cache.deleteEntry('/a.js'); + 
assert.strictEqual(cache.files.size, 1); + assert.strictEqual(cache.get('/a.js'), undefined); + assert.ok(cache.get('/b.js')); +}); + +test('Static withData - null sab has null data', () => { + const cache = new Static('lib', application); + cache.initCache([ + { + key: '/big.bin', + sab: null, + byteLength: 0, + size: 20000000, + version: 1, + }, + ]); + const file = cache.get('/big.bin'); + assert.strictEqual(file.data, null); + assert.strictEqual(file.size, 20000000); +}); + +test('Static SAB data is zero-copy view', () => { + const sab = new SharedArrayBuffer(5); + new Uint8Array(sab).set([10, 20, 30, 40, 50]); + const cache = new Static('lib', application); + cache.initCache([{ key: '/f.bin', sab, byteLength: 5, size: 5, version: 1 }]); + const file = cache.get('/f.bin'); + assert.strictEqual(file.data.buffer, sab); +}); + +test('Static initCache clears previous entries', () => { + const cache = new Static('lib', application); + const sab = new SharedArrayBuffer(2); + cache.initCache([ + { key: '/old.js', sab, byteLength: 2, size: 2, version: 1 }, + ]); + assert.strictEqual(cache.files.size, 1); + cache.initCache([ + { key: '/new.js', sab, byteLength: 2, size: 2, version: 2 }, + ]); + assert.strictEqual(cache.files.size, 1); + assert.strictEqual(cache.get('/old.js'), undefined); + assert.ok(cache.get('/new.js')); +}); From 6862f1065a4a9a2fa9e747653350036902474150 Mon Sep 17 00:00:00 2001 From: Turone Date: Mon, 30 Mar 2026 14:30:55 +0300 Subject: [PATCH 3/9] Static files via SharedArrayBuffers --- impress.js | 20 ++++- lib/application.js | 4 +- lib/cache.js | 150 +++++++++++++++++++++++++++++++++++++ lib/static.js | 112 ++++++++++++++++++++++++++-- lib/worker.js | 15 ++++ schemas/config/cache.js | 1 + test/cache-shared.js | 160 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 452 insertions(+), 10 deletions(-) create mode 100644 lib/cache.js create mode 100644 test/cache-shared.js diff --git a/impress.js b/impress.js index f3db1d7fa..885fbc6f8 100644 
--- a/impress.js +++ b/impress.js @@ -11,6 +11,7 @@ const { Pool, isError } = require('metautil'); const { loadSchema } = require('metaschema'); const { Logger } = require('metalog'); const { Planner } = require('./lib/planner.js'); +const { StaticCache } = require('./lib/cache.js'); const CONFIG_SECTIONS = ['log', 'scale', 'server', 'sessions']; const PATH = process.cwd(); @@ -69,6 +70,17 @@ const startWorker = async (app, kind, port, id = ++impress.lastWorkerId) => { } app.threads.set(id, worker); + if (app.cache) { + for (const place of ['static', 'resources']) { + const entries = StaticCache.getPlaceEntries(app.cache, place); + worker.postMessage({ + name: 'cache-init', + place, + entries, + }); + } + } + worker.on('error', (error) => { impress.console.error(error.message); }); @@ -174,14 +186,20 @@ const loadApplication = async (root, dir, master) => { impress.planner = await new Planner(tasksPath, tasksConfig, impress); impress.config = config; } + const cache = new StaticCache(dir, config); + await cache.loadPlace('static'); + await cache.loadPlace('resources'); + const { balancer, ports = [], workers = {} } = config.server; const threads = new Map(); const pool = new Pool({ timeout: workers.wait }); - const app = { root, path: dir, config, threads, pool, ready: 0 }; + const app = { root, path: dir, config, threads, pool, ready: 0, cache }; if (balancer) await startWorker(app, 'balancer', balancer); for (const port of ports) await startWorker(app, 'server', port); const poolSize = workers.pool || 0; for (let i = 0; i < poolSize; i++) await startWorker(app, 'worker'); + const watchTimeout = config.server.timeouts.watch; + cache.startWatch(watchTimeout, (data) => broadcast(app, data)); impress.applications.set(dir, app); }; diff --git a/lib/application.js b/lib/application.js index 6efc01a04..872ca2601 100644 --- a/lib/application.js +++ b/lib/application.js @@ -76,8 +76,6 @@ class Application extends EventEmitter { this.sandbox.application.invoke = invoke; 
this.sandbox.application.emit('loading'); await this.parallel([ - this.static.load(), - this.resources.load(), this.cert.load(), (async () => { await this.schemas.load(); @@ -203,6 +201,7 @@ class Application extends EventEmitter { const relPath = filePath.substring(this.path.length + 1); const sepIndex = relPath.indexOf(node.path.sep); const place = relPath.substring(0, sepIndex); + if (place === 'static' || place === 'resources') return; node.fs.stat(filePath, (error, stat) => { if (error) return; if (stat.isDirectory()) return void this[place].load(filePath); @@ -215,6 +214,7 @@ class Application extends EventEmitter { const relPath = filePath.substring(this.path.length + 1); const sepIndex = relPath.indexOf(node.path.sep); const place = relPath.substring(0, sepIndex); + if (place === 'static' || place === 'resources') return; this[place].delete(filePath); if (threadId === 1) this.console.debug('Deleted: /' + relPath); }); diff --git a/lib/cache.js b/lib/cache.js new file mode 100644 index 000000000..52e5e1552 --- /dev/null +++ b/lib/cache.js @@ -0,0 +1,150 @@ +'use strict'; + +const { node, metarhia } = require('./deps.js'); + +const WIN = process.platform === 'win32'; +const MAX_FILE_SIZE = '10 mb'; +const STREAM_THRESHOLD = '1 mb'; + +class StaticCache { + constructor(appPath, config) { + this.appPath = appPath; + this.places = new Map(); + this.directories = []; + this.version = 0; + this.watcher = null; + const cacheConfig = config?.cache || {}; + const { sizeToBytes } = metarhia.metautil; + this.maxFileSize = sizeToBytes(cacheConfig.maxFileSize || MAX_FILE_SIZE); + this.streamThreshold = sizeToBytes( + cacheConfig.streamThreshold || STREAM_THRESHOLD, + ); + } + + static getKey(filePath, basePath) { + const key = filePath.substring(basePath.length); + if (WIN) return metarhia.metautil.replace(key, node.path.sep, '/'); + return key; + } + + async loadPlace(name) { + const dirPath = node.path.join(this.appPath, name); + await 
metarhia.metautil.ensureDirectory(dirPath); + const files = new Map(); + this.places.set(name, { files, path: dirPath }); + await this._loadDir(name, dirPath); + return StaticCache.getPlaceEntries(this, name); + } + + async _loadDir(placeName, dirPath) { + this.directories.push(dirPath); + try { + const items = await node.fsp.readdir(dirPath, { withFileTypes: true }); + for (const item of items) { + if (item.name.startsWith('.eslint')) continue; + const filePath = node.path.join(dirPath, item.name); + if (item.isDirectory()) await this._loadDir(placeName, filePath); + else await this._loadFile(placeName, filePath); + } + } catch { + // Directory may not exist yet + } + } + + async _loadFile(placeName, filePath) { + const place = this.places.get(placeName); + try { + const stat = await node.fsp.stat(filePath); + const key = StaticCache.getKey(filePath, place.path); + const ver = ++this.version; + if (stat.size > this.maxFileSize) { + const entry = { + key, + sab: null, + byteLength: 0, + size: stat.size, + version: ver, + }; + place.files.set(key, entry); + return; + } + const data = await node.fsp.readFile(filePath); + const sab = new SharedArrayBuffer(data.byteLength); + new Uint8Array(sab).set(data); + const entry = { + key, + sab, + byteLength: data.byteLength, + size: stat.size, + version: ver, + }; + place.files.set(key, entry); + } catch { + // File may have been removed between readdir and stat + } + } + + static getPlaceEntries(cache, name) { + const place = cache.places.get(name); + if (!place) return []; + return Array.from(place.files.values()); + } + + startWatch(timeout, broadcast) { + const { DirectoryWatcher } = metarhia.metawatch; + this.watcher = new DirectoryWatcher({ timeout }); + for (const dir of this.directories) { + this.watcher.watch(dir); + } + + this.watcher.on('change', (filePath) => { + const resolved = this._resolve(filePath); + if (!resolved.name) return; + const { name } = resolved; + node.fs.stat(filePath, async (err, stat) => { + if 
(err) return; + if (stat.isDirectory()) { + this.watcher.watch(filePath); + await this._loadDir(name, filePath); + const entries = StaticCache.getPlaceEntries(this, name); + broadcast({ name: 'cache-init', place: name, entries }); + return; + } + await this._loadFile(name, filePath); + const place = this.places.get(name); + const key = StaticCache.getKey(filePath, place.path); + const entry = place.files.get(key); + if (entry) { + broadcast({ name: 'cache-update', place: name, entry }); + } + }); + }); + + this.watcher.on('delete', (filePath) => { + const resolved = this._resolve(filePath); + if (!resolved.name) return; + const { name, key } = resolved; + const place = this.places.get(name); + if (place) { + place.files.delete(key); + broadcast({ name: 'cache-delete', place: name, key }); + } + }); + } + + _resolve(filePath) { + for (const [name, place] of this.places) { + if (filePath.startsWith(place.path)) { + const key = StaticCache.getKey(filePath, place.path); + return { name, key }; + } + } + return {}; + } + + close() { + if (this.watcher) this.watcher.close(); + } +} + +module.exports = { StaticCache }; diff --git a/lib/static.js b/lib/static.js index e074afdd6..ab6ccd18d 100644 --- a/lib/static.js +++ b/lib/static.js @@ -3,9 +3,12 @@ const { node, metarhia } = require('./deps.js'); const { Place } = require('./place.js'); const { join } = node.path.posix; +const { Readable } = node.stream; const WIN = process.platform === 'win32'; const MAX_FILE_SIZE = '10 mb'; +const STREAM_THRESHOLD = '1 mb'; +const CHUNK_SIZE = 65536; const STATUS_CACHE = new Map(); @@ -21,12 +24,27 @@ const status = (code) => { return file; }; +const createSABStream = (sab, byteLength, options = {}) => { + const start = options.start || 0; + const end = options.end ?? 
byteLength - 1; + let offset = start; + return new Readable({ + read() { + if (offset > end) return void this.push(null); + const chunkEnd = Math.min(offset + CHUNK_SIZE, end + 1); + this.push(Buffer.from(sab, offset, chunkEnd - offset)); + offset = chunkEnd; + }, + }); +}; + class Static extends Place { constructor(name, application, options = {}) { super(name, application); this.files = new Map(); this.ext = options.ext; this.maxFileSize = -1; + this.streamThreshold = -1; } get(key) { @@ -39,6 +57,32 @@ class Static extends Place { return key; } + static withData(entry) { + if (entry.sab) { + entry.data = Buffer.from(entry.sab, 0, entry.byteLength); + } else { + entry.data = null; + } + return entry; + } + + initCache(entries) { + this._initThreshold(); + this.files.clear(); + for (const entry of entries) { + this.files.set(entry.key, Static.withData(entry)); + } + } + + updateEntry(entry) { + this.files.set(entry.key, Static.withData(entry)); + } + + // Called from worker message handler: remove entry by key + deleteEntry(key) { + this.files.delete(key); + } + delete(filePath) { const key = this.getKey(filePath); this.files.delete(key); @@ -92,20 +136,67 @@ class Static extends Place { return this.find(filePath, code, true); } + _initThreshold() { + if (this.streamThreshold !== -1) return; + const threshold = this.application.config?.cache?.streamThreshold; + this.streamThreshold = metarhia.metautil.sizeToBytes( + threshold || STREAM_THRESHOLD, + ); + } + async serve(url, transport) { const [filePath] = metarhia.metautil.split(url, '?'); const fileExt = metarhia.metautil.fileExt(filePath); let file = this.find(filePath); + + // SAB-backed cached file (shared memory from main process) + if (file.sab) { + const { sab, byteLength, code } = file; + if (code === -1) { + const data = Buffer.from(sab, 0, byteLength); + return void transport.write(data, 200, 'html'); + } + const { headers } = transport.req; + if (headers.range) { + const range = 
metarhia.metautil.parseRange(headers.range); + const { start, end = byteLength - 1 } = range; + if (start >= end || start >= byteLength || end >= byteLength) { + file = this.find(filePath, 416); + return void transport.write(file.data, 416, fileExt); + } + if (byteLength > this.streamThreshold) { + const readable = createSABStream(sab, byteLength, { start, end }); + const options = { start, end, size: byteLength }; + return void transport.write(readable, 206, fileExt, options); + } + const data = Buffer.from(sab, start, end - start + 1); + const options = { start, end, size: byteLength }; + return void transport.write(data, 206, fileExt, options); + } + if (byteLength > this.streamThreshold) { + const readable = createSABStream(sab, byteLength); + const options = { size: byteLength }; + return void transport.write(readable, code, fileExt, options); + } + const data = Buffer.from(sab, 0, byteLength); + return void transport.write(data, code, fileExt); + } + + // Legacy path: Buffer-backed cached file (used by cert or local cache) if (file.data && file.stat) { if (file.code === -1) return void transport.write(file.data, 200, 'html'); return void transport.write(file.data, file.code, fileExt); } + + // Uncached large file: stream from disk const absPath = join(this.path, url); - if (absPath.startsWith(this.path)) { - let { stat } = file; - if (!stat) stat = await node.fsp.stat(absPath).catch(() => null); - if (stat && stat.isFile()) { - const { size } = stat; + if (file.size > 0 || (file.stat && absPath.startsWith(this.path))) { + let size = file.size || file.stat?.size; + if (!size) { + const stat = await node.fsp.stat(absPath).catch(() => null); + if (stat && stat.isFile()) size = stat.size; + } + if (size) { const options = { size }; let code = 200; const { headers } = transport.req; @@ -124,8 +215,15 @@ class Static extends Place { return void transport.write(readable, code, fileExt, options); } } - if (file.code === -1) return void transport.write(file.data, 200, 
'html'); - return void transport.write(file.data, 404); + + // Status page or virtual file fallback + if (file.data) { + const code = file.code === -1 ? 200 : file.code || 404; + const ext = file.code === -1 ? 'html' : fileExt; + return void transport.write(file.data, code, ext); + } + const errFile = this.find(filePath, 404); + return void transport.write(errFile.data, 404); } } diff --git a/lib/worker.js b/lib/worker.js index 4f15e98d2..dc5dd157f 100644 --- a/lib/worker.js +++ b/lib/worker.js @@ -41,6 +41,21 @@ const invoke = async ({ method, args, exclusive = false }) => { }; const handlers = { + 'cache-init': ({ place, entries }) => { + const target = application[place]; + if (target) target.initCache(entries); + }, + + 'cache-update': ({ place, entry }) => { + const target = application[place]; + if (target) target.updateEntry(entry); + }, + + 'cache-delete': ({ place, key }) => { + const target = application[place]; + if (target) target.deleteEntry(key); + }, + ready: async () => { application.emit('ready'); }, diff --git a/schemas/config/cache.js b/schemas/config/cache.js index 4d44a550a..b1ce22a16 100644 --- a/schemas/config/cache.js +++ b/schemas/config/cache.js @@ -1,5 +1,6 @@ ({ size: 'size', maxFileSize: 'size', + streamThreshold: { type: 'size', required: false }, avoid: { array: 'string', required: false }, }); diff --git a/test/cache-shared.js b/test/cache-shared.js new file mode 100644 index 000000000..3fcea0185 --- /dev/null +++ b/test/cache-shared.js @@ -0,0 +1,160 @@ +'use strict'; + +const { test } = require('node:test'); +const assert = require('node:assert'); +const path = require('node:path'); +const { Static } = require('../lib/static.js'); +const { StaticCache } = require('../lib/cache.js'); + +const root = process.cwd(); + +const application = { + path: path.join(root, 'test'), + watcher: { watch() {} }, + absolute(relative) { + return path.join(this.path, relative); + }, +}; + +// --- StaticCache (main process) --- + +test('StaticCache - 
should load files into SAB', async () => { + const appPath = path.join(root, 'test'); + const cache = new StaticCache(appPath, {}); + const entries = await cache.loadPlace('lib'); + assert.ok(entries.length > 0); + const entry = entries.find((e) => e.key.includes('add.js')); + assert.ok(entry); + assert.ok(entry.sab instanceof SharedArrayBuffer); + assert.strictEqual(entry.byteLength, entry.sab.byteLength); + assert.ok(entry.version > 0); + const data = Buffer.from(entry.sab, 0, entry.byteLength); + assert.ok(data.length > 0); +}); + +test('StaticCache - entries have correct structure', async () => { + const appPath = path.join(root, 'test'); + const cache = new StaticCache(appPath, {}); + const entries = await cache.loadPlace('lib'); + for (const entry of entries) { + assert.strictEqual(typeof entry.key, 'string'); + assert.ok(entry.key.startsWith('/')); + assert.strictEqual(typeof entry.byteLength, 'number'); + assert.strictEqual(typeof entry.size, 'number'); + assert.strictEqual(typeof entry.version, 'number'); + if (entry.sab) { + assert.ok(entry.sab instanceof SharedArrayBuffer); + } + } +}); + +test('StaticCache.getPlaceEntries - returns entries', async () => { + const appPath = path.join(root, 'test'); + const cache = new StaticCache(appPath, {}); + await cache.loadPlace('lib'); + const entries = StaticCache.getPlaceEntries(cache, 'lib'); + assert.ok(entries.length > 0); + const missing = StaticCache.getPlaceEntries(cache, 'none'); + assert.strictEqual(missing.length, 0); +}); + +// --- Static (worker side) --- + +test('Static initCache - populate from SAB entries', () => { + const cache = new Static('lib', application); + const content = Buffer.from('hello world'); + const sab = new SharedArrayBuffer(content.byteLength); + new Uint8Array(sab).set(content); + cache.initCache([ + { + key: '/index.html', + sab, + byteLength: content.byteLength, + size: content.byteLength, + version: 1, + }, + ]); + assert.strictEqual(cache.files.size, 1); + const file = 
cache.get('/index.html'); + assert.ok(file.data instanceof Buffer); + assert.strictEqual(file.data.length, content.byteLength); + assert.deepStrictEqual(file.data, content); + assert.ok(file.sab instanceof SharedArrayBuffer); +}); + +test('Static updateEntry - updates SAB entry', () => { + const cache = new Static('lib', application); + const sab1 = new SharedArrayBuffer(9); + new Uint8Array(sab1).set(Buffer.from('version 1')); + cache.initCache([ + { key: '/f.js', sab: sab1, byteLength: 9, size: 9, version: 1 }, + ]); + const content2 = Buffer.from('version 2 updated'); + const sab2 = new SharedArrayBuffer(content2.byteLength); + new Uint8Array(sab2).set(content2); + cache.updateEntry({ + key: '/f.js', + sab: sab2, + byteLength: content2.byteLength, + size: content2.byteLength, + version: 2, + }); + const file = cache.get('/f.js'); + assert.deepStrictEqual(file.data, content2); + assert.strictEqual(file.version, 2); +}); + +test('Static deleteEntry - removes entry by key', () => { + const cache = new Static('lib', application); + const sab = new SharedArrayBuffer(4); + new Uint8Array(sab).set([1, 2, 3, 4]); + cache.initCache([ + { key: '/a.js', sab, byteLength: 4, size: 4, version: 1 }, + { key: '/b.js', sab, byteLength: 4, size: 4, version: 2 }, + ]); + assert.strictEqual(cache.files.size, 2); + cache.deleteEntry('/a.js'); + assert.strictEqual(cache.files.size, 1); + assert.strictEqual(cache.get('/a.js'), undefined); + assert.ok(cache.get('/b.js')); +}); + +test('Static withData - null sab has null data', () => { + const cache = new Static('lib', application); + cache.initCache([ + { + key: '/big.bin', + sab: null, + byteLength: 0, + size: 20000000, + version: 1, + }, + ]); + const file = cache.get('/big.bin'); + assert.strictEqual(file.data, null); + assert.strictEqual(file.size, 20000000); +}); + +test('Static SAB data is zero-copy view', () => { + const sab = new SharedArrayBuffer(5); + new Uint8Array(sab).set([10, 20, 30, 40, 50]); + const cache = new 
Static('lib', application); + cache.initCache([{ key: '/f.bin', sab, byteLength: 5, size: 5, version: 1 }]); + const file = cache.get('/f.bin'); + assert.strictEqual(file.data.buffer, sab); +}); + +test('Static initCache clears previous entries', () => { + const cache = new Static('lib', application); + const sab = new SharedArrayBuffer(2); + cache.initCache([ + { key: '/old.js', sab, byteLength: 2, size: 2, version: 1 }, + ]); + assert.strictEqual(cache.files.size, 1); + cache.initCache([ + { key: '/new.js', sab, byteLength: 2, size: 2, version: 2 }, + ]); + assert.strictEqual(cache.files.size, 1); + assert.strictEqual(cache.get('/old.js'), undefined); + assert.ok(cache.get('/new.js')); +}); From 04bac0379900549d1cc20d69c2a21c88b969f310 Mon Sep 17 00:00:00 2001 From: Turone Date: Wed, 1 Apr 2026 14:36:31 +0300 Subject: [PATCH 4/9] optimize cache and static file handling after testing with SAB --- lib/cache.js | 27 ++++++--- lib/static.js | 119 +++++++++++++++++++++++----------------- schemas/config/cache.js | 1 + 3 files changed, 88 insertions(+), 59 deletions(-) diff --git a/lib/cache.js b/lib/cache.js index 52e5e1552..b6618f530 100644 --- a/lib/cache.js +++ b/lib/cache.js @@ -31,7 +31,7 @@ class StaticCache { const dirPath = node.path.join(this.appPath, name); await metarhia.metautil.ensureDirectory(dirPath); const files = new Map(); - this.places.set(name, { files, path: dirPath }); + this.places.set(name, { files, path: dirPath, entriesCache: null }); await this._loadDir(name, dirPath); return StaticCache.getPlaceEntries(this, name); } @@ -57,6 +57,7 @@ class StaticCache { const stat = await node.fsp.stat(filePath); const key = StaticCache.getKey(filePath, place.path); const ver = ++this.version; + place.entriesCache = null; if (stat.size > this.maxFileSize) { const entry = { key, @@ -66,7 +67,7 @@ class StaticCache { version: ver, }; place.files.set(key, entry); - return; + return entry; } const data = await node.fsp.readFile(filePath); const sab = new 
SharedArrayBuffer(data.byteLength); @@ -79,15 +80,20 @@ class StaticCache { version: ver, }; place.files.set(key, entry); + return entry; } catch { // File may have been removed between readdir and stat + return null; } } static getPlaceEntries(cache, name) { const place = cache.places.get(name); if (!place) return []; - return Array.from(place.files.values()); + if (place.entriesCache) return place.entriesCache; + const entries = Array.from(place.files.values()); + place.entriesCache = entries; + return entries; } startWatch(timeout, broadcast) { @@ -105,15 +111,17 @@ class StaticCache { if (err) return; if (stat.isDirectory()) { this.watcher.watch(filePath); + const before = new Set(this.places.get(name).files.keys()); await this._loadDir(name, filePath); - const entries = StaticCache.getPlaceEntries(this, name); - broadcast({ name: 'cache-init', place: name, entries }); + const place = this.places.get(name); + for (const [key, entry] of place.files) { + if (!before.has(key)) { + broadcast({ name: 'cache-update', place: name, entry }); + } + } return; } - await this._loadFile(name, filePath); - const place = this.places.get(name); - const key = StaticCache.getKey(filePath, place.path); - const entry = place.files.get(key); + const entry = await this._loadFile(name, filePath); if (entry) { broadcast({ name: 'cache-update', place: name, entry }); } @@ -127,6 +135,7 @@ class StaticCache { const place = this.places.get(name); if (place) { place.files.delete(key); + place.entriesCache = null; broadcast({ name: 'cache-delete', place: name, key }); } }); diff --git a/lib/static.js b/lib/static.js index ab6ccd18d..37d7e8808 100644 --- a/lib/static.js +++ b/lib/static.js @@ -45,6 +45,8 @@ class Static extends Place { this.ext = options.ext; this.maxFileSize = -1; this.streamThreshold = -1; + this.search = this.lookup; + this.errorPage = (code) => status(code); } get(key) { @@ -66,8 +68,23 @@ class Static extends Place { return entry; } + lookup(filePath) { + let file = 
this.files.get(filePath); + if (file) return { file, code: 200 }; + if (filePath.endsWith('/')) { + file = this.files.get(join(filePath, 'index.html')); + if (file) return { file, code: 200 }; + } + return null; + } + initCache(entries) { this._initThreshold(); + const cacheConfig = this.application.config?.cache; + if (cacheConfig?.virtualFS) { + this.search = this.find; + this.errorPage = (code, path) => this.find(path, code).file; + } this.files.clear(); for (const entry of entries) { this.files.set(entry.key, Static.withData(entry)); @@ -117,18 +134,18 @@ class Static extends Place { const fileName = `.${code}.html`; filePath = join(filePath, fileName); const file = this.get(filePath); - if (file) return { data: file.data, stat: null, code }; - if (root) return status(code); + if (file) return { file, code }; + if (root) return { file: status(code), code }; } else { const folder = path.endsWith('/'); if (folder && !parent) { filePath = join(path, 'index.html'); } let file = this.get(filePath); - if (file) return { ...file, code: 200 }; + if (file) return { file, code: 200 }; filePath = join(path, '.virtual.html'); file = this.get(filePath); - if (file) return { ...file, code: -1 }; + if (file) return { file, code: -1 }; if (root) return this.find(filePath, 404, true); } filePath = node.path.dirname(path); @@ -147,51 +164,58 @@ class Static extends Place { async serve(url, transport) { const [filePath] = metarhia.metautil.split(url, '?'); const fileExt = metarhia.metautil.fileExt(filePath); - let file = this.find(filePath); - - // SAB-backed cached file (shared memory from main process) - if (file.sab) { - const { sab, byteLength, code } = file; - if (code === -1) { - const data = Buffer.from(sab, 0, byteLength); - return void transport.write(data, 200, 'html'); - } - const { headers } = transport.req; - if (headers.range) { - const range = metarhia.metautil.parseRange(headers.range); - const { start, end = byteLength - 1 } = range; - if (start >= end || start 
>= byteLength || end >= byteLength) { - file = this.find(filePath, 416); - return void transport.write(file.data, 416, fileExt); + const result = this.search(filePath); + + if (result) { + const { file, code } = result; + + // SAB-backed cached file (shared memory from main process) + if (file.sab) { + const { sab, data, byteLength } = file; + if (code === -1) return void transport.write(data, 200, 'html'); + const { headers } = transport.req; + if (headers.range) { + const range = metarhia.metautil.parseRange(headers.range); + const { start, end = byteLength - 1 } = range; + if (start >= end || start >= byteLength || end >= byteLength) { + const err = this.errorPage(416, filePath); + return void transport.write(err.data, 416, fileExt); + } + if (byteLength > this.streamThreshold) { + const readable = createSABStream(sab, byteLength, { start, end }); + const options = { start, end, size: byteLength }; + return void transport.write(readable, 206, fileExt, options); + } + const slice = data.subarray(start, end + 1); + const options = { start, end, size: byteLength }; + return void transport.write(slice, 206, fileExt, options); } if (byteLength > this.streamThreshold) { - const readable = createSABStream(sab, byteLength, { start, end }); - const options = { start, end, size: byteLength }; - return void transport.write(readable, 206, fileExt, options); + const readable = createSABStream(sab, byteLength); + const options = { size: byteLength }; + return void transport.write(readable, code, fileExt, options); } - const data = Buffer.from(sab, start, end - start + 1); - const options = { start, end, size: byteLength }; - return void transport.write(data, 206, fileExt, options); + return void transport.write(data, code, fileExt); } - if (byteLength > this.streamThreshold) { - const readable = createSABStream(sab, byteLength); - const options = { size: byteLength }; - return void transport.write(readable, code, fileExt, options); + + // Legacy path: Buffer-backed cached 
file (used by cert or local cache) + if (file.data && file.stat) { + if (code === -1) return void transport.write(file.data, 200, 'html'); + return void transport.write(file.data, code, fileExt); } - const data = Buffer.from(sab, 0, byteLength); - return void transport.write(data, code, fileExt); - } - // Legacy path: Buffer-backed cached file (used by cert or local cache) - if (file.data && file.stat) { - if (file.code === -1) return void transport.write(file.data, 200, 'html'); - return void transport.write(file.data, file.code, fileExt); + // Status page or virtual file fallback (from find/virtualFS) + if (file.data) { + const ext = code === -1 ? 'html' : fileExt; + const statusCode = code === -1 ? 200 : code || 404; + return void transport.write(file.data, statusCode, ext); + } } - // Uncached large file: stream from disk + // Uncached or oversized file: stream from disk const absPath = join(this.path, url); - if (file.size > 0 || (file.stat && absPath.startsWith(this.path))) { - let size = file.size || file.stat?.size; + if (absPath.startsWith(this.path)) { + let size = result?.file?.size || result?.file?.stat?.size || 0; if (!size) { const stat = await node.fsp.stat(absPath).catch(() => null); if (stat && stat.isFile()) size = stat.size; @@ -204,8 +228,8 @@ class Static extends Place { const range = metarhia.metautil.parseRange(headers.range); const { start, end = size - 1 } = range; if (start >= end || start >= size || end >= size) { - file = this.find(filePath, 416); - return void transport.write(file.data, 416, fileExt); + const err = this.errorPage(416, filePath); + return void transport.write(err.data, 416, fileExt); } options.start = start; options.end = end; @@ -216,14 +240,9 @@ class Static extends Place { } } - // Status page or virtual file fallback - if (file.data) { - const code = file.code === -1 ? 200 : file.code || 404; - const ext = file.code === -1 ? 
'html' : fileExt; - return void transport.write(file.data, code, ext); - } - const errFile = this.find(filePath, 404); - return void transport.write(errFile.data, 404); + // 404 + const err = this.errorPage(404, filePath); + return void transport.write(err.data, 404); } } diff --git a/schemas/config/cache.js b/schemas/config/cache.js index b1ce22a16..2275b0aa0 100644 --- a/schemas/config/cache.js +++ b/schemas/config/cache.js @@ -2,5 +2,6 @@ size: 'size', maxFileSize: 'size', streamThreshold: { type: 'size', required: false }, + virtualFS: { type: 'boolean', required: false }, avoid: { array: 'string', required: false }, }); From c254183ed3e4dbae4d4bd1d07c97e5ff06536c48 Mon Sep 17 00:00:00 2001 From: Turone Date: Fri, 3 Apr 2026 17:58:33 +0300 Subject: [PATCH 5/9] Slight code optimizations --- lib/cache.js | 7 ++----- lib/static.js | 19 ++++++++----------- test/cache-shared.js | 24 ++++++------------------ 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/lib/cache.js b/lib/cache.js index b6618f530..9acb7cea2 100644 --- a/lib/cache.js +++ b/lib/cache.js @@ -11,7 +11,6 @@ class StaticCache { this.appPath = appPath; this.places = new Map(); this.directories = []; - this.version = 0; this.watcher = null; const cacheConfig = config?.cache || {}; const { sizeToBytes } = metarhia.metautil; @@ -56,15 +55,12 @@ class StaticCache { try { const stat = await node.fsp.stat(filePath); const key = StaticCache.getKey(filePath, place.path); - const ver = ++this.version; - place.entriesCache = null; if (stat.size > this.maxFileSize) { const entry = { key, sab: null, byteLength: 0, size: stat.size, - version: ver, }; place.files.set(key, entry); return entry; @@ -77,7 +73,6 @@ class StaticCache { sab, byteLength: data.byteLength, size: stat.size, - version: ver, }; place.files.set(key, entry); return entry; @@ -114,6 +109,7 @@ class StaticCache { const before = new Set(this.places.get(name).files.keys()); await this._loadDir(name, filePath); const place = 
this.places.get(name); + place.entriesCache = null; for (const [key, entry] of place.files) { if (!before.has(key)) { broadcast({ name: 'cache-update', place: name, entry }); @@ -123,6 +119,7 @@ class StaticCache { } const entry = await this._loadFile(name, filePath); if (entry) { + this.places.get(name).entriesCache = null; broadcast({ name: 'cache-update', place: name, entry }); } }); diff --git a/lib/static.js b/lib/static.js index 37d7e8808..7a42bbe0c 100644 --- a/lib/static.js +++ b/lib/static.js @@ -15,17 +15,17 @@ const STATUS_CACHE = new Map(); const status = (code) => { let file = STATUS_CACHE.get(code); if (file) return file; - const status = node.http.STATUS_CODES[code] || 'Unknown error'; + const statusText = node.http.STATUS_CODES[code] || 'Unknown error'; const data = Buffer.from(` -${code} ${status} -

${code} ${status}

`); +${code} ${statusText} +

${code} ${statusText}

`); file = { data, stat: null, code }; STATUS_CACHE.set(code, file); return file; }; const createSABStream = (sab, byteLength, options = {}) => { - const start = options.start || 0; + const start = options.start ?? 0; const end = options.end ?? byteLength - 1; let offset = start; return new Readable({ @@ -213,14 +213,11 @@ class Static extends Place { } // Uncached or oversized file: stream from disk - const absPath = join(this.path, url); + const absPath = join(this.path, filePath); if (absPath.startsWith(this.path)) { - let size = result?.file?.size || result?.file?.stat?.size || 0; - if (!size) { - const stat = await node.fsp.stat(absPath).catch(() => null); - if (stat && stat.isFile()) size = stat.size; - } - if (size) { + const stat = await node.fsp.stat(absPath).catch(() => null); + if (stat?.isFile()) { + const { size } = stat; const options = { size }; let code = 200; const { headers } = transport.req; diff --git a/test/cache-shared.js b/test/cache-shared.js index 3fcea0185..5478c1a2d 100644 --- a/test/cache-shared.js +++ b/test/cache-shared.js @@ -27,7 +27,6 @@ test('StaticCache - should load files into SAB', async () => { assert.ok(entry); assert.ok(entry.sab instanceof SharedArrayBuffer); assert.strictEqual(entry.byteLength, entry.sab.byteLength); - assert.ok(entry.version > 0); const data = Buffer.from(entry.sab, 0, entry.byteLength); assert.ok(data.length > 0); }); @@ -41,7 +40,6 @@ test('StaticCache - entries have correct structure', async () => { assert.ok(entry.key.startsWith('/')); assert.strictEqual(typeof entry.byteLength, 'number'); assert.strictEqual(typeof entry.size, 'number'); - assert.strictEqual(typeof entry.version, 'number'); if (entry.sab) { assert.ok(entry.sab instanceof SharedArrayBuffer); } @@ -71,7 +69,6 @@ test('Static initCache - populate from SAB entries', () => { sab, byteLength: content.byteLength, size: content.byteLength, - version: 1, }, ]); assert.strictEqual(cache.files.size, 1); @@ -86,9 +83,7 @@ test('Static updateEntry 
- updates SAB entry', () => { const cache = new Static('lib', application); const sab1 = new SharedArrayBuffer(9); new Uint8Array(sab1).set(Buffer.from('version 1')); - cache.initCache([ - { key: '/f.js', sab: sab1, byteLength: 9, size: 9, version: 1 }, - ]); + cache.initCache([{ key: '/f.js', sab: sab1, byteLength: 9, size: 9 }]); const content2 = Buffer.from('version 2 updated'); const sab2 = new SharedArrayBuffer(content2.byteLength); new Uint8Array(sab2).set(content2); @@ -97,11 +92,9 @@ test('Static updateEntry - updates SAB entry', () => { sab: sab2, byteLength: content2.byteLength, size: content2.byteLength, - version: 2, }); const file = cache.get('/f.js'); assert.deepStrictEqual(file.data, content2); - assert.strictEqual(file.version, 2); }); test('Static deleteEntry - removes entry by key', () => { @@ -109,8 +102,8 @@ test('Static deleteEntry - removes entry by key', () => { const sab = new SharedArrayBuffer(4); new Uint8Array(sab).set([1, 2, 3, 4]); cache.initCache([ - { key: '/a.js', sab, byteLength: 4, size: 4, version: 1 }, - { key: '/b.js', sab, byteLength: 4, size: 4, version: 2 }, + { key: '/a.js', sab, byteLength: 4, size: 4 }, + { key: '/b.js', sab, byteLength: 4, size: 4 }, ]); assert.strictEqual(cache.files.size, 2); cache.deleteEntry('/a.js'); @@ -127,7 +120,6 @@ test('Static withData - null sab has null data', () => { sab: null, byteLength: 0, size: 20000000, - version: 1, }, ]); const file = cache.get('/big.bin'); @@ -139,7 +131,7 @@ test('Static SAB data is zero-copy view', () => { const sab = new SharedArrayBuffer(5); new Uint8Array(sab).set([10, 20, 30, 40, 50]); const cache = new Static('lib', application); - cache.initCache([{ key: '/f.bin', sab, byteLength: 5, size: 5, version: 1 }]); + cache.initCache([{ key: '/f.bin', sab, byteLength: 5, size: 5 }]); const file = cache.get('/f.bin'); assert.strictEqual(file.data.buffer, sab); }); @@ -147,13 +139,9 @@ test('Static SAB data is zero-copy view', () => { test('Static initCache clears 
previous entries', () => { const cache = new Static('lib', application); const sab = new SharedArrayBuffer(2); - cache.initCache([ - { key: '/old.js', sab, byteLength: 2, size: 2, version: 1 }, - ]); + cache.initCache([{ key: '/old.js', sab, byteLength: 2, size: 2 }]); assert.strictEqual(cache.files.size, 1); - cache.initCache([ - { key: '/new.js', sab, byteLength: 2, size: 2, version: 2 }, - ]); + cache.initCache([{ key: '/new.js', sab, byteLength: 2, size: 2 }]); assert.strictEqual(cache.files.size, 1); assert.strictEqual(cache.get('/old.js'), undefined); assert.ok(cache.get('/new.js')); From a6fa9b60f9aba66e58ed2b93b70534b2fae340c9 Mon Sep 17 00:00:00 2001 From: Turone Date: Tue, 21 Apr 2026 16:33:27 +0300 Subject: [PATCH 6/9] fix: Add Copilot instructions and sync script --- .github/copilot-instructions.md | 57 +++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 .github/copilot-instructions.md diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 000000000..95a2e2945 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,57 @@ +GitHub Copilot Instructions — Impress + +These instructions define the project-wide baseline for work in this repository. +Branch- or subsystem-specific architecture belongs in `.github/instructions/*.instructions.md`. +When working in an area covered by one of those files, treat the matching +instruction file as the implementation-detail source of truth. + +PROJECT CONTEXT +This repository is metarhia/impress, a high-performance server runtime using: + +- worker_threads concurrency +- hot-reload and filesystem watch +- strict backward-compatibility requirements across the Metarhia ecosystem + +GENERAL RULES + +- Do not break existing public APIs or user-visible behavior without a clear reason. +- Preserve backward compatibility unless the change explicitly requires otherwise. +- Keep code, tests, and documentation in sync. 
+- Prefer minimal changes that preserve the current external behavior. +- Keep module boundaries intact; do not introduce unnecessary coupling. + +FILE OWNERSHIP + +- `.github/copilot-instructions.md` contains only repository-wide, branch-agnostic rules. +- `AGENTS.md` contains workflow/process rules for agents. +- `.github/instructions/*.instructions.md` contains branch- or subsystem-specific implementation details. + +BRANCH-AWARE INSTRUCTION SELECTION + +- Instruction files in `.github/instructions/` may include a `branch` field in their YAML frontmatter. +- A file with `branch: X` applies ONLY when the current Git branch is `X`. +- When multiple instruction files match an edited file's path, determine the current Git branch and follow ONLY the instruction file whose `branch` value matches. Ignore all non-matching branch-scoped files. +- Instruction files WITHOUT a `branch` field are general and apply to all branches. +- If no instruction file matches the current branch, do not invent constraints from non-matching files. + +WORKING RULES FOR INSTRUCTION FILES + +- Before changing files matched by an instruction file's `applyTo`, read that instruction file. +- Multiple instruction files may coexist for different modules, subsystems, or branches; keep each focused and scoped. +- If a branch-specific instruction file and the code diverge, update the instruction file to match the code in the current branch. + +SYNCING BRANCH-SPECIFIC INSTRUCTIONS + +- Branch-specific instruction files are stored on the `CopilotInstructions` branch and synced to feature branches as untracked files. +- They MUST NOT be committed on feature branches or included in pull requests. +- To sync, run: `.github/scripts/sync-instructions.ps1` (PowerShell) or the equivalent shell commands. 
+- First-time bootstrap (when the script is not yet present locally): + `git fetch origin CopilotInstructions; git checkout origin/CopilotInstructions -- .github/instructions/ .github/scripts/; git reset HEAD -- .github/instructions/ .github/scripts/` + +TESTING + +- Validate changes with the existing test suite when behavior may be affected. +- Update or add tests when behavior, integration, or configuration semantics change. + +FINAL RULE +This file defines only the baseline constraints for Impress. Do not place branch- or subsystem-specific architecture here; keep that in `.github/instructions/*.instructions.md`. From 25413cb4d62f028853df99d4f361351ecd50b1d4 Mon Sep 17 00:00:00 2001 From: Turone Date: Thu, 23 Apr 2026 17:52:42 +0300 Subject: [PATCH 7/9] feat: replace per-worker file cache with SharedArrayBuffer shared cache --- VFS-SAB.md | 507 +++++++++++++++++++++++++++++++++++ impress.js | 52 ++-- lib/application.js | 33 ++- lib/cache.js | 156 ----------- lib/cache/LimitCache.js | 383 ++++++++++++++++++++++++++ lib/cache/PerFileCache.js | 108 ++++++++ lib/cache/PlacementSource.js | 40 +++ lib/cache/SharedCache.js | 373 ++++++++++++++++++++++++++ lib/cert.js | 49 +++- lib/static.js | 213 ++++++--------- lib/worker.js | 54 +++- schemas/config/cache.js | 17 ++ test/cache-shared.js | 278 ++++++++++++------- test/static.js | 222 +++++++++++++-- 14 files changed, 2042 insertions(+), 443 deletions(-) create mode 100644 VFS-SAB.md delete mode 100644 lib/cache.js create mode 100644 lib/cache/LimitCache.js create mode 100644 lib/cache/PerFileCache.js create mode 100644 lib/cache/PlacementSource.js create mode 100644 lib/cache/SharedCache.js diff --git a/VFS-SAB.md b/VFS-SAB.md new file mode 100644 index 000000000..cd2b9461d --- /dev/null +++ b/VFS-SAB.md @@ -0,0 +1,507 @@ +# SharedArrayBuffer Cache in Impress + +## Motivation + +Impress uses `worker_threads` to handle HTTP requests. Each worker serves static files (HTML, CSS, JS, images, etc.). 
Without shared memory, every worker keeps its own copy of every file — with 8 workers and 100 MiB of static assets, total consumption reaches 800 MiB. SharedArrayBuffer stores all files in shared memory accessible to all threads. + +## Benchmarks + +Baseline: no SAB (per-worker file copies). Delta columns show improvement relative to baseline — positive values are better even for latency and memory metrics. + +| File | Metric | Before (no SAB) | SAB limit mode | Delta limit | SAB per-file mode | Delta per-file | +| --- | --- | ---: | ---: | ---: | ---: | ---: | +| bench-64k.bin | RPS | 1767.14 | 1663.80 | -5.85% | 1711.10 | -3.17% | +| bench-64k.bin | Throughput MB/s | 111.16 | 104.65 | -5.86% | 107.66 | -3.15% | +| bench-64k.bin | p95 ms | 330.00 | 321.00 | +2.73% | 62.00 | +81.21% | +| bench-256k.bin | RPS | 507.04 | 507.21 | +0.03% | 494.04 | -2.56% | +| bench-256k.bin | Throughput MB/s | 126.97 | 127.02 | +0.04% | 123.73 | -2.55% | +| bench-256k.bin | p95 ms | 752.00 | 770.00 | -2.39% | 254.00 | +66.22% | +| bench-1m.bin | RPS | 127.80 | 128.71 | +0.71% | 167.16 | +30.80% | +| bench-1m.bin | Throughput MB/s | 127.84 | 128.79 | +0.74% | 167.21 | +30.80% | +| bench-1m.bin | p95 ms | 1976.00 | 2047.00 | -3.59% | 1017.00 | +48.53% | +| bench-5m.bin | RPS | 83.12 | 113.72 | +36.81% | 100.58 | +21.01% | +| bench-5m.bin | Throughput MB/s | 415.72 | 568.79 | +36.82% | 503.00 | +20.99% | +| bench-5m.bin | p95 ms | 23172.00 | 13300.00 | +42.60% | 5187.00 | +77.62% | +| bench-10m.bin | RPS | 79.60 | 93.40 | +17.34% | 99.75 | +25.31% | +| bench-10m.bin | Throughput MB/s | 759.08 | 891.02 | +17.38% | 951.38 | +25.33% | +| bench-10m.bin | p95 ms | 25680.00 | 23264.00 | +9.41% | 9827.00 | +61.73% | + +| System metric | Before (no SAB) | SAB limit mode | Delta limit | SAB per-file mode | Delta per-file | +| --- | ---: | ---: | ---: | ---: | ---: | +| CPU max % | 22.91 | 21.82 | +4.76% | 21.03 | +8.21% | +| Working set max MB | 5066.02 | 1207.50 | +76.16% | 1256.71 | +75.19% | 
+| Private max MB | 5110.43 | 1360.43 | +73.38% | 1297.94 | +74.60% | + +## Architecture + +The system is split into four modules behind one orchestrator: + +| Module | Location | Purpose | +|--------|----------|---------| +| **SharedCache** | `lib/cache/SharedCache.js` | Orchestration: mode selection, configuration, watcher, ACK tracking, compaction dispatch | +| **LimitCache** | `lib/cache/LimitCache.js` | Limit-mode backend: slab allocator with pooled SAB segments, extent-based allocation, compaction | +| **PerFileCache** | `lib/cache/PerFileCache.js` | Per-file-mode backend: one SAB per file, no pooling, no ACK | +| **PlacementSource** | `lib/cache/PlacementSource.js` | Filesystem scanner, returns `{ stat, path }` per file | + +`SharedCache` selects the backend once at construction time based on `config.cache.mode` and delegates all allocation, snapshot, projection, free, and compact operations to it. + +`LimitCache` has no dependencies on Node.js built-ins or Impress internals. This allows it to be used in a browser or in tests without mocks. + +## Dual-mode design + +The cache supports two backends, selectable via `config.cache.mode`: + +| | **Limit mode** (default) | **Per-file mode** | +|---|---|---| +| Backend class | `LimitCache` | `PerFileCache` | +| SAB strategy | Pooled segments (slab allocator) | One SAB per file | +| ACK required | Yes | No | +| Compaction | Yes — defragments segments | No — `compact()` returns `null` | +| Free | Deferred until all workers ACK | Immediate (GC when references drop) | +| Epoch flush | `#flushEpochWithAck` (tracks `updateId`) | `#flushEpochFireAndForget` (no `updateId`) | +| Snapshot segments | `[{ id, sab }, ...]` | `null` | +| Shared entry shape | `{ kind: 'shared', segmentId, offset, length, stat }` | `{ kind: 'shared', sab, length, stat }` | + +Worker-side code detects the mode by the presence of `sharedCache.segments` in the snapshot. 
+ +`needsAck` is not a property of the backends; `SharedCache` derives it directly from `mode === 'limit'`. + +--- + +## Limit mode: Slab Allocator + +The memory management model follows the Linux SLUB allocator principle: SharedArrayBuffer segments are **never returned to the OS**. Instead, they go through a lifecycle: + +``` ++----------+ files deleted +----------+ memory needed +----------+ +| Active | -----------------> | Clean | -----------------> | Active | +| (data) | | (empty) | | (data) | ++----------+ +----------+ +----------+ +``` + +**Why SABs are never freed:** V8 cannot reduce reserved virtual memory after a SharedArrayBuffer is deallocated. Recreating a SAB of the same size still allocates a new page. Retaining empty segments (`cleanSegmentIds`) and reusing them completely eliminates allocation system calls. + +### Internal classes in LimitCache.js + +#### Pool + +Manages the memory budget and the set of SAB segments. + +``` +Pool ++-- segments: Map — all segments ++-- cleanSegmentIds: Set — empty, ready for reuse ++-- limit: number — total budget (default 1 GiB) ++-- baseSegmentSize: number — segment size (default 64 MiB) ++-- totalUsed: number — total size of all SABs +| ++-- createBaseSegment() — takes a clean segment or creates a new SAB ++-- retireSegment(id) — marks an empty segment as clean ++-- getSegment(id) — access by ID +``` + +Key detail: `baseSegmentSize = Math.ceil(maxFileSize / configured) * configured`. The segment size is rounded up to the nearest multiple of the configured value that can fit `maxFileSize`. For example, with `configured = 64 MiB` and `maxFileSize = 100 MiB`, the segment size becomes `128 MiB` (2 × 64 MiB). When `maxFileSize` is smaller than `configured` (the typical case, e.g. `10 MB` and `64 MiB`), the segment size stays at `configured`. The `limit` must be evenly divisible by the effective segment size — otherwise the remainder is wasted. There are no dedicated segments — a single segment type serves all files. 
+ +#### Registry + +Extent-based allocator within base segments. Each segment is tracked via: + +- **`tails: Map<segmentId, number>`** — boundary of written data (high water mark) +- **`freeExtents: Map<segmentId, Array<{ offset, length }>>`** — freed regions + +`allocate(size, noCreate=false)` algorithm: + +``` +1. Best-fit search across free extents of all segments + > found a match > return { segmentId, offset } + > exact size match > remove the extent + > larger than needed > shrink the extent + +2. Tail-append: find a segment where tail + size ≤ baseSegmentSize + > found > advance tail, return + +3. New segment (if !noCreate): + > pool.createBaseSegment() > registerSegment > tail = size + > budget exhausted > return null (file becomes a disk entry) +``` + +With `noCreate=true` (used by compact), step 3 is skipped — data is only moved into existing segments. + +`free()` inserts an extent into a sorted list and merges adjacent ones: + +``` +[100..200] + [200..300] > [100..300] // merge right +[0..100] + [100..300] > [0..300] // merge left +``` + +### Entry types (limit mode) + +**Shared entry** — file in SAB: +```js +{ kind: 'shared', segmentId, offset, length, stat } +``` +Zero-byte files are shared entries with `segmentId: 0, offset: 0, length: 0` — no segment is allocated. + +**Disk entry** — file on disk (size > maxFileSize, or budget exhausted): +```js +{ kind: 'disk', path, stat, data: null } +``` + +--- + +## Per-file mode + +Each file with `size <= maxFileSize` gets its own `SharedArrayBuffer`. No pool, no allocator metadata, no compaction, no ACK flow. + +### Entry types (per-file mode) + +**Shared entry** — file in its own SAB: +```js +{ kind: 'shared', sab, length, stat } +``` +Zero-byte files use `new SharedArrayBuffer(0)` — a valid zero-length SAB. + +**Disk entry** — same as limit mode: +```js +{ kind: 'disk', path, stat, data: null } +``` + +`free()` and `compact()` are no-ops. Snapshot returns `{ segments: null, indexes: {...} }`.
+ +--- + +## File loading + +### Initial load (main thread) + +``` +SharedCache.initialize() + > for each placement: + source.load() // PlacementSource scans the directory + cache.load(name, files) // Backend distributes files across SABs +``` + +If shared cache initialization fails (configuration, filesystem, or reader error), application startup is aborted — there is no fallback to per-worker static loading. Empty placements are valid: initialization succeeds with an empty index and zero allocated segments. + +In limit mode, `load()` sorts files by descending size — large files are placed first, reducing fragmentation. For each file, `#allocateEntry()` is called: + +1. `size > maxFileSize` > disk entry +2. No data and no reader > disk entry +3. `size === 0` > shared entry with `segmentId: 0, offset: 0, length: 0` (no segment allocated) +4. `registry.allocate(size)` > obtains `{ segmentId, offset }` — free space in a segment +5. `reader(path, sab, offset, size)` > reads the file from disk directly into SAB, bypassing the heap; if `data` is already in memory — copies via `Uint8Array.set(data)` + +In per-file mode, `#allocateEntry()` creates a new `SharedArrayBuffer(size)` for each file (or `SharedArrayBuffer(0)` for zero-byte files) and reads the content into it directly. + +The reader is injected when SharedCache is created — it is `async (path, sab, offset, size) => void`. In Node.js it is implemented via `fh.read(Buffer.from(sab, offset, size))` — a Buffer view is created over the SharedArrayBuffer region, and `fs` writes data directly there. + +### Delivery to workers + +``` +workerData.sharedCache = cache.snapshot() +``` + +Limit mode snapshot: +```js +{ segments: [{ id, sab }, ...], indexes: { placement: { entries: [...] } } } +``` + +Per-file mode snapshot: +```js +{ segments: null, indexes: { placement: { entries: [...] } } } +``` + +SharedArrayBuffer is passed via `workerData` — V8 transfers only a reference, no data copying occurs. 
+ +## Worker-side projection + +The worker detects the backend mode by the presence of `sharedCache.segments`: + +```js +// worker.js — module scope +if (sharedCache && sharedCache.segments) { + // Limit mode: build segmentsMap, use LimitCache.projectEntry + const { LimitCache } = require('./cache/LimitCache.js'); + for (const seg of sharedCache.segments) segmentsMap.set(seg.id, seg.sab); + projectEntry = (entry) => LimitCache.projectEntry(entry, segmentsMap); +} else if (sharedCache) { + // Per-file mode: use PerFileCache.projectEntry + const { PerFileCache } = require('./cache/PerLimitCache.js'); + projectEntry = (entry) => PerFileCache.projectEntry(entry); +} +``` + +ACK function is also mode-dependent: + +```js +const sendAck = sharedCache && sharedCache.segments + ? (updateId) => parentPort.postMessage({ name: 'ack-update', updateId }) + : () => {}; // no-op in per-file mode +``` + +### Limit mode projection + +Each shared entry is projected into an object with an eager Buffer view: + +```js +{ data: Buffer.from(segmentsMap.get(segmentId), offset, length), stat } +``` + +Zero-byte entries (`length === 0`) are projected as `{ data: Buffer.alloc(0), stat }` without consulting `segmentsMap`. `free()` also skips zero-byte entries — they hold no segment allocation. + +`Buffer.from(sab, offset, length)` creates a lightweight view (~64 bytes descriptor) over the SAB region — no data copy. The view is created once at projection time. Since segments are never freed (slab retention), SAB references in `segmentsMap` live for the entire process lifetime. When a file is removed via `deleteFiles`, the projected object loses its last reference and is GC'd along with the Buffer view. Stale data in the segment is overwritten upon reuse. 
+ +### Per-file mode projection + +Each shared entry is projected similarly, but the SAB is stored directly on the entry: + +```js +{ data: Buffer.from(entry.sab, 0, length), stat } +``` + +No `segmentsMap` is needed — each entry carries its own SAB reference. + +### Common + +Disk entries in both modes are projected as `{ data: null, stat, path }`. + +## Hot-reload: epoch-based delta updates + +metawatch debounces filesystem events, collecting them into a batch during a quiet period. SharedCache uses **epoch coalescing** on top of this: all changes and deletions in a single metawatch batch are collected into one epoch, then flushed as minimal broadcasts. + +Routing from a filesystem event to a placement is done by the first path segment relative to application root, not by absolute-path prefix matching. This avoids collisions such as `static` vs `static2`. + +``` +metawatch SharedCache + ¦ ¦ + +-- debounce fs.watch events ¦ + +-- 'before' ------------------> epoch = { updates, deletes, oldEntries } + +-- 'change' file1 -----------> push processChange() promise + +-- 'change' file2 -----------> push processChange() promise + +-- 'delete' file3 -----------> processDelete() (sync) + +-- 'after' -------------------> Promise.all > flushEpoch() +``` + +`#flushEpoch` is set once during `watch()` — it is `#flushEpochWithAck` in limit mode and `#flushEpochFireAndForget` in per-file mode. + +### Limit mode flush + +Sends at most `2 × placements` messages (one `file-update` and one `file-delete` per placement), each carrying an `updateId`. All old entries are tracked against the **last** `updateId` — since `worker_threads` guarantees FIFO ordering, an ACK for the last message implies all prior messages have been processed.
+ +``` +flushEpochWithAck: + 1 file-update per placement (entries + newSegments + updateId) + 1 file-delete per placement (keys + updateId) + trackUpdate(lastUpdateId, all old entries) +``` + +1000 file changes > 1 broadcast with 1000 entries > N workers receive 1 message > N ACKs > 1 free cycle. + +### Per-file mode flush + +Sends the same `file-update` and `file-delete` messages but without `updateId` or `newSegments`. Old entries from the epoch are not tracked — memory is freed by GC when old SABs lose their last reference. + +``` +flushEpochFireAndForget: + 1 file-update per placement (entries only) + 1 file-delete per placement (keys only) +``` + +### ACK protocol (limit mode only) + +Old entries are not freed immediately — a worker may be reading data at the moment of an update. Protocol: + +``` +Main thread Workers + ¦ ¦ + +-- file-update (updateId=5) -----> ¦ + +-- file-delete (updateId=6) -----> ¦ + ¦ +-- apply update, ack 5 + ¦ <-------------------- ack 5 (ignored, not tracked) + ¦ +-- apply delete, ack 6 + ¦ <-------------------- ack 6 -----+ + ¦ ... all workers acked 6 ... ¦ + +-- free(all oldEntries) ¦ + +-- tryCompact() ¦ + L------------------------------------- +``` + +If a worker crashes, the `worker.exit` event triggers `sharedCache.handleWorkerExit(id)`, which immediately removes the worker from all pending ACK sets. If it was the last expected worker, `free` is called right away. The new worker is restarted and receives a fresh `snapshot()`. + +There is no timeout-based forced free — a live worker will always eventually process its message queue and send an ACK. Forced free of a slow-but-alive worker would risk data corruption: the freed extent could be reused by another file while the worker's Buffer view still points to it. + +## Compaction (limit mode only) + +After entries are freed, `compact(threshold=0.3)` is called: + +1. Finds the base segment with the lowest utilization below `threshold` +2. 
Requires at least 2 base segments (a single segment has nowhere to compact to) +3. Attempts to move all files from the target segment into others (via `allocate(size, noCreate=true)`) +4. On success — updates indexes, groups moved files by placement, sends one `file-update` per affected placement, and tracks all `oldEntries` against the **last** `updateId` of the compaction batch +5. On failure — full rollback: restores extents and tail of the target segment + +Compaction uses the same batch-first ACK rule as epoch flush: workers may receive several `file-update` messages from one compaction, but memory is released only after the ACK for the last message in that batch. + +After compaction, the emptied segment automatically enters `cleanSegmentIds` through the normal `free > retireSegment` cycle. + +``` +Before compaction: + +Segment 1: [fileA][____][fileB][________] utilization 20% +Segment 2: [fileC][fileD][______________] utilization 60% + +After: + +Segment 1: > clean (empty, ready for reuse) +Segment 2: [fileC][fileD][fileA][fileB][_] utilization 80% +``` + +## Serving (lib/static.js) + +`Static` is the worker-side serving layer. Each shared-cache placement creates a `Static` instance that holds projected `files` and handles HTTP responses. + +### Initialization + +`initServing(config)` is called after projection. It reads two config options: + +- **`streamThreshold`** — file size above which responses are streamed rather than written as a single buffer. Default: `'1 mb'`. Accepts any size unit (`sizeToBytes`). +- **`virtualFS`** — enables recursive virtual filesystem resolution. Default: `false`. + +When `virtualFS` is **off** (default): +- `search` = `lookup()` — exact match + `index.html` for directory paths +- `errorPage` — generates a minimal HTML page containing only the status text (e.g. `404 Not Found`) + +When `virtualFS` is **on**: +- `search` = `find()` — walks up the directory tree looking for `index.html`, `.virtual.html`, `.{code}.html` +- `errorPage` — searches for custom error pages (`.404.html`, `.416.html`) in the file tree + +### Serve flow + +``` +serve(url, transport) + ¦ + +- 1. Fast exact-hit (file has data + stat, not internal) + ¦ +-- Range request? > validate > stream or subarray > 206 + ¦ +-- size > streamThreshold? > createSABStream() > 200 + ¦ L-- small file > transport.write(data) > 200 + ¦ + +- 2. Recursive search via lookup() or find() + ¦ +-- file with data + stat (e.g. index.html via directory path) > write directly + ¦ L-- file with data only (status/virtual pages) > write directly + ¦ Range and streaming intentionally omitted — always small HTML files + ¦ + +- 3. Disk fallback (uncached or oversized) + ¦ +-- Range request? > validate > fs.createReadStream(options) > 206 + ¦ L-- fs.createReadStream() > 200 + ¦ + L- 4. 404 +``` + +### SAB streaming + +When a cached file exceeds `streamThreshold`, it is sent via `createSABStream()` which reads 64 KiB chunks from the SAB Buffer view. This applies to both full responses and range requests. Files below the threshold are written as a single buffer (or `subarray` for ranges). + +### Range requests + +Supported in the exact-hit and disk fallback paths: +- Valid range > 206 Partial Content (stream or subarray depending on size vs threshold) +- Invalid range (`start >= end`, `start >= size`, `end >= size`) > 416 Range Not Satisfiable + +Range requests reaching the recursive search path (step 2) are ignored by design — those paths resolve only small HTML files that are always served in full. + +### Disk fallback + +Files with `data: null` (oversized or budget-exhausted) are served from disk via `fs.createReadStream()`, with range support via `start`/`end` options.
+ +## Configuration + +```js +// config/cache.js +({ + mode: 'limit', // 'limit' (default) or any other value for per-file mode + maxFileSize: '10 mb', // files larger than this > disk entry + streamThreshold: '1 mb', // files larger than this > streamed in chunks (default '1 mb') + virtualFS: false, // enable recursive virtual FS resolution (default false) + placements: [ + { name: 'static' }, + { name: 'resources' }, + { name: 'assets', ext: ['.png', '.jpg', '.woff2'] }, + ], + // Limit mode options (ignored in per-file mode): + sab: { + limit: '1 gib', // total SAB budget (must be divisible by segment size) + baseSegmentSize: '64 mib', // single segment size (rounded up to a multiple that is ≥ maxFileSize) + }, +}); +``` + +All size values support both binary (KiB, MiB, GiB) and decimal (KB, MB, GB) units. + +The entire `cache` section is optional — when absent, all defaults apply (`mode: 'limit'`, `maxFileSize: '10 mb'`, `streamThreshold: '1 mb'`, `virtualFS: false`, `sab.limit: '1 gib'`, `sab.baseSegmentSize: '64 mib'`, placements: `static` + `resources`). + +**Important (limit mode):** `limit` must be evenly divisible by the effective `baseSegmentSize`. Otherwise the remainder is wasted — Pool cannot create a segment smaller than `baseSegmentSize`. The effective segment size is `Math.ceil(maxFileSize / configured) * configured`. + +## Safety invariants + +**Common (both modes):** +- Workers **never write** to SharedArrayBuffer +- All worker Buffer views are zero-copy descriptors over shared memory + +**Limit mode:** +- Old memory is freed **only after ACK** from all workers or worker exit +- SAB references in worker `segmentsMap` live for the entire process lifetime (slab retention) +- All worker Buffer views reference SABs from a **single** `segmentsMap` (not a copy) +- SAB segments are **never returned to the OS** — only reused +- Total memory usage is always ≤
`limit` + +**Per-file mode:** +- Each SAB is independent — no cross-file memory sharing +- Old SABs are freed by GC when all workers drop their references +- No memory budget enforcement — total usage scales with total file size + +## Patterns and influences + +The cache design draws on several well-known systems patterns: + +- **SLUB slab allocator** (Linux kernel) — segments are never returned to the OS; empty segments are marked clean and reused, eliminating allocation system calls (limit mode) +- **Extent-based allocation** (ext4, XFS) — free space tracked as `{ offset, length }` extents with best-fit search and adjacent merge on free (limit mode) +- **Event coalescing / group commit** (PostgreSQL WAL, Nagle's algorithm) — metawatch debounces fs events into batches, SharedCache coalesces each batch into minimal broadcasts via epoch flush (both modes) +- **Copy-on-write update** (MVCC) — file updates allocate a new extent, old data lives until all workers ACK; readers never see partial writes (limit mode) +- **Dependency injection** — LimitCache accepts an injectable `reader` function, keeping it free of Node.js built-in dependencies for cross-platform use + +## Data flow diagram + +``` + -----------------------------------¬ + ¦ Main Thread ¦ + ¦ ¦ + ¦ SharedCache (orchestrator) ¦ + ¦ +-- mode detection ¦ + ¦ +-- LimitCache (limit mode) ¦ + ¦ ¦ +-- Pool ¦ + ¦ ¦ ¦ L-- SAB segments ¦ + ¦ ¦ L-- Registry ¦ + ¦ ¦ L-- extents/tails ¦ + ¦ +-- PerFileCache (per-file mode) ¦ + ¦ ¦ L-- individual SABs ¦ + ¦ +-- PlacementSource[] ¦ + ¦ L-- Watcher ¦ + L----------T------------------------ + ¦ + snapshot / file-update / file-delete + ¦ + -------------------+------------------¬ + ¡ ¡ ¡ + --------------¬ --------------¬ --------------¬ + ¦ Worker 1 ¦ ¦ Worker 2 ¦ ¦ Worker N ¦ + ¦ ¦ ¦ ¦ ¦ ¦ + ¦ mode detect ¦ ¦ mode detect ¦ ¦ mode detect ¦ + ¦ project() ¦ ¦ project() ¦ ¦ project() ¦ + ¦ ¦ ¦ ¦ ¦ ¦ + ¦ place.files ¦ ¦ place.files ¦ ¦ place.files ¦ + ¦ (views) ¦ ¦ (views) ¦ ¦ 
(views) ¦ + L-------------- L-------------- L-------------- + ¦ ¦ ¦ + L---- Buffer.from(sab, ...) ----------- + zero-copy data access +``` diff --git a/impress.js b/impress.js index 885fbc6f8..5fa7d2398 100644 --- a/impress.js +++ b/impress.js @@ -11,7 +11,7 @@ const { Pool, isError } = require('metautil'); const { loadSchema } = require('metaschema'); const { Logger } = require('metalog'); const { Planner } = require('./lib/planner.js'); -const { StaticCache } = require('./lib/cache.js'); +const { SharedCache } = require('./lib/cache/SharedCache.js'); const CONFIG_SECTIONS = ['log', 'scale', 'server', 'sessions']; const PATH = process.cwd(); @@ -60,7 +60,15 @@ const broadcast = (app, data) => { }; const startWorker = async (app, kind, port, id = ++impress.lastWorkerId) => { - const workerData = { id, kind, root: app.root, path: app.path, port }; + const sharedCache = app.sharedCache.snapshot(); + const workerData = { + id, + kind, + root: app.root, + path: app.path, + port, + sharedCache, + }; const execArgv = [...process.execArgv, `--test-reporter=${REPORTER_PATH}`]; const options = { trackUnmanagedFds: true, workerData, execArgv }; const worker = new Worker(WORKER_PATH, options); @@ -70,22 +78,12 @@ const startWorker = async (app, kind, port, id = ++impress.lastWorkerId) => { } app.threads.set(id, worker); - if (app.cache) { - for (const place of ['static', 'resources']) { - const entries = StaticCache.getPlaceEntries(app.cache, place); - worker.postMessage({ - name: 'cache-init', - place, - entries, - }); - } - } - worker.on('error', (error) => { impress.console.error(error.message); }); worker.on('exit', (code) => { + app.sharedCache.handleWorkerExit(id); if (code !== 0) startWorker(app, kind, port, id); else app.threads.delete(id); if (impress.initialization) exit('Can not start Application server', 1); @@ -140,6 +138,10 @@ const startWorker = async (app, kind, port, id = ++impress.lastWorkerId) => { terminate: ({ code }) => { process.emit('TERMINATE', code); 
}, + + 'ack-update': ({ updateId }) => { + app.sharedCache.handleAck(updateId, id); + }, }; worker.on('message', (msg) => { @@ -186,20 +188,32 @@ const loadApplication = async (root, dir, master) => { impress.planner = await new Planner(tasksPath, tasksConfig, impress); impress.config = config; } - const cache = new StaticCache(dir, config); - await cache.loadPlace('static'); - await cache.loadPlace('resources'); + const cacheOptions = { config, dir, console: impress.console }; + const sharedCache = new SharedCache(cacheOptions); + try { + await sharedCache.initialize(); + } catch (error) { + error.message = `Shared cache init failed: ${error.message}`; + throw error; + } const { balancer, ports = [], workers = {} } = config.server; const threads = new Map(); const pool = new Pool({ timeout: workers.wait }); - const app = { root, path: dir, config, threads, pool, ready: 0, cache }; + const app = { + root, + path: dir, + config, + threads, + pool, + ready: 0, + sharedCache, + }; + sharedCache.watch(app); if (balancer) await startWorker(app, 'balancer', balancer); for (const port of ports) await startWorker(app, 'server', port); const poolSize = workers.pool || 0; for (let i = 0; i < poolSize; i++) await startWorker(app, 'worker'); - const watchTimeout = config.server.timeouts.watch; - cache.startWatch(watchTimeout, (data) => broadcast(app, data)); impress.applications.set(dir, app); }; diff --git a/lib/application.js b/lib/application.js index 872ca2601..9ae9d1d6e 100644 --- a/lib/application.js +++ b/lib/application.js @@ -55,6 +55,7 @@ class Application extends EventEmitter { this.watcher = null; this.semaphore = null; this.server = null; + this.sharedCache = null; } absolute(relative) { @@ -70,7 +71,11 @@ class Application extends EventEmitter { } } - async load({ invoke }) { + async load({ invoke, sharedCache }) { + if (sharedCache) { + this.sharedCache = sharedCache; + this.applySharedCache(sharedCache); + } this.startWatch(); this.createSandbox(); 
this.sandbox.application.invoke = invoke; @@ -92,6 +97,25 @@ class Application extends EventEmitter { await this.start(); } + applySharedCache(sharedCache) { + const { projectEntry, config } = this; + const { indexes } = sharedCache; + for (const name of Object.keys(indexes)) { + const index = indexes[name]; + const entries = + index.entries instanceof Map ? index.entries : new Map(index.entries); + const files = new Map(); + for (const [key, entry] of entries) { + files.set(key, projectEntry(entry)); + } + const place = this[name]; + if (!place) continue; + place.setFiles(files); + if (place.initServing) place.initServing(config); + } + sharedCache.segments = null; + } + async start() { const { sandbox, config, cert, mode } = this; const { kind, port } = workerData; @@ -196,12 +220,15 @@ class Application extends EventEmitter { startWatch() { const timeout = this.config.server.timeouts.watch; this.watcher = new DirectoryWatcher({ timeout }); + const shared = this.sharedCache + ? new Set(Object.keys(this.sharedCache.indexes)) + : new Set(); this.watcher.on('change', (filePath) => { const relPath = filePath.substring(this.path.length + 1); const sepIndex = relPath.indexOf(node.path.sep); const place = relPath.substring(0, sepIndex); - if (place === 'static' || place === 'resources') return; + if (shared.has(place)) return; node.fs.stat(filePath, (error, stat) => { if (error) return; if (stat.isDirectory()) return void this[place].load(filePath); @@ -214,7 +241,7 @@ class Application extends EventEmitter { const relPath = filePath.substring(this.path.length + 1); const sepIndex = relPath.indexOf(node.path.sep); const place = relPath.substring(0, sepIndex); - if (place === 'static' || place === 'resources') return; + if (shared.has(place)) return; this[place].delete(filePath); if (threadId === 1) this.console.debug('Deleted: /' + relPath); }); diff --git a/lib/cache.js b/lib/cache.js deleted file mode 100644 index 9acb7cea2..000000000 --- a/lib/cache.js +++ /dev/null 
@@ -1,156 +0,0 @@ -'use strict'; - -const { node, metarhia } = require('./deps.js'); - -const WIN = process.platform === 'win32'; -const MAX_FILE_SIZE = '10 mb'; -const STREAM_THRESHOLD = '1 mb'; - -class StaticCache { - constructor(appPath, config) { - this.appPath = appPath; - this.places = new Map(); - this.directories = []; - this.watcher = null; - const cacheConfig = config?.cache || {}; - const { sizeToBytes } = metarhia.metautil; - this.maxFileSize = sizeToBytes(cacheConfig.maxFileSize || MAX_FILE_SIZE); - this.streamThreshold = sizeToBytes( - cacheConfig.streamThreshold || STREAM_THRESHOLD, - ); - } - - static getKey(filePath, basePath) { - const key = filePath.substring(basePath.length); - if (WIN) return metarhia.metautil.replace(key, node.path.sep, '/'); - return key; - } - - async loadPlace(name) { - const dirPath = node.path.join(this.appPath, name); - await metarhia.metautil.ensureDirectory(dirPath); - const files = new Map(); - this.places.set(name, { files, path: dirPath, entriesCache: null }); - await this._loadDir(name, dirPath); - return StaticCache.getPlaceEntries(this, name); - } - - async _loadDir(placeName, dirPath) { - this.directories.push(dirPath); - try { - const items = await node.fsp.readdir(dirPath, { withFileTypes: true }); - for (const item of items) { - if (item.name.startsWith('.eslint')) continue; - const filePath = node.path.join(dirPath, item.name); - if (item.isDirectory()) await this._loadDir(placeName, filePath); - else await this._loadFile(placeName, filePath); - } - } catch { - // Directory may not exist yet - } - } - - async _loadFile(placeName, filePath) { - const place = this.places.get(placeName); - try { - const stat = await node.fsp.stat(filePath); - const key = StaticCache.getKey(filePath, place.path); - if (stat.size > this.maxFileSize) { - const entry = { - key, - sab: null, - byteLength: 0, - size: stat.size, - }; - place.files.set(key, entry); - return entry; - } - const data = await node.fsp.readFile(filePath); 
- const sab = new SharedArrayBuffer(data.byteLength); - new Uint8Array(sab).set(data); - const entry = { - key, - sab, - byteLength: data.byteLength, - size: stat.size, - }; - place.files.set(key, entry); - return entry; - } catch { - // File may have been removed between readdir and stat - return null; - } - } - - static getPlaceEntries(cache, name) { - const place = cache.places.get(name); - if (!place) return []; - if (place.entriesCache) return place.entriesCache; - const entries = Array.from(place.files.values()); - place.entriesCache = entries; - return entries; - } - - startWatch(timeout, broadcast) { - const { DirectoryWatcher } = metarhia.metawatch; - this.watcher = new DirectoryWatcher({ timeout }); - for (const dir of this.directories) { - this.watcher.watch(dir); - } - - this.watcher.on('change', (filePath) => { - const resolved = this._resolve(filePath); - if (!resolved.name) return; - const { name } = resolved; - node.fs.stat(filePath, async (err, stat) => { - if (err) return; - if (stat.isDirectory()) { - this.watcher.watch(filePath); - const before = new Set(this.places.get(name).files.keys()); - await this._loadDir(name, filePath); - const place = this.places.get(name); - place.entriesCache = null; - for (const [key, entry] of place.files) { - if (!before.has(key)) { - broadcast({ name: 'cache-update', place: name, entry }); - } - } - return; - } - const entry = await this._loadFile(name, filePath); - if (entry) { - this.places.get(name).entriesCache = null; - broadcast({ name: 'cache-update', place: name, entry }); - } - }); - }); - - this.watcher.on('delete', (filePath) => { - const resolved = this._resolve(filePath); - if (!resolved.name) return; - const { name, key } = resolved; - const place = this.places.get(name); - if (place) { - place.files.delete(key); - place.entriesCache = null; - broadcast({ name: 'cache-delete', place: name, key }); - } - }); - } - - _resolve(filePath) { - for (const [name, place] of this.places) { - if 
(filePath.startsWith(place.path)) { - const key = StaticCache.getKey(filePath, place.path); - return { name, key }; - } - } - return {}; - } - - close() { - if (this.watcher) this.watcher.close(); - } -} - -module.exports = { StaticCache }; diff --git a/lib/cache/LimitCache.js b/lib/cache/LimitCache.js new file mode 100644 index 000000000..3d7f25c70 --- /dev/null +++ b/lib/cache/LimitCache.js @@ -0,0 +1,383 @@ +'use strict'; + +const DEFAULT_LIMIT = 1024 * 1024 * 1024; +const DEFAULT_BASE_SEGMENT_SIZE = 64 * 1024 * 1024; +const DEFAULT_MAX_FILE_SIZE = 10 * 1024 * 1024; + +class Pool { + constructor(limit, baseSegmentSize) { + this.limit = limit; + this.baseSegmentSize = baseSegmentSize; + this.segments = new Map(); + this.cleanSegmentIds = new Set(); + this.totalUsed = 0; + this.nextSegmentId = 1; + } + + canAllocate(size) { + return this.totalUsed + size <= this.limit; + } + + createBaseSegment() { + for (const id of this.cleanSegmentIds) { + this.cleanSegmentIds.delete(id); + return this.segments.get(id); + } + const size = this.baseSegmentSize; + if (!this.canAllocate(size)) return null; + const id = this.nextSegmentId++; + const sab = new SharedArrayBuffer(size); + const segment = { id, sab, size }; + this.segments.set(id, segment); + this.totalUsed += size; + return segment; + } + + retireSegment(id) { + if (!this.segments.has(id)) return false; + if (this.cleanSegmentIds.has(id)) return true; + this.cleanSegmentIds.add(id); + return true; + } + + getSegment(id) { + return this.segments.get(id) || null; + } + + getSegmentsSnapshot() { + const result = []; + for (const seg of this.segments.values()) { + result.push({ id: seg.id, sab: seg.sab }); + } + return result; + } +} + +class Registry { + constructor(pool) { + this.pool = pool; + this.freeExtents = new Map(); + this.tails = new Map(); + } + + registerSegment(segmentId) { + this.freeExtents.set(segmentId, []); + this.tails.set(segmentId, 0); + } + + allocate(size, noCreate = false) { + let bestFit = null; 
+ for (const [segmentId, extents] of this.freeExtents) { + for (let i = 0; i < extents.length; i++) { + const extent = extents[i]; + if (extent.length < size) continue; + if (!bestFit || extent.length < bestFit.extent.length) { + bestFit = { segmentId, index: i, extent }; + } + } + } + if (bestFit) { + const { segmentId, index, extent } = bestFit; + const offset = extent.offset; + const extents = this.freeExtents.get(segmentId); + if (extent.length === size) { + extents.splice(index, 1); + } else { + extents[index] = { + offset: extent.offset + size, + length: extent.length - size, + }; + } + return { segmentId, offset }; + } + for (const [segmentId, tail] of this.tails) { + if (tail + size <= this.pool.baseSegmentSize) { + this.tails.set(segmentId, tail + size); + return { segmentId, offset: tail }; + } + } + if (noCreate) return null; + const segment = this.pool.createBaseSegment(); + if (!segment) return null; + this.registerSegment(segment.id); + this.tails.set(segment.id, size); + return { segmentId: segment.id, offset: 0 }; + } + + free(segmentId, offset, length) { + const extents = this.freeExtents.get(segmentId); + if (!extents) return; + const newExtent = { offset, length }; + let insertIndex = extents.findIndex((e) => e.offset > offset); + if (insertIndex === -1) insertIndex = extents.length; + extents.splice(insertIndex, 0, newExtent); + Registry.mergeAdjacent(extents, insertIndex); + } + + static mergeAdjacent(extents, index) { + if (index + 1 < extents.length) { + const current = extents[index]; + const next = extents[index + 1]; + if (current.offset + current.length === next.offset) { + current.length += next.length; + extents.splice(index + 1, 1); + } + } + if (index > 0) { + const prev = extents[index - 1]; + const current = extents[index]; + if (prev.offset + prev.length === current.offset) { + prev.length += current.length; + extents.splice(index, 1); + } + } + } + + segmentUsed(segmentId) { + const tail = this.tails.get(segmentId); + if (!tail) 
/**
 * File cache that copies file contents into pooled SharedArrayBuffer
 * segments. Segments come from `Pool` (each exposes `id`, `size`, `sab`);
 * space inside them is handed out by `Registry`. Files that are too large,
 * cannot be read, or do not fit are recorded as `disk` entries instead.
 *
 * Entry shapes produced by this class:
 *   { kind: 'shared', segmentId, offset, length, stat }
 *   { kind: 'disk', path, stat, data: null }
 */
class LimitCache {
  /**
   * @param {object} [options]
   * @param {number} [options.limit] - passed to `Pool` as its limit
   * @param {number} [options.maxFileSize] - files larger than this stay on disk
   * @param {number} [options.baseSegmentSize] - configured segment size; it is
   *   rounded up to the smallest multiple that is >= maxFileSize
   * @param {Function} [options.reader] - async (filePath, sab, offset, size)
   *   used to fill a segment when a file carries no in-memory `data`
   */
  constructor(options = {}) {
    const limit = options.limit || DEFAULT_LIMIT;
    const maxFileSize = options.maxFileSize || DEFAULT_MAX_FILE_SIZE;
    const configured = options.baseSegmentSize || DEFAULT_BASE_SEGMENT_SIZE;
    const baseSegmentSize = Math.ceil(maxFileSize / configured) * configured;
    this.maxFileSize = maxFileSize;
    this.baseSegmentSize = baseSegmentSize;
    this.reader = options.reader || null;
    this.pool = new Pool(limit, baseSegmentSize);
    this.registry = new Registry(this.pool);
    this.indexes = {};
  }

  /** Bytes in use as reported by the pool. */
  get totalUsed() {
    return this.pool.totalUsed;
  }

  /** Returns the pool segment with the given id (or whatever the pool yields). */
  getSegment(id) {
    return this.pool.getSegment(id);
  }

  /**
   * Builds (and replaces) the index `name` from a map of files.
   * Files are allocated largest-first (by `stat.size`).
   * @param {string} name
   * @param {Map<string, {data?: Uint8Array, stat?: object, path?: string}>} filesMap
   * @returns {Promise<{entries: Map, segmentIds: Set}>}
   */
  async load(name, filesMap) {
    const entries = new Map();
    const segmentIds = new Set();
    const candidates = [...filesMap.entries()];
    candidates.sort((a, b) => (b[1].stat?.size || 0) - (a[1].stat?.size || 0));
    for (const [key, file] of candidates) {
      const entry = await this.#allocateEntry(file, segmentIds);
      entries.set(key, entry);
    }
    const index = { entries, segmentIds };
    this.indexes[name] = index;
    return index;
  }

  /**
   * Allocates a single file and stores it under `key` in index `name`,
   * creating the index when needed. A previous entry under the same key is
   * overwritten but not freed; the caller owns that.
   * @returns {Promise<object>} the new entry
   */
  async allocate(name, key, file) {
    let index = this.indexes[name];
    if (!index) {
      index = { entries: new Map(), segmentIds: new Set() };
      this.indexes[name] = index;
    }
    const entry = await this.#allocateEntry(file, index.segmentIds);
    index.entries.set(key, entry);
    return entry;
  }

  /**
   * Detaches `key` from index `name` without releasing its memory.
   * @returns {object|null} the removed entry, or null when absent
   */
  remove(name, key) {
    const index = this.indexes[name];
    if (!index) return null;
    const entry = index.entries.get(key);
    if (!entry) return null;
    index.entries.delete(key);
    return entry;
  }

  /**
   * Releases the extent of a shared entry. Non-shared and zero-length
   * entries, and entries whose segment is no longer in the pool, are ignored.
   * A segment that becomes empty is unregistered and retired.
   */
  free(entry) {
    if (!entry || entry.kind !== 'shared' || entry.length === 0) return;
    const { segmentId } = entry;
    const segment = this.pool.getSegment(segmentId);
    if (!segment) return;
    this.registry.free(segmentId, entry.offset, entry.length);
    if (this.registry.isSegmentEmpty(segmentId)) {
      this.registry.unregisterSegment(segmentId);
      this.pool.retireSegment(segmentId);
    }
  }

  /**
   * Evacuates the least-utilized segment whose utilization is below
   * `threshold` by copying its entries into other segments. Needs at least
   * two registered, non-empty segments. On allocation failure everything is
   * rolled back and null is returned.
   *
   * The old extents are NOT freed here; they are returned in `oldEntries`.
   *
   * @param {number} [threshold=0.3]
   * @returns {{updates: Array, oldEntries: Array, newSegments: Array}|null}
   */
  compact(threshold = 0.3) {
    let target = null;
    let minUtil = threshold;
    let baseCount = 0;
    for (const [segmentId, tail] of this.registry.tails) {
      if (tail === 0) continue;
      const segment = this.pool.getSegment(segmentId);
      if (!segment) continue;
      baseCount++;
      const used = this.registry.segmentUsed(segmentId);
      const util = used / segment.size;
      if (util < minUtil) {
        minUtil = util;
        target = segmentId;
      }
    }
    // Compare against null so a segment id of 0 is not mistaken for "none".
    if (baseCount < 2 || target === null) return null;
    const items = [];
    for (const name of Object.keys(this.indexes)) {
      for (const [key, entry] of this.indexes[name].entries) {
        if (entry.kind === 'shared' && entry.segmentId === target) {
          items.push({ name, key, entry });
        }
      }
    }
    if (items.length === 0) return null;
    const savedTail = this.registry.tails.get(target);
    // A segment may have no free-extent list yet (segmentUsed tolerates that
    // too), so fall back to an empty list instead of crashing on `.map`.
    const extents = this.registry.freeExtents.get(target) ?? [];
    const savedExtents = extents.map((e) => ({ ...e }));
    // Unregister first so the allocations below cannot land in the target.
    this.registry.unregisterSegment(target);
    const oldSab = this.pool.getSegment(target).sab;
    const moved = [];
    let success = true;
    for (const { name, key, entry } of items) {
      const allocation = this.registry.allocate(entry.length, true);
      if (!allocation) {
        success = false;
        break;
      }
      const src = new Uint8Array(oldSab, entry.offset, entry.length);
      const dstSeg = this.pool.getSegment(allocation.segmentId);
      new Uint8Array(dstSeg.sab, allocation.offset, entry.length).set(src);
      moved.push({
        name,
        key,
        oldEntry: entry,
        newEntry: {
          kind: 'shared',
          segmentId: allocation.segmentId,
          offset: allocation.offset,
          length: entry.length,
          stat: entry.stat,
        },
      });
    }
    if (!success) {
      // Roll back: drop the copies and restore the target's bookkeeping.
      for (const { newEntry } of moved) {
        this.registry.free(
          newEntry.segmentId,
          newEntry.offset,
          newEntry.length,
        );
      }
      this.registry.registerSegment(target);
      this.registry.freeExtents.set(target, savedExtents);
      this.registry.tails.set(target, savedTail);
      return null;
    }
    const updates = [];
    const oldEntries = [];
    const newSegmentIds = new Set();
    for (const { name, key, oldEntry, newEntry } of moved) {
      this.indexes[name].entries.set(key, newEntry);
      this.indexes[name].segmentIds.add(newEntry.segmentId);
      newSegmentIds.add(newEntry.segmentId);
      updates.push({ name, key, entry: newEntry, oldEntry });
      oldEntries.push(oldEntry);
    }
    for (const name of Object.keys(this.indexes)) {
      this.indexes[name].segmentIds.delete(target);
    }
    const newSegments = [];
    for (const id of newSegmentIds) {
      const seg = this.pool.getSegment(id);
      if (seg) newSegments.push({ id: seg.id, sab: seg.sab });
    }
    return { updates, oldEntries, newSegments };
  }

  /**
   * Returns a transferable description of the cache: the pool's segment
   * snapshot plus every index with its entries flattened to [key, entry] pairs.
   */
  snapshot() {
    const segments = this.pool.getSegmentsSnapshot();
    const indexes = {};
    for (const name of Object.keys(this.indexes)) {
      const { entries } = this.indexes[name];
      indexes[name] = { entries: [...entries] };
    }
    return { segments, indexes };
  }

  /**
   * Turns one file into a cache entry, copying it into a segment when it is
   * small enough, readable and space is available; otherwise yields a `disk`
   * entry. `segmentIds` collects the ids of segments that received data.
   * If filling the segment fails, the reserved extent is released before the
   * error is rethrown.
   */
  async #allocateEntry(file, segmentIds) {
    const { data, stat, path: filePath } = file;
    const size = stat?.size || 0;
    if (size > this.maxFileSize) {
      return { kind: 'disk', path: filePath, stat, data: null };
    }
    if (!data && !this.reader) {
      return { kind: 'disk', path: filePath, stat, data: null };
    }
    if (size === 0) {
      return { kind: 'shared', segmentId: 0, offset: 0, length: 0, stat };
    }
    const allocation = this.registry.allocate(size);
    if (!allocation) return { kind: 'disk', path: filePath, stat, data: null };
    const { segmentId, offset } = allocation;
    try {
      const segment = this.pool.getSegment(segmentId);
      await this.#writeToSegment(segment.sab, offset, size, data, filePath);
    } catch (error) {
      // The write failed (e.g. the file vanished between stat and read):
      // give the extent back so it does not leak, then surface the error.
      this.free({ kind: 'shared', segmentId, offset, length: size });
      throw error;
    }
    segmentIds.add(segmentId);
    return { kind: 'shared', segmentId, offset, length: size, stat };
  }

  /**
   * Fills `size` bytes of `sab` at `offset` from `data` when present,
   * otherwise through the configured reader.
   * @throws {Error} when neither data nor a reader is available
   */
  async #writeToSegment(sab, offset, size, data, filePath) {
    if (size === 0) return;
    if (data) {
      const view = new Uint8Array(sab, offset, size);
      view.set(data);
      return;
    }
    if (this.reader) {
      await this.reader(filePath, sab, offset, size);
      return;
    }
    throw new Error(`No reader and no data for: ${filePath}`);
  }

  /**
   * Projects a whole index (entries as a Map or as [key, entry] pairs) into
   * a Map of servable files.
   * @param {{entries: Map|Array}} index
   * @param {Map<number, SharedArrayBuffer>} segmentsMap
   * @returns {Map<string, object>}
   */
  static project(index, segmentsMap) {
    const files = new Map();
    const entries =
      index.entries instanceof Map ? index.entries : new Map(index.entries);
    for (const [key, entry] of entries) {
      files.set(key, LimitCache.projectEntry(entry, segmentsMap));
    }
    return files;
  }

  /**
   * Projects one entry: shared entries become `{ data, stat }` where `data`
   * is a Buffer view over the segment (no copy); other entries become
   * `{ data: null, stat, path }`.
   */
  static projectEntry(entry, segmentsMap) {
    if (entry.kind === 'shared') {
      const { segmentId, offset, length } = entry;
      if (length === 0) {
        return { data: Buffer.alloc(0), stat: entry.stat };
      }
      const sab = segmentsMap.get(segmentId);
      const data = Buffer.from(sab, offset, length);
      return { data, stat: entry.stat };
    }
    return { data: null, stat: entry.stat, path: entry.path };
  }
}
this.indexes[name] = index; + return index; + } + + async allocate(name, key, file) { + let index = this.indexes[name]; + if (!index) { + index = { entries: new Map() }; + this.indexes[name] = index; + } + const entry = await this.#allocateEntry(file); + index.entries.set(key, entry); + return entry; + } + + remove(name, key) { + const index = this.indexes[name]; + if (!index) return null; + const entry = index.entries.get(key); + if (!entry) return null; + index.entries.delete(key); + return entry; + } + + free() {} + + compact() { + return null; + } + + snapshot() { + const indexes = {}; + for (const name of Object.keys(this.indexes)) { + const { entries } = this.indexes[name]; + indexes[name] = { entries: [...entries] }; + } + return { segments: null, indexes }; + } + + async #allocateEntry(file) { + const { data, stat, path: filePath } = file; + const size = stat?.size || 0; + if (size > this.maxFileSize) { + return { kind: 'disk', path: filePath, stat, data: null }; + } + if (!data && !this.reader) { + return { kind: 'disk', path: filePath, stat, data: null }; + } + if (size === 0) { + const sab = new SharedArrayBuffer(0); + return { kind: 'shared', sab, length: 0, stat }; + } + const sab = new SharedArrayBuffer(size); + if (data) { + new Uint8Array(sab).set(data); + } else if (this.reader) { + await this.reader(filePath, sab, 0, size); + } + return { kind: 'shared', sab, length: size, stat }; + } + + static project(index) { + const files = new Map(); + const entries = + index.entries instanceof Map ? 
index.entries : new Map(index.entries); + for (const [key, entry] of entries) { + files.set(key, PerFileCache.projectEntry(entry)); + } + return files; + } + + static projectEntry(entry) { + if (entry.kind === 'shared') { + const { sab, length } = entry; + const data = Buffer.from(sab, 0, length); + return { data, stat: entry.stat }; + } + return { data: null, stat: entry.stat, path: entry.path }; + } +} + +module.exports = { PerFileCache }; diff --git a/lib/cache/PlacementSource.js b/lib/cache/PlacementSource.js new file mode 100644 index 000000000..815dea0c1 --- /dev/null +++ b/lib/cache/PlacementSource.js @@ -0,0 +1,40 @@ +'use strict'; + +const { node, metarhia } = require('../deps.js'); +const { Place } = require('../place.js'); + +const WIN = process.platform === 'win32'; + +const toKey = WIN + ? (filePath, base) => { + const key = filePath.substring(base.length); + return metarhia.metautil.replace(key, node.path.sep, '/'); + } + : (filePath, base) => filePath.substring(base.length); + +class PlacementSource extends Place { + constructor(name, application, options = {}) { + super(name, application); + this.files = new Map(); + this.ext = options.ext; + } + + getKey(filePath) { + return toKey(filePath, this.path); + } + + async change(filePath) { + const ext = metarhia.metautil.fileExt(filePath); + if (this.ext && !this.ext.includes(ext)) return; + try { + const stat = await node.fsp.stat(filePath); + const key = this.getKey(filePath); + this.files.set(key, { stat, path: filePath }); + } catch { + const key = this.getKey(filePath); + this.files.delete(key); + } + } +} + +module.exports = { PlacementSource }; diff --git a/lib/cache/SharedCache.js b/lib/cache/SharedCache.js new file mode 100644 index 000000000..0b2b992d3 --- /dev/null +++ b/lib/cache/SharedCache.js @@ -0,0 +1,373 @@ +'use strict'; + +const { node, metarhia } = require('../deps.js'); +const { LimitCache } = require('./LimitCache.js'); +const { PerFileCache } = require('./PerFileCache.js'); +const 
// Power applied to the base (1000 or 1024) for each unit prefix letter.
const PREFIXES = { k: 1, m: 2, g: 3, t: 4 };

// "<number>[ ]<unit>": a non-negative decimal number followed by optional letters.
const SIZE_PATTERN = /^(\d+(?:\.\d+)?)\s*([a-z]*)$/;

/**
 * Converts a human-readable size into a byte count.
 *
 * Accepted forms (case-insensitive, surrounding whitespace ignored):
 *   - a number: returned unchanged
 *   - decimal units: "10 kb", "10 mb", "1 gb", "1 tb"   (powers of 1000)
 *   - binary units:  "64 kib", "64 mib", "1 gib", "1 tib" (powers of 1024)
 *   - plain bytes:   "512", "512 b"
 * Fractional values such as "1.5 gib" are honoured; the result is rounded to
 * a whole number of bytes.
 *
 * @param {number|string} value
 * @returns {number} size in bytes
 * @throws {TypeError} when value is neither a number nor a string
 * @throws {Error} when the number is missing/malformed or the unit is unknown
 */
const sizeToBytes = (value) => {
  if (typeof value === 'number') return value;
  if (typeof value !== 'string') {
    throw new TypeError(`Invalid size: ${String(value)}`);
  }
  const str = value.trim().toLowerCase();
  const match = SIZE_PATTERN.exec(str);
  // Reject inputs without a leading number instead of returning NaN.
  if (!match) throw new Error(`Invalid size: ${value}`);
  const [, digits, unit] = match;
  const num = Number(digits);
  if (unit === '' || unit === 'b') return Math.round(num);
  const binary = unit.length === 3 && unit.endsWith('ib');
  const decimal = unit.length === 2 && unit.endsWith('b');
  const exp = PREFIXES[unit[0]];
  // Validate the whole unit, not just its first letter.
  if (!exp || (!binary && !decimal)) throw new Error(`Unknown unit: ${unit}`);
  const base = binary ? 1024 : 1000;
  return Math.round(num * base ** exp);
};
+ + #afterAck; + + async initialize() { + const appObj = { + path: this.dir, + config: this.config, + watcher: null, + console: this.console, + absolute: (relative) => node.path.join(this.dir, relative), + }; + const timeout = this.config.server.timeouts.watch; + const { DirectoryWatcher } = metarhia.metawatch; + this.watcher = new DirectoryWatcher({ timeout }); + appObj.watcher = this.watcher; + for (const placement of this.placements) { + const opts = placement.ext ? { ext: placement.ext } : {}; + this.sources[placement.name] = new PlacementSource( + placement.name, + appObj, + opts, + ); + } + for (const name of Object.keys(this.sources)) { + const source = this.sources[name]; + await source.load(); + await this.cache.load(name, source.files); + } + } + + snapshot() { + return this.cache.snapshot(); + } + + handleAck(updateId, workerId) { + if (!this.pendingFrees) return; + const pending = this.pendingFrees.get(updateId); + if (!pending) return; + pending.workerIds.delete(workerId); + if (pending.workerIds.size === 0) { + this.#afterAck(pending); + this.pendingFrees.delete(updateId); + } + } + + handleWorkerExit(workerId) { + if (!this.pendingFrees) return; + for (const [updateId, pending] of this.pendingFrees) { + pending.workerIds.delete(workerId); + if (pending.workerIds.size === 0) { + this.#afterAck(pending); + this.pendingFrees.delete(updateId); + } + } + } + + watch(app) { + this.app = app; + const { sources, cache, needsAck } = this; + + const sourcesByName = new Map(Object.entries(sources)); + + const findSource = (filePath) => { + const relPath = node.path.relative(this.dir, filePath); + if (!relPath || relPath.startsWith('..')) return null; + if (node.path.isAbsolute(relPath)) return null; + const sepIndex = relPath.indexOf(node.path.sep); + const name = sepIndex === -1 ? relPath : relPath.substring(0, sepIndex); + const source = sourcesByName.get(name); + return source ? 
{ name, source } : null; + }; + + let epoch = null; + + const processChange = async (ep, name, source, filePath) => { + const stat = await node.fsp.stat(filePath).catch(() => null); + if (!stat) return; + if (stat.isDirectory()) { + const before = new Set(source.files.keys()); + await source.load(filePath); + for (const [key, file] of source.files) { + if (before.has(key)) continue; + const newEntry = await cache.allocate(name, key, file); + const group = + ep.updates[name] || + (ep.updates[name] = { entries: [], segmentIds: new Set() }); + group.entries.push([key, newEntry]); + if (newEntry.kind === 'shared' && newEntry.segmentId) { + group.segmentIds.add(newEntry.segmentId); + } + } + return; + } + await source.change(filePath); + const key = source.getKey(filePath); + const file = source.files.get(key); + if (!file) return; + const oldEntry = cache.indexes[name]?.entries.get(key); + const newEntry = await cache.allocate(name, key, file); + const group = + ep.updates[name] || + (ep.updates[name] = { entries: [], segmentIds: new Set() }); + group.entries.push([key, newEntry]); + if (newEntry.kind === 'shared' && newEntry.segmentId) { + group.segmentIds.add(newEntry.segmentId); + } + if (oldEntry && oldEntry.kind === 'shared') { + ep.oldEntries.push(oldEntry); + } + }; + + const processDelete = (ep, name, source, filePath) => { + const prefix = source.getKey(filePath); + const keys = []; + const exactEntry = source.files.get(prefix); + if (exactEntry) { + source.files.delete(prefix); + keys.push(prefix); + } + const dirPrefix = prefix.endsWith('/') ? 
prefix : prefix + '/'; + for (const key of source.files.keys()) { + if (key.startsWith(dirPrefix)) { + source.files.delete(key); + keys.push(key); + } + } + if (keys.length === 0) return; + const group = ep.deletes[name] || (ep.deletes[name] = []); + for (const key of keys) { + group.push(key); + const old = cache.remove(name, key); + if (old && old.kind === 'shared') ep.oldEntries.push(old); + } + }; + + this.watcher.on('before', () => { + epoch = { updates: {}, deletes: {}, oldEntries: [], promises: [] }; + }); + + this.watcher.on('change', (filePath) => { + const entry = findSource(filePath); + const ep = epoch; + if (entry && ep) { + ep.promises.push(processChange(ep, entry.name, entry.source, filePath)); + } + }); + + this.watcher.on('delete', (filePath) => { + const entry = findSource(filePath); + const ep = epoch; + if (entry && ep) processDelete(ep, entry.name, entry.source, filePath); + }); + + this.watcher.on('after', () => { + const current = epoch; + epoch = null; + if (!current) return; + Promise.all(current.promises) + .then(() => this.#flushEpoch(current)) + .catch((err) => this.console.error(`[cache] epoch: ${err.message}`)); + }); + + // Pre-build flush function for the mode + if (needsAck) { + this.#flushEpoch = this.#flushEpochWithAck; + } else { + this.#flushEpoch = this.#flushEpochFireAndForget; + } + } + + #flushEpoch; + + #flushEpochWithAck(epoch) { + const { updates, deletes, oldEntries } = epoch; + let lastUpdateId = 0; + for (const name of Object.keys(updates)) { + const { entries, segmentIds } = updates[name]; + if (entries.length === 0) continue; + const newSegments = []; + for (const id of segmentIds) { + const seg = this.cache.getSegment(id); + if (seg) newSegments.push({ id: seg.id, sab: seg.sab }); + } + lastUpdateId = ++this.nextUpdateId; + this.#broadcast({ + name: 'file-update', + target: name, + updateId: lastUpdateId, + updates: entries, + newSegments, + }); + } + for (const name of Object.keys(deletes)) { + const keys = 
deletes[name]; + if (keys.length === 0) continue; + lastUpdateId = ++this.nextUpdateId; + this.#broadcast({ + name: 'file-delete', + target: name, + updateId: lastUpdateId, + keys, + }); + } + if (oldEntries.length > 0 && lastUpdateId > 0) { + this.#trackUpdate(lastUpdateId, oldEntries); + } + } + + #flushEpochFireAndForget(epoch) { + const { updates, deletes } = epoch; + for (const name of Object.keys(updates)) { + const { entries } = updates[name]; + if (entries.length === 0) continue; + this.#broadcast({ + name: 'file-update', + target: name, + updates: entries, + }); + } + for (const name of Object.keys(deletes)) { + const keys = deletes[name]; + if (keys.length === 0) continue; + this.#broadcast({ + name: 'file-delete', + target: name, + keys, + }); + } + } + + #broadcast(data) { + for (const thread of this.app.threads.values()) { + thread.postMessage(data); + } + } + + #freeEntries(pending) { + if (!pending) return; + for (const entry of pending.entries) this.cache.free(entry); + const { pool, registry } = this.cache; + const segs = [...pool.segments.values()]; + const cleanCount = pool.cleanSegmentIds.size; + const info = segs.map((s) => { + const used = registry.segmentUsed(s.id); + const pct = ((used / s.size) * 100).toFixed(1); + const mark = pool.cleanSegmentIds.has(s.id) ? 
' [clean]' : ''; + return ` seg ${s.id}: ${used}/${s.size} (${pct}%)${mark}`; + }); + const count = pending.entries.length; + this.console.info( + `[cache] freeEntries: ${count} entries freed, ` + + `${segs.length} segments (${cleanCount} clean), ` + + `totalUsed=${this.cache.totalUsed}\n${info.join('\n')}`, + ); + this.#tryCompact(); + } + + #tryCompact() { + const result = this.cache.compact(); + if (!result) { + this.console.info('[cache] compact: no target found'); + return; + } + this.console.info( + `[cache] compact: moved ${result.updates.length} files, ` + + `freed ${result.oldEntries.length} old entries`, + ); + const byName = {}; + for (const { name, key, entry } of result.updates) { + if (!byName[name]) byName[name] = []; + byName[name].push([key, entry]); + } + let lastUpdateId = 0; + for (const name of Object.keys(byName)) { + lastUpdateId = ++this.nextUpdateId; + this.#broadcast({ + name: 'file-update', + target: name, + updateId: lastUpdateId, + updates: byName[name], + newSegments: result.newSegments, + }); + } + if (result.oldEntries.length > 0 && lastUpdateId > 0) { + this.#trackUpdate(lastUpdateId, result.oldEntries); + } + } + + #trackUpdate(updateId, entries) { + const workerIds = new Set(this.app.threads.keys()); + this.pendingFrees.set(updateId, { workerIds, entries }); + } +} + +module.exports = { SharedCache, sizeToBytes }; diff --git a/lib/cert.js b/lib/cert.js index 614879202..841974af7 100644 --- a/lib/cert.js +++ b/lib/cert.js @@ -1,26 +1,65 @@ 'use strict'; const { node, metarhia, wt } = require('./deps.js'); -const { Static } = require('./static.js'); +const { Place } = require('./place.js'); -class Cert extends Static { +const WIN = process.platform === 'win32'; +const MAX_FILE_SIZE = '10 mb'; + +class Cert extends Place { constructor(name, application, options = {}) { - super(name, application, options); + super(name, application); + this.files = new Map(); this.domains = new Map(); + this.ext = options.ext; + this.maxFileSize = -1; 
} get(key) { return this.domains.get(key); } + getKey(filePath) { + const key = filePath.substring(this.path.length); + if (WIN) return metarhia.metautil.replace(key, node.path.sep, '/'); + return key; + } + + delete(filePath) { + const key = this.getKey(filePath); + this.files.delete(key); + } + + async change(filePath) { + if (this.maxFileSize === -1) { + const maxFileSize = this.application.config?.cache?.maxFileSize; + const size = maxFileSize || MAX_FILE_SIZE; + this.maxFileSize = metarhia.metautil.sizeToBytes(size); + } + const ext = metarhia.metautil.fileExt(filePath); + if (this.ext && !this.ext.includes(ext)) return; + try { + const stat = await node.fsp.stat(filePath); + const key = this.getKey(filePath); + if (stat.size > this.maxFileSize) { + this.files.set(key, { data: null, stat }); + } else { + const data = await node.fsp.readFile(filePath); + this.files.set(key, { data, stat }); + } + } catch { + this.delete(filePath); + } + } + async before(changes) { const folders = new Set(); for (const [name, event] of changes) { const dir = node.path.dirname(name); const folder = node.path.basename(dir); folders.add(folder); - if (event === 'change') await super.change(name); - if (event === 'datele') super.delete(name); + if (event === 'change') await this.change(name); + if (event === 'datele') this.delete(name); } await this.init([...folders]); changes.length = 0; diff --git a/lib/static.js b/lib/static.js index 7a42bbe0c..5ca1e2715 100644 --- a/lib/static.js +++ b/lib/static.js @@ -1,13 +1,9 @@ 'use strict'; const { node, metarhia } = require('./deps.js'); -const { Place } = require('./place.js'); const { join } = node.path.posix; const { Readable } = node.stream; -const WIN = process.platform === 'win32'; -const MAX_FILE_SIZE = '10 mb'; -const STREAM_THRESHOLD = '1 mb'; const CHUNK_SIZE = 65536; const STATUS_CACHE = new Map(); @@ -24,9 +20,12 @@ const status = (code) => { return file; }; -const createSABStream = (sab, byteLength, options = {}) => { - const 
start = options.start ?? 0; - const end = options.end ?? byteLength - 1; +const createSABStream = (data, options = {}) => { + const sab = data.buffer; + const base = data.byteOffset; + const total = data.byteLength; + const start = base + (options.start ?? 0); + const end = base + (options.end ?? total - 1); let offset = start; return new Readable({ read() { @@ -38,93 +37,55 @@ const createSABStream = (sab, byteLength, options = {}) => { }); }; -class Static extends Place { - constructor(name, application, options = {}) { - super(name, application); +class Static { + constructor(name, application) { + this.name = name; + this.path = application.absolute(name); this.files = new Map(); - this.ext = options.ext; - this.maxFileSize = -1; - this.streamThreshold = -1; - this.search = this.lookup; - this.errorPage = (code) => status(code); + this.streamThreshold = Infinity; } get(key) { return this.files.get(key); } - getKey(filePath) { - const key = filePath.substring(this.path.length); - if (WIN) return metarhia.metautil.replace(key, node.path.sep, '/'); - return key; + setFiles(filesMap) { + this.files = filesMap; } - static withData(entry) { - if (entry.sab) { - entry.data = Buffer.from(entry.sab, 0, entry.byteLength); - } else { - entry.data = null; + updateFiles(updates) { + for (const [key, file] of updates) { + this.files.set(key, file); } - return entry; } - lookup(filePath) { - let file = this.files.get(filePath); - if (file) return { file, code: 200 }; - if (filePath.endsWith('/')) { - file = this.files.get(join(filePath, 'index.html')); - if (file) return { file, code: 200 }; + deleteFiles(keys) { + for (const key of keys) { + this.files.delete(key); } - return null; } - initCache(entries) { - this._initThreshold(); - const cacheConfig = this.application.config?.cache; - if (cacheConfig?.virtualFS) { + initServing(config) { + const cacheConfig = config?.cache || {}; + const { sizeToBytes } = metarhia.metautil; + this.streamThreshold = 
sizeToBytes(cacheConfig.streamThreshold || '1 mb'); + if (cacheConfig.virtualFS) { this.search = this.find; - this.errorPage = (code, path) => this.find(path, code).file; - } - this.files.clear(); - for (const entry of entries) { - this.files.set(entry.key, Static.withData(entry)); + this.errorPage = (code, path) => this.find(path, code); + } else { + this.search = this.lookup; + this.errorPage = (code) => status(code); } } - updateEntry(entry) { - this.files.set(entry.key, Static.withData(entry)); - } - - // Called from worker message handler: remove entry by key - deleteEntry(key) { - this.files.delete(key); - } - - delete(filePath) { - const key = this.getKey(filePath); - this.files.delete(key); - } - - async change(filePath) { - if (this.maxFileSize === -1) { - const maxFileSize = this.application.config?.cache?.maxFileSize; - const size = maxFileSize || MAX_FILE_SIZE; - this.maxFileSize = metarhia.metautil.sizeToBytes(size); - } - const ext = metarhia.metautil.fileExt(filePath); - if (this.ext && !this.ext.includes(ext)) return; - try { - const stat = await node.fsp.stat(filePath); - const key = this.getKey(filePath); - if (stat.size > this.maxFileSize) { - this.files.set(key, { data: null, stat }); - } else { - const data = await node.fsp.readFile(filePath); - this.files.set(key, { data, stat }); - } - } catch { - this.delete(filePath); + lookup(filePath) { + let file = this.files.get(filePath); + if (file) return { ...file, code: 200 }; + if (filePath.endsWith('/')) { + file = this.files.get(join(filePath, 'index.html')); + if (file) return { ...file, code: 200 }; } + return null; } find(path, code, parent = false) { @@ -134,18 +95,18 @@ class Static extends Place { const fileName = `.${code}.html`; filePath = join(filePath, fileName); const file = this.get(filePath); - if (file) return { file, code }; - if (root) return { file: status(code), code }; + if (file) return { data: file.data, stat: null, code }; + if (root) return status(code); } else { const 
folder = path.endsWith('/'); if (folder && !parent) { filePath = join(path, 'index.html'); } let file = this.get(filePath); - if (file) return { file, code: 200 }; + if (file) return { ...file, code: 200 }; filePath = join(path, '.virtual.html'); file = this.get(filePath); - if (file) return { file, code: -1 }; + if (file) return { ...file, code: -1 }; if (root) return this.find(filePath, 404, true); } filePath = node.path.dirname(path); @@ -153,71 +114,65 @@ class Static extends Place { return this.find(filePath, code, true); } - _initThreshold() { - if (this.streamThreshold !== -1) return; - const threshold = this.application.config?.cache?.streamThreshold; - this.streamThreshold = metarhia.metautil.sizeToBytes( - threshold || STREAM_THRESHOLD, - ); - } async serve(url, transport) { const [filePath] = metarhia.metautil.split(url, '?'); const fileExt = metarhia.metautil.fileExt(filePath); - const result = this.search(filePath); - - if (result) { - const { file, code } = result; - // SAB-backed cached file (shared memory from main process) - if (file.sab) { - const { sab, data, byteLength } = file; - if (code === -1) return void transport.write(data, 200, 'html'); - const { headers } = transport.req; - if (headers.range) { - const range = metarhia.metautil.parseRange(headers.range); - const { start, end = byteLength - 1 } = range; - if (start >= end || start >= byteLength || end >= byteLength) { - const err = this.errorPage(416, filePath); - return void transport.write(err.data, 416, fileExt); - } - if (byteLength > this.streamThreshold) { - const readable = createSABStream(sab, byteLength, { start, end }); - const options = { start, end, size: byteLength }; - return void transport.write(readable, 206, fileExt, options); - } - const slice = data.subarray(start, end + 1); - const options = { start, end, size: byteLength }; - return void transport.write(slice, 206, fileExt, options); + // Fast exact-hit path for ordinary cached files + const exact = 
this.get(filePath); + const internal = node.path.basename(filePath).startsWith('.'); + if (exact && exact.data && exact.stat && !internal) { + const { data } = exact; + const size = data.byteLength; + const { headers } = transport.req; + if (headers.range) { + const range = metarhia.metautil.parseRange(headers.range); + const { start, end = size - 1 } = range; + if (start >= end || start >= size || end >= size) { + const err = this.errorPage(416, filePath); + return void transport.write(err.data, 416, fileExt); } - if (byteLength > this.streamThreshold) { - const readable = createSABStream(sab, byteLength); - const options = { size: byteLength }; - return void transport.write(readable, code, fileExt, options); + if (size > this.streamThreshold) { + const readable = createSABStream(data, { start, end }); + const options = { start, end, size }; + return void transport.write(readable, 206, fileExt, options); } - return void transport.write(data, code, fileExt); + const slice = data.subarray(start, end + 1); + const options = { start, end, size }; + return void transport.write(slice, 206, fileExt, options); } - - // Legacy path: Buffer-backed cached file (used by cert or local cache) - if (file.data && file.stat) { - if (code === -1) return void transport.write(file.data, 200, 'html'); - return void transport.write(file.data, code, fileExt); + if (size > this.streamThreshold) { + const readable = createSABStream(data); + const options = { size }; + return void transport.write(readable, 200, fileExt, options); } + return void transport.write(data, 200, fileExt); + } - // Status page or virtual file fallback (from find/virtualFS) - if (file.data) { - const ext = code === -1 ? 'html' : fileExt; - const statusCode = code === -1 ? 200 : code || 404; - return void transport.write(file.data, statusCode, ext); - } + // Recursive search (index, virtual, status pages). 
+ // Range and streaming are intentionally omitted here — these paths only + // resolve small HTML files (index.html, .virtual.html, .NNN.html). + // Direct file requests always hit the exact-hit path above which supports + // full Range/streaming. find()/virtualFS are candidates for deprecation. + const file = this.search(filePath); + if (file && file.data && file.stat) { + if (file.code === -1) return void transport.write(file.data, 200, 'html'); + return void transport.write(file.data, file.code, fileExt); + } + if (file && file.data) { + const ext = file.code === -1 ? 'html' : fileExt; + const statusCode = file.code === -1 ? 200 : file.code || 404; + return void transport.write(file.data, statusCode, ext); } - // Uncached or oversized file: stream from disk + // Disk fallback: uncached or oversized file const absPath = join(this.path, filePath); if (absPath.startsWith(this.path)) { - const stat = await node.fsp.stat(absPath).catch(() => null); - if (stat?.isFile()) { - const { size } = stat; + const fsStat = await node.fsp.stat(absPath).catch(() => null); + const diskStat = file?.stat || fsStat; + if (diskStat && (!diskStat.isFile || diskStat.isFile())) { + const { size } = diskStat; const options = { size }; let code = 200; const { headers } = transport.req; diff --git a/lib/worker.js b/lib/worker.js index dc5dd157f..89e4e4c83 100644 --- a/lib/worker.js +++ b/lib/worker.js @@ -5,6 +5,30 @@ const { parentPort, threadId, workerData } = wt; const application = require('./application.js'); +// Pre-initialize shared cache projection infrastructure +const segmentsMap = new Map(); +const sharedCache = workerData.sharedCache; +let projectEntry = null; + +if (sharedCache && sharedCache.segments) { + // Limit mode: build segmentsMap, import LimitCache for projection + const { LimitCache } = require('./cache/LimitCache.js'); + for (const seg of sharedCache.segments) segmentsMap.set(seg.id, seg.sab); + projectEntry = (entry) => LimitCache.projectEntry(entry, segmentsMap); 
+} else if (sharedCache) { + // Per-file mode: no segments, import PerFileCache for projection + const { PerFileCache } = require('./cache/PerFileCache.js'); + projectEntry = (entry) => PerFileCache.projectEntry(entry); +} +application.segmentsMap = segmentsMap; +application.projectEntry = projectEntry; + +// Pre-initialize ACK: real function for limit mode, noop for per-file +const sendAck = + sharedCache && sharedCache.segments + ? (updateId) => parentPort.postMessage({ name: 'ack-update', updateId }) + : () => {}; + const logError = (type) => async (err) => { const error = metarhia.metautil.isError(err) ? err : new Error('Unknown'); if (error.name === 'ExperimentalWarning') return; @@ -41,19 +65,25 @@ const invoke = async ({ method, args, exclusive = false }) => { }; const handlers = { - 'cache-init': ({ place, entries }) => { - const target = application[place]; - if (target) target.initCache(entries); - }, - - 'cache-update': ({ place, entry }) => { - const target = application[place]; - if (target) target.updateEntry(entry); + 'file-update': (msg) => { + const { target, updateId, updates, newSegments } = msg; + if (newSegments) { + for (const seg of newSegments) segmentsMap.set(seg.id, seg.sab); + } + const projected = new Map(); + for (const [key, entry] of updates) { + projected.set(key, projectEntry(entry)); + } + const place = application[target]; + if (place) place.updateFiles(projected); + sendAck(updateId); }, - 'cache-delete': ({ place, key }) => { - const target = application[place]; - if (target) target.deleteEntry(key); + 'file-delete': (msg) => { + const { target, updateId, keys } = msg; + const place = application[target]; + if (place) place.deleteFiles(keys); + sendAck(updateId); }, ready: async () => { @@ -128,7 +158,7 @@ parentPort.on('message', async (msg) => { process.exit(0); } - await application.load({ invoke }); + await application.load({ invoke, sharedCache: workerData.sharedCache }); console.info(`Application started in worker 
${threadId}`); parentPort.postMessage({ name: 'started', kind: workerData.kind }); })().catch(logError(`Can not start worker ${threadId}`)); diff --git a/schemas/config/cache.js b/schemas/config/cache.js index 2275b0aa0..0657cf25f 100644 --- a/schemas/config/cache.js +++ b/schemas/config/cache.js @@ -1,7 +1,24 @@ ({ + mode: { type: 'string', required: false }, size: 'size', maxFileSize: 'size', streamThreshold: { type: 'size', required: false }, virtualFS: { type: 'boolean', required: false }, avoid: { array: 'string', required: false }, + placements: { + array: { + schema: { + name: 'string', + ext: { array: 'string', required: false }, + }, + }, + required: false, + }, + sab: { + schema: { + limit: 'size', + baseSegmentSize: 'size', + }, + required: false, + }, }); diff --git a/test/cache-shared.js b/test/cache-shared.js index 5478c1a2d..3bdfb4a85 100644 --- a/test/cache-shared.js +++ b/test/cache-shared.js @@ -4,7 +4,8 @@ const { test } = require('node:test'); const assert = require('node:assert'); const path = require('node:path'); const { Static } = require('../lib/static.js'); -const { StaticCache } = require('../lib/cache.js'); +const { LimitCache } = require('../lib/cache/LimitCache.js'); +const { PerFileCache } = require('../lib/cache/PerFileCache.js'); const root = process.cwd(); @@ -16,133 +17,204 @@ const application = { }, }; -// --- StaticCache (main process) --- +// --- LimitCache (limit backend) --- -test('StaticCache - should load files into SAB', async () => { - const appPath = path.join(root, 'test'); - const cache = new StaticCache(appPath, {}); - const entries = await cache.loadPlace('lib'); - assert.ok(entries.length > 0); - const entry = entries.find((e) => e.key.includes('add.js')); - assert.ok(entry); +test('LimitCache - should load files into segments', async () => { + const seg = 1024; + const options = { limit: seg, maxFileSize: seg, baseSegmentSize: seg }; + const cache = new LimitCache(options); + const filesMap = new Map(); + const 
data = Buffer.from('hello world'); + const stat = { size: data.byteLength }; + filesMap.set('/test.js', { data, stat, path: '/test.js' }); + await cache.load('static', filesMap); + const index = cache.indexes.static; + assert.ok(index); + const entry = index.entries.get('/test.js'); + assert.strictEqual(entry.kind, 'shared'); + assert.strictEqual(entry.length, data.byteLength); +}); + +test('LimitCache - project creates Buffer view', async () => { + const seg = 1024; + const options = { limit: seg, maxFileSize: seg, baseSegmentSize: seg }; + const cache = new LimitCache(options); + const data = Buffer.from('test data'); + const stat = { size: data.byteLength }; + const filesMap = new Map([['/f.js', { data, stat, path: '/f.js' }]]); + await cache.load('static', filesMap); + const snap = cache.snapshot(); + const segmentsMap = new Map(); + for (const seg of snap.segments) segmentsMap.set(seg.id, seg.sab); + const files = LimitCache.project(snap.indexes.static, segmentsMap); + const file = files.get('/f.js'); + assert.ok(file.data instanceof Buffer); + assert.deepStrictEqual(file.data, data); + assert.strictEqual(file.stat, stat); +}); + +test('LimitCache - zero-byte files stay shared without segments', async () => { + const seg = 1024; + const options = { limit: seg, maxFileSize: seg, baseSegmentSize: seg }; + const cache = new LimitCache(options); + const stat = { size: 0 }; + const filesMap = new Map([['/empty.txt', { data: Buffer.alloc(0), stat, path: '/empty.txt' }]]); + await cache.load('static', filesMap); + const entry = cache.indexes.static.entries.get('/empty.txt'); + assert.deepStrictEqual(entry, { + kind: 'shared', + segmentId: 0, + offset: 0, + length: 0, + stat, + }); + const snapshot = cache.snapshot(); + assert.deepStrictEqual(snapshot.segments, []); +}); + +test('LimitCache - projectEntry returns empty Buffer for zero-byte files', () => { + const stat = { size: 0 }; + const file = LimitCache.projectEntry( + { kind: 'shared', segmentId: 0, offset: 0, 
length: 0, stat }, + new Map(), + ); + assert.ok(file.data instanceof Buffer); + assert.strictEqual(file.data.length, 0); + assert.strictEqual(file.stat, stat); +}); + +test('LimitCache - disk fallback for oversized files', async () => { + const cache = new LimitCache({ limit: 1024, maxFileSize: 10 }); + const data = Buffer.alloc(20); + const stat = { size: 20 }; + const filePath = '/tmp/big.bin'; + const file = { data, stat, path: filePath }; + const fm = new Map([['/big.bin', file]]); + await cache.load('static', fm); + const entry = cache.indexes.static.entries.get('/big.bin'); + assert.strictEqual(entry.kind, 'disk'); + assert.strictEqual(entry.data, null); +}); + +// --- PerFileCache (per-file backend) --- + +test('PerFileCache - should load files into individual SABs', async () => { + const cache = new PerFileCache({ maxFileSize: 1024 * 1024 }); + const data = Buffer.from('hello world'); + const stat = { size: data.byteLength }; + const filesMap = new Map([['/test.js', { data, stat, path: '/test.js' }]]); + await cache.load('static', filesMap); + const entry = cache.indexes.static.entries.get('/test.js'); + assert.strictEqual(entry.kind, 'shared'); assert.ok(entry.sab instanceof SharedArrayBuffer); - assert.strictEqual(entry.byteLength, entry.sab.byteLength); - const data = Buffer.from(entry.sab, 0, entry.byteLength); - assert.ok(data.length > 0); + assert.strictEqual(entry.length, data.byteLength); }); -test('StaticCache - entries have correct structure', async () => { - const appPath = path.join(root, 'test'); - const cache = new StaticCache(appPath, {}); - const entries = await cache.loadPlace('lib'); - for (const entry of entries) { - assert.strictEqual(typeof entry.key, 'string'); - assert.ok(entry.key.startsWith('/')); - assert.strictEqual(typeof entry.byteLength, 'number'); - assert.strictEqual(typeof entry.size, 'number'); - if (entry.sab) { - assert.ok(entry.sab instanceof SharedArrayBuffer); - } - } +test('PerFileCache - project creates Buffer view
over SAB', async () => { + const cache = new PerFileCache({ maxFileSize: 1024 * 1024 }); + const data = Buffer.from('test data'); + const stat = { size: data.byteLength }; + const filesMap = new Map([['/f.js', { data, stat, path: '/f.js' }]]); + await cache.load('static', filesMap); + const snap = cache.snapshot(); + assert.strictEqual(snap.segments, null); + const files = PerFileCache.project(snap.indexes.static); + const file = files.get('/f.js'); + assert.ok(file.data instanceof Buffer); + assert.deepStrictEqual(file.data, data); +}); + +test('PerFileCache - disk fallback for oversized', async () => { + const cache = new PerFileCache({ maxFileSize: 10 }); + const data = Buffer.alloc(20); + const stat = { size: 20 }; + const filePath = '/tmp/big.bin'; + const file = { data, stat, path: filePath }; + const fm = new Map([['/big.bin', file]]); + await cache.load('static', fm); + const entry = cache.indexes.static.entries.get('/big.bin'); + assert.strictEqual(entry.kind, 'disk'); }); -test('StaticCache.getPlaceEntries - returns entries', async () => { - const appPath = path.join(root, 'test'); - const cache = new StaticCache(appPath, {}); - await cache.loadPlace('lib'); - const entries = StaticCache.getPlaceEntries(cache, 'lib'); - assert.ok(entries.length > 0); - const missing = StaticCache.getPlaceEntries(cache, 'none'); - assert.strictEqual(missing.length, 0); +test('PerFileCache - free and compact are no-ops', () => { + const cache = new PerFileCache(); + cache.free({ kind: 'shared', sab: new SharedArrayBuffer(4), length: 4 }); + assert.strictEqual(cache.compact(), null); }); // --- Static (worker side) --- -test('Static initCache - populate from SAB entries', () => { - const cache = new Static('lib', application); - const content = Buffer.from('hello world'); - const sab = new SharedArrayBuffer(content.byteLength); - new Uint8Array(sab).set(content); - cache.initCache([ - { - key: '/index.html', - sab, - byteLength: content.byteLength, - size: 
content.byteLength, - }, - ]); - assert.strictEqual(cache.files.size, 1); - const file = cache.get('/index.html'); +test('Static setFiles - populate from projected entries', () => { + const st = new Static('lib', application); + const data = Buffer.from('hello world'); + const stat = { size: data.byteLength }; + const files = new Map([['/index.html', { data, stat }]]); + st.setFiles(files); + assert.strictEqual(st.files.size, 1); + const file = st.get('/index.html'); assert.ok(file.data instanceof Buffer); - assert.strictEqual(file.data.length, content.byteLength); - assert.deepStrictEqual(file.data, content); - assert.ok(file.sab instanceof SharedArrayBuffer); + assert.deepStrictEqual(file.data, data); }); -test('Static updateEntry - updates SAB entry', () => { - const cache = new Static('lib', application); - const sab1 = new SharedArrayBuffer(9); - new Uint8Array(sab1).set(Buffer.from('version 1')); - cache.initCache([{ key: '/f.js', sab: sab1, byteLength: 9, size: 9 }]); - const content2 = Buffer.from('version 2 updated'); - const sab2 = new SharedArrayBuffer(content2.byteLength); - new Uint8Array(sab2).set(content2); - cache.updateEntry({ - key: '/f.js', - sab: sab2, - byteLength: content2.byteLength, - size: content2.byteLength, - }); - const file = cache.get('/f.js'); - assert.deepStrictEqual(file.data, content2); +test('Static updateFiles - updates entries', () => { + const st = new Static('lib', application); + const data1 = Buffer.from('version 1'); + const stat1 = { size: data1.byteLength }; + st.setFiles(new Map([['/f.js', { data: data1, stat: stat1 }]])); + const data2 = Buffer.from('version 2 updated'); + const stat2 = { size: data2.byteLength }; + st.updateFiles(new Map([['/f.js', { data: data2, stat: stat2 }]])); + const file = st.get('/f.js'); + assert.deepStrictEqual(file.data, data2); }); -test('Static deleteEntry - removes entry by key', () => { - const cache = new Static('lib', application); +test('Static deleteFiles - removes entries by keys', 
() => { + const st = new Static('lib', application); const sab = new SharedArrayBuffer(4); new Uint8Array(sab).set([1, 2, 3, 4]); - cache.initCache([ - { key: '/a.js', sab, byteLength: 4, size: 4 }, - { key: '/b.js', sab, byteLength: 4, size: 4 }, - ]); - assert.strictEqual(cache.files.size, 2); - cache.deleteEntry('/a.js'); - assert.strictEqual(cache.files.size, 1); - assert.strictEqual(cache.get('/a.js'), undefined); - assert.ok(cache.get('/b.js')); + const data = Buffer.from(sab, 0, 4); + const stat = { size: 4 }; + st.setFiles( + new Map([ + ['/a.js', { data, stat }], + ['/b.js', { data, stat }], + ]), + ); + assert.strictEqual(st.files.size, 2); + st.deleteFiles(['/a.js']); + assert.strictEqual(st.files.size, 1); + assert.strictEqual(st.get('/a.js'), undefined); + assert.ok(st.get('/b.js')); }); -test('Static withData - null sab has null data', () => { - const cache = new Static('lib', application); - cache.initCache([ - { - key: '/big.bin', - sab: null, - byteLength: 0, - size: 20000000, - }, - ]); - const file = cache.get('/big.bin'); +test('Static - disk entry has null data', () => { + const st = new Static('lib', application); + const stat = { size: 20000000 }; + st.setFiles(new Map([['/big.bin', { data: null, stat, path: '/big.bin' }]])); + const file = st.get('/big.bin'); assert.strictEqual(file.data, null); - assert.strictEqual(file.size, 20000000); + assert.strictEqual(file.stat.size, 20000000); }); -test('Static SAB data is zero-copy view', () => { +test('Static - SAB data is zero-copy view', () => { const sab = new SharedArrayBuffer(5); new Uint8Array(sab).set([10, 20, 30, 40, 50]); - const cache = new Static('lib', application); - cache.initCache([{ key: '/f.bin', sab, byteLength: 5, size: 5 }]); - const file = cache.get('/f.bin'); + const data = Buffer.from(sab, 0, 5); + const stat = { size: 5 }; + const st = new Static('lib', application); + st.setFiles(new Map([['/f.bin', { data, stat }]])); + const file = st.get('/f.bin'); 
assert.strictEqual(file.data.buffer, sab); }); -test('Static initCache clears previous entries', () => { - const cache = new Static('lib', application); - const sab = new SharedArrayBuffer(2); - cache.initCache([{ key: '/old.js', sab, byteLength: 2, size: 2 }]); - assert.strictEqual(cache.files.size, 1); - cache.initCache([{ key: '/new.js', sab, byteLength: 2, size: 2 }]); - assert.strictEqual(cache.files.size, 1); - assert.strictEqual(cache.get('/old.js'), undefined); - assert.ok(cache.get('/new.js')); +test('Static setFiles clears previous entries', () => { + const st = new Static('lib', application); + const data = Buffer.from([1, 2]); + const stat = { size: 2 }; + st.setFiles(new Map([['/old.js', { data, stat }]])); + assert.strictEqual(st.files.size, 1); + st.setFiles(new Map([['/new.js', { data, stat }]])); + assert.strictEqual(st.files.size, 1); + assert.strictEqual(st.get('/old.js'), undefined); + assert.ok(st.get('/new.js')); }); diff --git a/test/static.js b/test/static.js index 7d3373f8c..9b8d534cc 100644 --- a/test/static.js +++ b/test/static.js @@ -3,6 +3,7 @@ const { test } = require('node:test'); const assert = require('node:assert'); const path = require('node:path'); +const { Readable } = require('node:stream'); const { Static } = require('../lib/static.js'); const root = process.cwd(); @@ -15,20 +16,209 @@ const application = { }, }; -test('lib/static load - should load static files correctly', async () => { - const cache = new Static('lib', application); - assert.strictEqual(cache.files instanceof Map, true); - assert.strictEqual(cache.files.size, 0); - assert.strictEqual(cache.ext, undefined); - assert.strictEqual(cache.maxFileSize, -1); - assert.strictEqual(cache.get('/example/add.js'), undefined); - - await cache.load(); - assert.strictEqual(cache.files.size, 13); - const file = cache.get('/example/add.js'); - assert.strictEqual(file.data instanceof Buffer, true); - assert.strictEqual(file.data.length, 158); - 
assert.strictEqual(cache.get('/example/unknown.js'), undefined); - assert.strictEqual(cache.ext, undefined); - assert.strictEqual(cache.maxFileSize, 10000000); +// Capture transport.write() calls +const makeTransport = (headers = {}) => { + const result = {}; + return { + req: { headers }, + write(data, code, ext, options) { + result.data = data; + result.code = code; + result.ext = ext; + result.options = options; + }, + result, + }; +}; + +// Build a SAB-backed file entry matching the projection shape from cache backends +const makeSABFile = (raw) => { + const sab = new SharedArrayBuffer(raw.byteLength); + new Uint8Array(sab).set(raw); + const data = Buffer.from(sab, 0, raw.byteLength); + const stat = { size: raw.byteLength, isFile: () => true }; + return { data, stat }; +}; + +// Build a Static with serving initialized; override threshold after if needed +const makeStatic = () => { + const st = new Static('lib', application); + st.initServing({}); + return st; +}; + +// --- Constructor --- + +test('lib/static - should create Static correctly', () => { + const st = new Static('lib', application); + assert.strictEqual(st.files instanceof Map, true); + assert.strictEqual(st.files.size, 0); + assert.strictEqual(st.get('/example/add.js'), undefined); +}); + +// --- initServing --- + +test('lib/static - initServing sets search function and finite streamThreshold', () => { + const st = makeStatic(); + assert.strictEqual(typeof st.search, 'function'); + assert.ok(Number.isFinite(st.streamThreshold)); + assert.ok(st.streamThreshold > 0); +}); + +// --- serve: exact-hit, small file (200 + Buffer) --- + +test('lib/static serve - exact-hit small file returns buffer and 200', async () => { + const st = makeStatic(); + const file = makeSABFile(Buffer.from('hello')); + st.setFiles(new Map([['/f.txt', file]])); + const t = makeTransport(); + await st.serve('/f.txt', t); + assert.strictEqual(t.result.code, 200); + assert.ok(Buffer.isBuffer(t.result.data)); + 
assert.strictEqual(t.result.ext, 'txt'); + assert.deepStrictEqual(t.result.data, file.data); +}); + +// --- serve: exact-hit, large file above threshold (200 + Readable) --- + +test('lib/static serve - large file above threshold streams as Readable', async () => { + const st = makeStatic(); + st.streamThreshold = 100; + const raw = Buffer.alloc(200, 0x41); // 200 bytes > threshold + const file = makeSABFile(raw); + st.setFiles(new Map([['/big.bin', file]])); + const t = makeTransport(); + await st.serve('/big.bin', t); + assert.strictEqual(t.result.code, 200); + assert.ok(t.result.data instanceof Readable); + assert.deepStrictEqual(t.result.options, { size: 200 }); +}); + +test('lib/static serve - streamed response delivers correct bytes', async () => { + const st = makeStatic(); + st.streamThreshold = 100; + const raw = Buffer.alloc(200, 0x42); + const file = makeSABFile(raw); + st.setFiles(new Map([['/big.bin', file]])); + const t = makeTransport(); + await st.serve('/big.bin', t); + const chunks = []; + for await (const chunk of t.result.data) chunks.push(chunk); + assert.deepStrictEqual(Buffer.concat(chunks), raw); +}); + +// --- serve: exact-hit, Range request, small file (206 + subarray) --- + +test('lib/static serve - range request on small file returns 206 with subarray', async () => { + const st = makeStatic(); + const raw = Buffer.from('0123456789'); + const file = makeSABFile(raw); + st.setFiles(new Map([['/data.bin', file]])); + const t = makeTransport({ range: 'bytes=2-5' }); + await st.serve('/data.bin', t); + assert.strictEqual(t.result.code, 206); + assert.ok(Buffer.isBuffer(t.result.data)); + assert.strictEqual(t.result.data.toString(), '2345'); + assert.deepStrictEqual(t.result.options, { start: 2, end: 5, size: 10 }); +}); + +// --- serve: exact-hit, Range request, large file (206 + Readable) --- + +test('lib/static serve - range request on large file returns 206 with Readable', async () => { + const st = makeStatic(); + st.streamThreshold = 100; 
+ const raw = Buffer.alloc(200, 0x43); + const file = makeSABFile(raw); + st.setFiles(new Map([['/big.bin', file]])); + const t = makeTransport({ range: 'bytes=0-99' }); + await st.serve('/big.bin', t); + assert.strictEqual(t.result.code, 206); + assert.ok(t.result.data instanceof Readable); + assert.deepStrictEqual(t.result.options, { start: 0, end: 99, size: 200 }); +}); + +// --- serve: invalid range → 416 --- + +test('lib/static serve - invalid range (start >= size) returns 416', async () => { + const st = makeStatic(); + const file = makeSABFile(Buffer.from('hello')); + st.setFiles(new Map([['/f.txt', file]])); + const t = makeTransport({ range: 'bytes=10-20' }); // start 10 >= size 5 + await st.serve('/f.txt', t); + assert.strictEqual(t.result.code, 416); + assert.ok(Buffer.isBuffer(t.result.data)); +}); + +// --- serve: query string is stripped --- + +test('lib/static serve - query string is stripped from path', async () => { + const st = makeStatic(); + const file = makeSABFile(Buffer.from('body {}')); + st.setFiles(new Map([['/style.css', file]])); + const t = makeTransport(); + await st.serve('/style.css?v=42', t); + assert.strictEqual(t.result.code, 200); + assert.deepStrictEqual(t.result.data, file.data); +}); + +// --- serve: zero-byte file --- + +test('lib/static serve - zero-byte SAB file is served without error', async () => { + const st = makeStatic(); + const sab = new SharedArrayBuffer(0); + const data = Buffer.from(sab, 0, 0); + const stat = { size: 0, isFile: () => true }; + st.setFiles(new Map([['/empty.txt', { data, stat }]])); + const t = makeTransport(); + await st.serve('/empty.txt', t); + assert.strictEqual(t.result.code, 200); + assert.ok(Buffer.isBuffer(t.result.data)); + assert.strictEqual(t.result.data.byteLength, 0); +}); + +// --- serve: recursive lookup — directory → index.html --- + +test('lib/static serve - directory path resolves to index.html (200, full buffer)', async () => { + const st = makeStatic(); + const file = 
makeSABFile(Buffer.from('')); + st.setFiles(new Map([['/dir/index.html', file]])); + const t = makeTransport(); + await st.serve('/dir/', t); + assert.strictEqual(t.result.code, 200); + assert.deepStrictEqual(t.result.data, file.data); +}); + +test('lib/static serve - index.html via directory lookup ignores Range header', async () => { + // Verifies the documented intentional omission of Range handling in recursive path + const st = makeStatic(); + const file = makeSABFile(Buffer.from('')); + st.setFiles(new Map([['/dir/index.html', file]])); + const t = makeTransport({ range: 'bytes=0-3' }); + await st.serve('/dir/', t); + // Must be 200, NOT 206 — recursive path skips Range handling by design + assert.strictEqual(t.result.code, 200); + assert.deepStrictEqual(t.result.data, file.data); +}); + +// --- serve: internal files (starting with '.') bypass exact-hit path --- + +test('lib/static serve - internal file bypasses exact-hit and ignores Range', async () => { + const st = makeStatic(); + const file = makeSABFile(Buffer.from('

custom 404

')); + st.setFiles(new Map([['/dir/.404.html', file]])); + const t = makeTransport({ range: 'bytes=0-3' }); + await st.serve('/dir/.404.html', t); + // Internal file goes through lookup, not exact-hit — no Range handling → no 206 + assert.strictEqual(t.result.code, 200); + assert.ok(Buffer.isBuffer(t.result.data)); +}); + +// --- serve: 404 --- + +test('lib/static serve - returns 404 for missing file', async () => { + const st = makeStatic(); + const t = makeTransport(); + await st.serve('/not-found.txt', t); + assert.strictEqual(t.result.code, 404); + assert.ok(Buffer.isBuffer(t.result.data)); }); From 514e21eedfc0c5354efff2994b7bea602dd2e5c0 Mon Sep 17 00:00:00 2001 From: turone Date: Fri, 24 Apr 2026 16:01:29 +0300 Subject: [PATCH 8/9] Update lib/cache/LimitCache.js pool is not a collection of segments, it is class class Pool { constructor(limit, baseSegmentSize) { this.limit = limit; this.baseSegmentSize = baseSegmentSize; this.segments = new Map(); this.emptySegmentIds = new Set(); this.totalUsed = 0; this.nextSegmentId = 1; } Co-authored-by: Timur Shemsedinov --- lib/cache/LimitCache.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/cache/LimitCache.js b/lib/cache/LimitCache.js index 3d7f25c70..9f8da21b6 100644 --- a/lib/cache/LimitCache.js +++ b/lib/cache/LimitCache.js @@ -55,7 +55,7 @@ class Pool { class Registry { constructor(pool) { - this.pool = pool; + this.all = segments; this.freeExtents = new Map(); this.tails = new Map(); } From e2b4e8649062357d69ea4f667e025ce0f133d6ee Mon Sep 17 00:00:00 2001 From: Turone Date: Sat, 25 Apr 2026 13:16:26 +0300 Subject: [PATCH 9/9] refactor: address review comments, make cache module autonomous --- VFS-SAB.md | 1057 +++++++++-------- impress.js | 21 +- lib/application.js | 8 +- .../{LimitCache.js => FilesystemCache.js} | 155 +-- lib/cache/PerFileCache.js | 108 -- lib/cache/PlacementSource.js | 39 +- lib/cache/SharedCache.js | 165 +-- lib/static.js | 5 +- lib/worker.js | 20 +- 
schemas/config/cache.js | 1 - test/cache-shared.js | 86 +- 11 files changed, 750 insertions(+), 915 deletions(-) rename lib/cache/{LimitCache.js => FilesystemCache.js} (71%) delete mode 100644 lib/cache/PerFileCache.js diff --git a/VFS-SAB.md b/VFS-SAB.md index cd2b9461d..8fb73f1c8 100644 --- a/VFS-SAB.md +++ b/VFS-SAB.md @@ -1,507 +1,550 @@ -# SharedArrayBuffer Cache in Impress - -## Motivation - -Impress uses `worker_threads` to handle HTTP requests. Each worker serves static files (HTML, CSS, JS, images, etc.). Without shared memory, every worker keeps its own copy of every file — with 8 workers and 100 MiB of static assets, total consumption reaches 800 MiB. SharedArrayBuffer stores all files in shared memory accessible to all threads. - -## Benchmarks - -Baseline: no SAB (per-worker file copies). Delta columns show improvement relative to baseline — positive values are better even for latency and memory metrics. - -| File | Metric | Before (no SAB) | SAB limit mode | Delta limit | SAB per-file mode | Delta per-file | -| --- | --- | ---: | ---: | ---: | ---: | ---: | -| bench-64k.bin | RPS | 1767.14 | 1663.80 | -5.85% | 1711.10 | -3.17% | -| bench-64k.bin | Throughput MB/s | 111.16 | 104.65 | -5.86% | 107.66 | -3.15% | -| bench-64k.bin | p95 ms | 330.00 | 321.00 | +2.73% | 62.00 | +81.21% | -| bench-256k.bin | RPS | 507.04 | 507.21 | +0.03% | 494.04 | -2.56% | -| bench-256k.bin | Throughput MB/s | 126.97 | 127.02 | +0.04% | 123.73 | -2.55% | -| bench-256k.bin | p95 ms | 752.00 | 770.00 | -2.39% | 254.00 | +66.22% | -| bench-1m.bin | RPS | 127.80 | 128.71 | +0.71% | 167.16 | +30.80% | -| bench-1m.bin | Throughput MB/s | 127.84 | 128.79 | +0.74% | 167.21 | +30.80% | -| bench-1m.bin | p95 ms | 1976.00 | 2047.00 | -3.59% | 1017.00 | +48.53% | -| bench-5m.bin | RPS | 83.12 | 113.72 | +36.81% | 100.58 | +21.01% | -| bench-5m.bin | Throughput MB/s | 415.72 | 568.79 | +36.82% | 503.00 | +20.99% | -| bench-5m.bin | p95 ms | 23172.00 | 13300.00 | +42.60% | 5187.00 | 
+77.62% | -| bench-10m.bin | RPS | 79.60 | 93.40 | +17.34% | 99.75 | +25.31% | -| bench-10m.bin | Throughput MB/s | 759.08 | 891.02 | +17.38% | 951.38 | +25.33% | -| bench-10m.bin | p95 ms | 25680.00 | 23264.00 | +9.41% | 9827.00 | +61.73% | - -| System metric | Before (no SAB) | SAB limit mode | Delta limit | SAB per-file mode | Delta per-file | -| --- | ---: | ---: | ---: | ---: | ---: | -| CPU max % | 22.91 | 21.82 | +4.76% | 21.03 | +8.21% | -| Working set max MB | 5066.02 | 1207.50 | +76.16% | 1256.71 | +75.19% | -| Private max MB | 5110.43 | 1360.43 | +73.38% | 1297.94 | +74.60% | - -## Architecture - -The system is split into four modules behind one orchestrator: - -| Module | Location | Purpose | -|--------|----------|---------| -| **SharedCache** | `lib/cache/SharedCache.js` | Orchestration: mode selection, configuration, watcher, ACK tracking, compaction dispatch | -| **LimitCache** | `lib/cache/LimitCache.js` | Limit-mode backend: slab allocator with pooled SAB segments, extent-based allocation, compaction | -| **PerFileCache** | `lib/cache/PerFileCache.js` | Per-file-mode backend: one SAB per file, no pooling, no ACK | -| **PlacementSource** | `lib/cache/PlacementSource.js` | Filesystem scanner, returns `{ stat, path }` per file | - -`SharedCache` selects the backend once at construction time based on `config.cache.mode` and delegates all allocation, snapshot, projection, free, and compact operations to it. - -`LimitCache` has no dependencies on Node.js built-ins or Impress internals. This allows it to be used in a browser or in tests without mocks.
- -## Dual-mode design - -The cache supports two backends, selectable via `config.cache.mode`: - -| | **Limit mode** (default) | **Per-file mode** | -|---|---|---| -| Backend class | `LimitCache` | `PerFileCache` | -| SAB strategy | Pooled segments (slab allocator) | One SAB per file | -| ACK required | Yes | No | -| Compaction | Yes — defragments segments | No — `compact()` returns `null` | -| Free | Deferred until all workers ACK | Immediate (GC when references drop) | -| Epoch flush | `#flushEpochWithAck` (tracks `updateId`) | `#flushEpochFireAndForget` (no `updateId`) | -| Snapshot segments | `[{ id, sab }, ...]` | `null` | -| Shared entry shape | `{ kind: 'shared', segmentId, offset, length, stat }` | `{ kind: 'shared', sab, length, stat }` | - -Worker-side code detects the mode by the presence of `sharedCache.segments` in the snapshot. - -`needsAck` is not a property of the backends; `SharedCache` derives it directly from `mode === 'limit'`. - ---- - -## Limit mode: Slab Allocator - -The memory management model follows the Linux SLUB allocator principle: SharedArrayBuffer segments are **never returned to the OS**. Instead, they go through a lifecycle: - -``` -┌──────────┐ files deleted ┌──────────┐ memory needed ┌──────────┐ -│ Active │ ──────────────────> │ Clean │ ──────────────────> │ Active │ -│ (data) │ │ (empty) │ │ (data) │ -└──────────┘ └──────────┘ └──────────┘ -``` - -**Why SABs are never freed:** V8 cannot reduce reserved virtual memory after a SharedArrayBuffer is deallocated. Recreating a SAB of the same size still allocates a new page. Retaining empty segments (`cleanSegmentIds`) and reusing them completely eliminates allocation system calls. - -### Internal classes in LimitCache.js - -#### Pool - -Manages the memory budget and the set of SAB segments.
- -``` -Pool -├── segments: Map — all segments -├── cleanSegmentIds: Set — empty, ready for reuse -├── limit: number — total budget (default 1 GiB) -├── baseSegmentSize: number — segment size (default 64 MiB) -├── totalUsed: number — total size of all SABs -│ -├── createBaseSegment() — takes a clean segment or creates a new SAB -├── retireSegment(id) — marks an empty segment as clean -└── getSegment(id) — access by ID -``` - -Key detail: `baseSegmentSize = Math.ceil(maxFileSize / configured) * configured`. The segment size is rounded up to the nearest multiple of the configured value that can fit `maxFileSize`. For example, with `configured = 64 MiB` and `maxFileSize = 100 MiB`, the segment size becomes `128 MiB` (2 × 64). When `maxFileSize` is smaller than `configured` (the typical case, e.g. `10 MB` and `64 MiB`), the segment size stays at `configured`. The `limit` must be evenly divisible by the effective segment size — otherwise the remainder is wasted. There are no dedicated segments — a single segment type serves all files. - -#### Registry - -Extent-based allocator within base segments. Each segment is tracked via: - -- **`tails: Map`** — boundary of written data (high water mark) -- **`freeExtents: Map`** — freed regions - -`allocate(size, noCreate=false)` algorithm: - -``` -1. Best-fit search across free extents of all segments - → found a match → return { segmentId, offset } - → exact size match → remove the extent - → larger than needed → shrink the extent - -2. Tail-append: find a segment where tail + size ≤ baseSegmentSize - → found → advance tail, return - -3. New segment (if !noCreate): - → pool.createBaseSegment() → registerSegment → tail = size - → budget exhausted → return null (file becomes a disk entry) -``` - -With `noCreate=true` (used by compact), step 3 is skipped — data is only moved into existing segments.
- -`free()` inserts an extent into a sorted list and merges adjacent ones: - -``` -[100..200] + [200..300] > [100..300] // merge right -[0..100] + [100..300] > [0..300] // merge left -``` - -### Entry types (limit mode) - -**Shared entry** — file in SAB: -```js -{ kind: 'shared', segmentId, offset, length, stat } -``` -Zero-byte files are shared entries with `segmentId: 0, offset: 0, length: 0` — no segment is allocated. - -**Disk entry** — file on disk (size > maxFileSize, or budget exhausted): -```js -{ kind: 'disk', path, stat, data: null } -``` - ---- - -## Per-file mode - -Each file with `size <= maxFileSize` gets its own `SharedArrayBuffer`. No pool, no allocator metadata, no compaction, no ACK flow. - -### Entry types (per-file mode) - -**Shared entry** — file in its own SAB: -```js -{ kind: 'shared', sab, length, stat } -``` -Zero-byte files use `new SharedArrayBuffer(0)` — a valid zero-length SAB. - -**Disk entry** — same as limit mode: -```js -{ kind: 'disk', path, stat, data: null } -``` - -`free()` and `compact()` are no-ops. Snapshot returns `{ segments: null, indexes: {...} }`. - ---- - -## File loading - -### Initial load (main thread) - -``` -SharedCache.initialize() - > for each placement: - source.load() // PlacementSource scans the directory - cache.load(name, files) // Backend distributes files across SABs -``` - -If shared cache initialization fails (configuration, filesystem, or reader error), application startup is aborted — there is no fallback to per-worker static loading. Empty placements are valid: initialization succeeds with an empty index and zero allocated segments. - -In limit mode, `load()` sorts files by descending size — large files are placed first, reducing fragmentation. For each file, `#allocateEntry()` is called: - -1. `size > maxFileSize` > disk entry -2. No data and no reader > disk entry -3. `size === 0` > shared entry with `segmentId: 0, offset: 0, length: 0` (no segment allocated) -4. 
`registry.allocate(size)` > obtains `{ segmentId, offset }` — free space in a segment -5. `reader(path, sab, offset, size)` > reads the file from disk directly into SAB, bypassing the heap; if `data` is already in memory — copies via `Uint8Array.set(data)` - -In per-file mode, `#allocateEntry()` creates a new `SharedArrayBuffer(size)` for each file (or `SharedArrayBuffer(0)` for zero-byte files) and reads the content into it directly. - -The reader is injected when SharedCache is created — it is `async (path, sab, offset, size) => void`. In Node.js it is implemented via `fh.read(Buffer.from(sab, offset, size))` — a Buffer view is created over the SharedArrayBuffer region, and `fs` writes data directly there. - -### Delivery to workers - -``` -workerData.sharedCache = cache.snapshot() -``` - -Limit mode snapshot: -```js -{ segments: [{ id, sab }, ...], indexes: { placement: { entries: [...] } } } -``` - -Per-file mode snapshot: -```js -{ segments: null, indexes: { placement: { entries: [...] } } } -``` - -SharedArrayBuffer is passed via `workerData` — V8 transfers only a reference, no data copying occurs. - -## Worker-side projection - -The worker detects the backend mode by the presence of `sharedCache.segments`: - -```js -// worker.js — module scope -if (sharedCache && sharedCache.segments) { - // Limit mode: build segmentsMap, use LimitCache.projectEntry - const { LimitCache } = require('./cache/LimitCache.js'); - for (const seg of sharedCache.segments) segmentsMap.set(seg.id, seg.sab); - projectEntry = (entry) => LimitCache.projectEntry(entry, segmentsMap); -} else if (sharedCache) { - // Per-file mode: use PerFileCache.projectEntry - const { PerFileCache } = require('./cache/PerLimitCache.js'); - projectEntry = (entry) => PerFileCache.projectEntry(entry); -} -``` - -ACK function is also mode-dependent: - -```js -const sendAck = sharedCache && sharedCache.segments - ? 
(updateId) => parentPort.postMessage({ name: 'ack-update', updateId }) - : () => {}; // no-op in per-file mode -``` - -### Limit mode projection - -Each shared entry is projected into an object with an eager Buffer view: - -```js -{ data: Buffer.from(segmentsMap.get(segmentId), offset, length), stat } -``` - -Zero-byte entries (`length === 0`) are projected as `{ data: Buffer.alloc(0), stat }` without consulting `segmentsMap`. `free()` also skips zero-byte entries — they hold no segment allocation. - -`Buffer.from(sab, offset, length)` creates a lightweight view (~64 bytes descriptor) over the SAB region — no data copy. The view is created once at projection time. Since segments are never freed (slab retention), SAB references in `segmentsMap` live for the entire process lifetime. When a file is removed via `deleteFiles`, the projected object loses its last reference and is GC'd along with the Buffer view. Stale data in the segment is overwritten upon reuse. - -### Per-file mode projection - -Each shared entry is projected similarly, but the SAB is stored directly on the entry: - -```js -{ data: Buffer.from(entry.sab, 0, length), stat } -``` - -No `segmentsMap` is needed — each entry carries its own SAB reference. - -### Common - -Disk entries in both modes are projected as `{ data: null, stat, path }`. - -## Hot-reload: epoch-based delta updates - -metawatch debounces filesystem events, collecting them into a batch during a quiet period. SharedCache uses **epoch coalescing** on top of this: all changes and deletions in a single metawatch batch are collected into one epoch, then flushed as minimal broadcasts. - -Routing from a filesystem event to a placement is done by the first path segment relative to application root, not by absolute-path prefix matching. This avoids collisions such as `static` vs `static2`. 
- -``` -metawatch SharedCache - ¦ ¦ - +-- debounce fs.watch events ¦ - +-- 'before' ------------------> epoch = { updates, deletes, oldEntries } - +-- 'change' file1 -----------> push processChange() promise - +-- 'change' file2 -----------> push processChange() promise - +-- 'delete' file3 -----------> processDelete() (sync) - +-- 'after' -------------------> Promise.all > flushEpoch() -``` - -`#flushEpoch` is set once during `watch()` — it is `#flushEpochWithAck` in limit mode and `#flushEpochFireAndForget` in per-file mode. - -### Limit mode flush - -Sends at most `2 ? placements` messages (one `file-update` and one `file-delete` per placement), each carrying an `updateId`. All old entries are tracked against the **last** `updateId` — since `worker_threads` guarantees FIFO ordering, an ACK for the last message implies all prior messages have been processed. - -``` -flushEpochWithAck: - 1 file-update per placement (entries + newSegments + updateId) - 1 file-delete per placement (keys + updateId) - trackUpdate(lastUpdateId, all old entries) -``` - -1000 file changes > 1 broadcast with 1000 entries > N workers receive 1 message > N ACKs > 1 free cycle. - -### Per-file mode flush - -Sends the same `file-update` and `file-delete` messages but without `updateId` or `newSegments`. Old entries from the epoch are not tracked — memory is freed by GC when old SABs lose their last reference. - -``` -flushEpochFireAndForget: - 1 file-update per placement (entries only) - 1 file-delete per placement (keys only) -``` - -### ACK protocol (limit mode only) - -Old entries are not freed immediately — a worker may be reading data at the moment of an update. Protocol: - -``` -Main thread Workers - ¦ ¦ - +-- file-update (updateId=5) -----> ¦ - +-- file-delete (updateId=6) -----> ¦ - ¦ +-- apply update, ack 5 - ¦ <-------------------- ack 5 (ignored, not tracked) - ¦ +-- apply delete, ack 6 - ¦ <-------------------- ack 6 -----+ - ¦ ... all workers acked 6 ... 
¦ - +-- free(all oldEntries) ¦ - +-- tryCompact() ¦ - L------------------------------------- -``` - -If a worker crashes, the `worker.exit` event triggers `sharedCache.handleWorkerExit(id)`, which immediately removes the worker from all pending ACK sets. If it was the last expected worker, `free` is called right away. The new worker is restarted and receives a fresh `snapshot()`. - -There is no timeout-based forced free — a live worker will always eventually process its message queue and send an ACK. Forced free of a slow-but-alive worker would risk data corruption: the freed extent could be reused by another file while the worker's Buffer view still points to it. - -## Compaction (limit mode only) - -After entries are freed, `compact(threshold=0.3)` is called: - -1. Finds the base segment with the lowest utilization below `threshold` -2. Requires at least 2 base segments (a single segment has nowhere to compact to) -3. Attempts to move all files from the target segment into others (via `allocate(size, noCreate=true)`) -4. On success — updates indexes, groups moved files by placement, sends one `file-update` per affected placement, and tracks all `oldEntries` against the **last** `updateId` of the compaction batch -5. On failure — full rollback: restores extents and tail of the target segment - -Compaction uses the same batch-first ACK rule as epoch flush: workers may receive several `file-update` messages from one compaction, but memory is released only after the ACK for the last message in that batch. - -After compaction, the emptied segment automatically enters `cleanSegmentIds` through the normal `free > retireSegment` cycle. 
- -``` -Before compaction: - -Segment 1: [fileA][____][fileB][________] utilization 20% -Segment 2: [fileC][fileD][______________] utilization 60% - -After: - -Segment 1: > clean (empty, ready for reuse) -Segment 2: [fileC][fileD][fileA][fileB][_] utilization 80% -``` - -## Serving (lib/static.js) - -`Static` is the worker-side serving layer. Each shared-cache placement creates a `Static` instance that holds projected `files` and handles HTTP responses. - -### Initialization - -`initServing(config)` is called after projection. It reads two config options: - -- **`streamThreshold`** — file size above which responses are streamed rather than written as a single buffer. Default: `'1 mb'`. Accepts any size unit (`sizeToBytes`). -- **`virtualFS`** — enables recursive virtual filesystem resolution. Default: `false`. - -When `virtualFS` is **off** (default): -- `search` = `lookup()` — exact match + `index.html` for directory paths -- `errorPage` — generates a minimal HTML page (`

404 Not Found

`) - -When `virtualFS` is **on**: -- `search` = `find()` — walks up the directory tree looking for `index.html`, `.virtual.html`, `.{code}.html` -- `errorPage` — searches for custom error pages (`.404.html`, `.416.html`) in the file tree - -### Serve flow - -``` -serve(url, transport) - ¦ - +- 1. Fast exact-hit (file has data + stat, not internal) - ¦ +-- Range request? > validate > stream or subarray > 206 - ¦ +-- size > streamThreshold? > createSABStream() > 200 - ¦ L-- small file > transport.write(data) > 200 - ¦ - +- 2. Recursive search via lookup() or find() - ¦ +-- file with data + stat (e.g. index.html via directory path) > write directly - ¦ L-- file with data only (status/virtual pages) > write directly - ¦ Range and streaming intentionally omitted — always small HTML files - ¦ - +- 3. Disk fallback (uncached or oversized) - ¦ +-- Range request? > validate > fs.createReadStream(options) > 206 - ¦ L-- fs.createReadStream() > 200 - ¦ - L- 4. 404 -``` - -### SAB streaming - -When a cached file exceeds `streamThreshold`, it is sent via `createSABStream()` which reads 64 KiB chunks from the SAB Buffer view. This applies to both full responses and range requests. Files below the threshold are written as a single buffer (or `subarray` for ranges). - -### Range requests - -Supported in the exact-hit and disk fallback paths: -- Valid range > 206 Partial Content (stream or subarray depending on size vs threshold) -- Invalid range (`start >= end`, `start >= size`, `end >= size`) > 416 Range Not Satisfiable - -Range requests reaching the recursive search path (step 2) are ignored by design — those paths resolve only small HTML files that are always served in full. - -### Disk fallback - -Files with `data: null` (oversized or budget-exhausted) are served from disk via `fs.createReadStream()`, with range support via `start`/`end` options. 
- -## Configuration - -```js -// config/cache.js -({ - mode: 'limit', // 'limit' (default) or any other value for per-file mode - maxFileSize: '10 mb', // files larger than this > disk entry - streamThreshold: '1 mb', // files larger than this > streamed in chunks (default '1 mb') - virtualFS: false, // enable recursive virtual FS resolution (default false) - placements: [ - { name: 'static' }, - { name: 'resources' }, - { name: 'assets', ext: ['.png', '.jpg', '.woff2'] }, - ], - // Limit mode options (ignored in per-file mode): - sab: { - limit: '1 gib', // total SAB budget (must be divisible by segment size) - baseSegmentSize: '64 mib', // single segment size (must be ? maxFileSize) - }, -}); -``` - -All size values support both binary (KiB, MiB, GiB) and decimal (KB, MB, GB) units. - -The entire `cache` section is optional — when absent, all defaults apply (`mode: 'limit'`, `maxFileSize: '10 mb'`, `streamThreshold: '1 mb'`, `virtualFS: false`, `sab.limit: '1 gib'`, `sab.baseSegmentSize: '64 mib'`, placements: `static` + `resources`). - -**Important (limit mode):** `limit` must be evenly divisible by the effective `baseSegmentSize`. Otherwise the remainder is wasted — Pool cannot create a segment smaller than `baseSegmentSize`. The effective segment size is `Math.ceil(maxFileSize / configured) * configured`. - -## Safety invariants - -**Common (both modes):** -- Workers **never write** to SharedArrayBuffer -- All worker Buffer views are zero-copy descriptors over shared memory - -**Limit mode:** -- Old memory is freed **only after ACK** from all workers or worker exit -- SAB references in worker `segmentsMap` live for the entire process lifetime (slab retention) -- All worker Buffer views reference SABs from a **single** `segmentsMap` (not a copy) -- SAB segments are **never returned to the OS** — only reused -- Total memory usage is always ? 
`limit` - -**Per-file mode:** -- Each SAB is independent — no cross-file memory sharing -- Old SABs are freed by GC when all workers drop their references -- No memory budget enforcement — total usage scales with total file size - -## Patterns and influences - -The cache design draws on several well-known systems patterns: - -- **SLUB slab allocator** (Linux kernel) — segments are never returned to the OS; empty segments are marked clean and reused, eliminating allocation system calls (limit mode) -- **Extent-based allocation** (ext4, XFS) — free space tracked as `{ offset, length }` extents with best-fit search and adjacent merge on free (limit mode) -- **Event coalescing / group commit** (PostgreSQL WAL, Nagle's algorithm) — metawatch debounces fs events into batches, SharedCache coalesces each batch into minimal broadcasts via epoch flush (both modes) -- **Copy-on-write update** (MVCC) — file updates allocate a new extent, old data lives until all workers ACK; readers never see partial writes (limit mode) -- **Dependency injection** — LimitCache accepts an injectable `reader` function, keeping it free of Node.js built-in dependencies for cross-platform use - -## Data flow diagram - -``` - -----------------------------------¬ - ¦ Main Thread ¦ - ¦ ¦ - ¦ SharedCache (orchestrator) ¦ - ¦ +-- mode detection ¦ - ¦ +-- LimitCache (limit mode) ¦ - ¦ ¦ +-- Pool ¦ - ¦ ¦ ¦ L-- SAB segments ¦ - ¦ ¦ L-- Registry ¦ - ¦ ¦ L-- extents/tails ¦ - ¦ +-- PerFileCache (per-file mode) ¦ - ¦ ¦ L-- individual SABs ¦ - ¦ +-- PlacementSource[] ¦ - ¦ L-- Watcher ¦ - L----------T------------------------ - ¦ - snapshot / file-update / file-delete - ¦ - -------------------+------------------¬ - ¡ ¡ ¡ - --------------¬ --------------¬ --------------¬ - ¦ Worker 1 ¦ ¦ Worker 2 ¦ ¦ Worker N ¦ - ¦ ¦ ¦ ¦ ¦ ¦ - ¦ mode detect ¦ ¦ mode detect ¦ ¦ mode detect ¦ - ¦ project() ¦ ¦ project() ¦ ¦ project() ¦ - ¦ ¦ ¦ ¦ ¦ ¦ - ¦ place.files ¦ ¦ place.files ¦ ¦ place.files ¦ - ¦ (views) ¦ ¦ (views) ¦ ¦ 
(views) ¦ - L-------------- L-------------- L-------------- - ¦ ¦ ¦ - L---- Buffer.from(sab, ...) ----------- - zero-copy data access -``` +# SharedArrayBuffer Cache in Impress + +## Motivation + +Impress uses `worker_threads` to handle HTTP requests. Each worker serves static files (HTML, CSS, JS, images, etc.). Without shared memory, every worker keeps its own copy of every file — with 8 workers and 100 MiB of static assets, total consumption reaches 800 MiB. SharedArrayBuffer stores all files in shared memory accessible to all threads. + +## Benchmarks + +Baseline: no SAB (per-worker file copies). Delta columns show improvement relative to baseline — positive values are better even for latency and memory metrics. + +### Compare before-sab -> after-sab + +| File | Metric | Before | After | Delta | +| --- | --- | ---: | ---: | ---: | +| bench-64k.bin | RPS | 1767.14 | 1759.49 | -0.43% | +| bench-64k.bin | Throughput MB/s | 111.16 | 110.67 | -0.44% | +| bench-64k.bin | p95 ms | 330.00 | 309.00 | +6.36% | + +| bench-256k.bin | RPS | 507.04 | 507.04 | +0.00% | +| bench-256k.bin | Throughput MB/s | 126.97 | 126.97 | +0.00% | +| bench-256k.bin | p95 ms | 752.00 | 748.00 | +0.53% | + +| bench-1m.bin | RPS | 127.80 | 128.63 | +0.65% | +| bench-1m.bin | Throughput MB/s | 127.84 | 128.71 | +0.68% | +| bench-1m.bin | p95 ms | 1976.00 | 1993.00 | -0.86% | + +| bench-5m.bin | RPS | 83.12 | 114.00 | +37.15% | +| bench-5m.bin | Throughput MB/s | 415.72 | 570.22 | +37.16% | +| bench-5m.bin | p95 ms | 23172.00 | 13236.00 | +42.88% | + +| bench-10m.bin | RPS | 79.60 | 92.00 | +15.58% | +| bench-10m.bin | Throughput MB/s | 759.08 | 877.44 | +15.59% | +| bench-10m.bin | p95 ms | 25680.00 | 22631.00 | +11.87% | + +| System metric | Before | After | Delta | +| --- | ---: | ---: | ---: | +| CPU max % | 22.91 | 23.65 | -3.23% | +| Working set max MB | 5066.02 | 1210.59 | +76.10% | +| Private max MB | 5110.43 | 1358.33 | +73.42% | + +## Architecture + +The system is split into three modules
behind one orchestrator: + +| Module | Location | Purpose | +|--------|----------|---------| +| **SharedCache** | `lib/cache/SharedCache.js` | Orchestration: watcher, ACK tracking, compaction dispatch, broadcast | +| **FilesystemCache** | `lib/cache/FilesystemCache.js` | Slab allocator with pooled SAB segments, extent-based allocation, compaction | +| **PlacementSource** | `lib/cache/PlacementSource.js` | Filesystem scanner, returns `{ stat, path }` per file | + +`SharedCache` owns the `FilesystemCache` instance and delegates all allocation, snapshot, projection, free, and compact operations to it. + +`FilesystemCache` has no dependencies on Node.js built-ins. This allows it to be used in a browser or in tests without mocks. + +## Limit mode: Slab Allocator + +The memory management model follows the Linux SLUB allocator principle: SharedArrayBuffer segments are **never returned to the OS**. Instead, they go through a lifecycle: + +``` +-----------� files deleted -----------� memory needed -----------� +� Active � ------------------> � Clean � ------------------> � Active � +� (data) � � (empty) � � (data) � +L----------- L----------- L----------- +``` + +**Why SABs are never freed:** V8 cannot reduce reserved virtual memory after a SharedArrayBuffer is deallocated. Recreating a SAB of the same size still allocates a new page. Retaining empty segments (`emptySegmentIds`) and reusing them completely eliminates allocation system calls. + +### Internal classes in FilesystemCache.js + +#### Pool + +Manages the memory budget and the set of SAB segments. 
+ +``` +Pool ++-- segments: Map � all segments ++-- emptySegmentIds: Set — empty, ready for reuse ++-- limit: number � total budget (default 1 GiB) ++-- baseSegmentSize: number � segment size (default 64 MiB) ++-- totalUsed: number � total size of all SABs +� ++-- createBaseSegment() � takes a clean segment or creates a new SAB ++-- freeSegment(id) — marks an empty segment as clean +L-- getSegment(id) � access by ID +``` + +Key detail: `baseSegmentSize = Math.ceil(maxFileSize / configured) * configured`. The segment size is rounded up to the nearest multiple of the configured value that can fit `maxFileSize`. For example, with `configured = 64 MiB` and `maxFileSize = 100 MiB`, the segment size becomes `128 MiB` (2 ? 64). When `maxFileSize` is smaller than `configured` (the typical case, e.g. `10 MB` and `64 MiB`), the segment size stays at `configured`. The `limit` must be evenly divisible by the effective segment size � otherwise the remainder is wasted. There are no dedicated segments � a single segment type serves all files. + +#### SegmentRegistry + +Extent-based allocator within base segments. Each segment is tracked via: + +- **`partially: Map`** � boundary of written data (high water mark) +- **`empty: Map>`** � freed regions + +`allocate(size, limitReached=false)` algorithm: + +``` +1. Best-fit search across free extents of all segments + > found a match > return { segmentId, offset } + > exact size match > remove the extent + > larger than needed > shrink the extent + +2. Tail-append: find a segment where tail + size ? baseSegmentSize + > found > advance tail, return + +3. New segment (if !limitReached): + > pool.createBaseSegment() > registerSegment > tail = size + > budget exhausted > return null (file becomes a disk entry) +``` + +With `limitReached=true` (used by compact), step 3 is skipped � data is only moved into existing segments. 
+ +`free()` inserts an extent into a sorted list and merges adjacent ones: + +``` +[100..200] + [200..300] > [100..300] // merge right +[0..100] + [100..300] > [0..300] // merge left +``` + +### Entry types (limit mode) + +**Shared entry** � file in SAB: +```js +{ kind: 'shared', segmentId, offset, length, stat } +``` +Zero-byte files are shared entries with `segmentId: 0, offset: 0, length: 0` � no segment is allocated. + +**Disk entry** � file on disk (size > maxFileSize, or budget exhausted): +```js +{ kind: 'disk', path, stat, data: null } +``` + +--- + + +## File loading + +### Initial load (main thread) + +``` +SharedCache.initialize() + > for each placement: + source.load() // PlacementSource scans the directory + cache.load(name, files) // Backend distributes files across SABs +``` + +If shared cache initialization fails (configuration, filesystem, or reader error), application startup is aborted � there is no fallback to per-worker static loading. Empty placements are valid: initialization succeeds with an empty index and zero allocated segments. + +In limit mode, `load()` sorts files by descending size � large files are placed first, reducing fragmentation. For each file, `#allocateEntry()` is called: + +1. `size > maxFileSize` > disk entry +2. No data and no reader > disk entry +3. `size === 0` > shared entry with `segmentId: 0, offset: 0, length: 0` (no segment allocated) +4. `registry.allocate(size)` > obtains `{ segmentId, offset }` � free space in a segment +5. `reader(path, sab, offset, size)` > reads the file from disk directly into SAB, bypassing the heap; if `data` is already in memory � copies via `Uint8Array.set(data)` + + +The reader is injected when SharedCache is created � it is `async (path, sab, offset, size) => void`. In Node.js it is implemented via `fh.read(Buffer.from(sab, offset, size))` � a Buffer view is created over the SharedArrayBuffer region, and `fs` writes data directly there. 
+ +### Delivery to workers + +``` +workerData.sharedCache = cache.snapshot() +``` + +Limit mode snapshot: +```js +{ segments: [{ id, sab }, ...], filesystems: { placement: { entries: [...] } } } +``` + + +SharedArrayBuffer is passed via `workerData` � V8 transfers only a reference, no data copying occurs. + +## Worker-side projection + +```js +const { FilesystemCache } = require('./cache/FilesystemCache.js'); +const segmentsMap = new Map(); +for (const seg of sharedCache.segments) segmentsMap.set(seg.id, seg.sab); +const projectEntry = (entry) => FilesystemCache.projectEntry(entry, segmentsMap); +``` + +ACK: + +```js +const sendAck = (updateId) => parentPort.postMessage({ name: 'ack-update', updateId }); +``` + + +### Limit mode projection + +Each shared entry is projected into an object with an eager Buffer view: + +```js +{ data: Buffer.from(segmentsMap.get(segmentId), offset, length), stat } +``` + +Zero-byte entries (`length === 0`) are projected as `{ data: Buffer.alloc(0), stat }` without consulting `segmentsMap`. `free()` also skips zero-byte entries � they hold no segment allocation. + +`Buffer.from(sab, offset, length)` creates a lightweight view (~64 bytes descriptor) over the SAB region � no data copy. The view is created once at projection time. Since segments are never freed (slab retention), SAB references in `segmentsMap` live for the entire process lifetime. When a file is removed via `deleteFiles`, the projected object loses its last reference and is GC'd along with the Buffer view. Stale data in the segment is overwritten upon reuse. + + +### Common + +Disk entries in both modes are projected as `{ data: null, stat, path }`. + +## Hot-reload: epoch-based delta updates + +metawatch debounces filesystem events, collecting them into a batch during a quiet period. SharedCache uses **epoch coalescing** on top of this: all changes and deletions in a single metawatch batch are collected into one epoch, then flushed as minimal broadcasts. 
+ +Routing from a filesystem event to a placement is done by the first path segment relative to application root, not by absolute-path prefix matching. This avoids collisions such as `static` vs `static2`. + +``` +metawatch SharedCache + � � + +-- debounce fs.watch events � + +-- 'before' ------------------> epoch = { updates, deletes, oldEntries } + +-- 'change' file1 -----------> push processChange() promise + +-- 'change' file2 -----------> push processChange() promise + +-- 'delete' file3 -----------> processDelete() (sync) + +-- 'after' -------------------> Promise.all > flushEpoch() +``` + +`#flushEpoch` delegates to `#flushEpochWithAck`. + +### Limit mode flush + +Sends at most `2 ? placements` messages (one `file-update` and one `file-delete` per placement), each carrying an `updateId`. All old entries are tracked against the **last** `updateId` � since `worker_threads` guarantees FIFO ordering, an ACK for the last message implies all prior messages have been processed. + +``` +flushEpochWithAck: + 1 file-update per placement (entries + newSegments + updateId) + 1 file-delete per placement (keys + updateId) + trackUpdate(lastUpdateId, all old entries) +``` + +1000 file changes > 1 broadcast with 1000 entries > N workers receive 1 message > N ACKs > 1 free cycle. + +### ACK protocol (limit mode only) + +Old entries are not freed immediately � a worker may be reading data at the moment of an update. Protocol: + +``` +Main thread Workers + � � + +-- file-update (updateId=5) -----> � + +-- file-delete (updateId=6) -----> � + � +-- apply update, ack 5 + � <-------------------- ack 5 (ignored, not tracked) + � +-- apply delete, ack 6 + � <-------------------- ack 6 -----+ + � ... all workers acked 6 ... � + +-- free(all oldEntries) � + +-- tryCompact() � + L------------------------------------- +``` + +If a worker crashes, the `worker.exit` event triggers `sharedCache.handleWorkerExit(id)`, which immediately removes the worker from all pending ACK sets. 
If it was the last expected worker, `free` is called right away. The new worker is restarted and receives a fresh `snapshot()`. + +There is no timeout-based forced free � a live worker will always eventually process its message queue and send an ACK. Forced free of a slow-but-alive worker would risk data corruption: the freed extent could be reused by another file while the worker's Buffer view still points to it. + +## Compaction (limit mode only) + +After entries are freed, `compact(threshold=0.3)` is called: + +1. Finds the base segment with the lowest utilization below `threshold` +2. Requires at least 2 base segments (a single segment has nowhere to compact to) +3. Attempts to move all files from the target segment into others (via `allocate(size, limitReached=true)`) +4. On success � updates indexes, groups moved files by placement, sends one `file-update` per affected placement, and tracks all `oldEntries` against the **last** `updateId` of the compaction batch +5. On failure � full rollback: restores extents and tail of the target segment + +Compaction uses the same batch-first ACK rule as epoch flush: workers may receive several `file-update` messages from one compaction, but memory is released only after the ACK for the last message in that batch. + +After compaction, the emptied segment automatically enters `emptySegmentIds` through the normal `free > freeSegment` cycle. + +``` +Before compaction: + +Segment 1: [fileA][____][fileB][________] utilization 20% +Segment 2: [fileC][fileD][______________] utilization 60% + +After: + +Segment 1: > clean (empty, ready for reuse) +Segment 2: [fileC][fileD][fileA][fileB][_] utilization 80% +``` + +## Serving (lib/static.js) + +`Static` is the worker-side serving layer. Each shared-cache placement creates a `Static` instance that holds projected `files` and handles HTTP responses. + +### Initialization + +`initServing(config)` is called after projection. 
It reads two config options: + +- **`streamThreshold`** � file size above which responses are streamed rather than written as a single buffer. Default: `'1 mb'`. Accepts any size unit (`sizeToBytes`). +- **`virtualFS`** � enables recursive virtual filesystem resolution. Default: `false`. + +When `virtualFS` is **off** (default): +- `search` = `lookup()` � exact match + `index.html` for directory paths +- `errorPage` � generates a minimal HTML page (`

404 Not Found

`) + +When `virtualFS` is **on**: +- `search` = `find()` � walks up the directory tree looking for `index.html`, `.virtual.html`, `.{code}.html` +- `errorPage` � searches for custom error pages (`.404.html`, `.416.html`) in the file tree + +### Serve flow + +``` +serve(url, transport) + � + +- 1. Fast exact-hit (file has data + stat, not internal) + � +-- Range request? > validate > stream or subarray > 206 + � +-- size > streamThreshold? > createSABStream() > 200 + � L-- small file > transport.write(data) > 200 + � + +- 2. Recursive search via lookup() or find() + � +-- file with data + stat (e.g. index.html via directory path) > write directly + � L-- file with data only (status/virtual pages) > write directly + � Range and streaming intentionally omitted � always small HTML files + � + +- 3. Disk fallback (uncached or oversized) + � +-- Range request? > validate > fs.createReadStream(options) > 206 + � L-- fs.createReadStream() > 200 + � + L- 4. 404 +``` + +### SAB streaming + +When a cached file exceeds `streamThreshold`, it is sent via `createSABStream()` which reads 64 KiB chunks from the SAB Buffer view. This applies to both full responses and range requests. Files below the threshold are written as a single buffer (or `subarray` for ranges). + +### Range requests + +Supported in the exact-hit and disk fallback paths: +- Valid range > 206 Partial Content (stream or subarray depending on size vs threshold) +- Invalid range (`start >= end`, `start >= size`, `end >= size`) > 416 Range Not Satisfiable + +Range requests reaching the recursive search path (step 2) are ignored by design � those paths resolve only small HTML files that are always served in full. + +### Disk fallback + +Files with `data: null` (oversized or budget-exhausted) are served from disk via `fs.createReadStream()`, with range support via `start`/`end` options. 
+ +## Configuration + +```js +// config/cache.js +({ + maxFileSize: '10 mb', // files larger than this > disk entry + streamThreshold: '1 mb', // files larger than this > streamed in chunks (default '1 mb') + virtualFS: false, // enable recursive virtual FS resolution (default false) + placements: [ + { name: 'static' }, + { name: 'resources' }, + { name: 'assets', ext: ['.png', '.jpg', '.woff2'] }, + ], + // Limit mode (slab allocator options): + sab: { + limit: '1 gib', // total SAB budget (must be divisible by segment size) + baseSegmentSize: '64 mib', // single segment size (must be ≥ maxFileSize) + }, +}); +``` + +All size values support both binary (KiB, MiB, GiB) and decimal (KB, MB, GB) units. + +The entire `cache` section is optional — when absent, all defaults apply (`maxFileSize: '10 mb'`, `streamThreshold: '1 mb'`, `virtualFS: false`, `sab.limit: '1 gib'`, `sab.baseSegmentSize: '64 mib'`, placements: `static` + `resources`). + +**Important (limit mode):** `limit` must be evenly divisible by the effective `baseSegmentSize`. Otherwise the remainder is wasted — Pool cannot create a segment smaller than `baseSegmentSize`. The effective segment size is `Math.ceil(maxFileSize / configured) * configured`. + +## Safety invariants + +**Common (both modes):** +- Workers **never write** to SharedArrayBuffer +- All worker Buffer views are zero-copy descriptors over shared memory + +**Limit mode:** +- Old memory is freed **only after ACK** from all workers or worker exit +- SAB references in worker `segmentsMap` live for the entire process lifetime (slab retention) +- All worker Buffer views reference SABs from a **single** `segmentsMap` (not a copy) +- SAB segments are **never returned to the OS** — only reused +- Total memory usage is always ≤
`limit` + +## Patterns and influences + +The cache design draws on several well-known systems patterns: + +- **SLUB slab allocator** (Linux kernel) — segments are never returned to the OS; empty segments are marked clean and reused, eliminating allocation system calls (limit mode) +- **Extent-based allocation** (ext4, XFS) — free space tracked as `{ offset, length }` extents with best-fit search and adjacent merge on free (limit mode) +- **Event coalescing / group commit** (PostgreSQL WAL, Nagle's algorithm) — metawatch debounces fs events into batches, SharedCache coalesces each batch into minimal broadcasts via epoch flush (both modes) +- **Copy-on-write update** (MVCC) — file updates allocate a new extent, old data lives until all workers ACK; readers never see partial writes (limit mode) +- **Dependency injection** — FilesystemCache accepts an injectable `reader` function, keeping it free of Node.js built-in dependencies for cross-platform use + +## Data flow diagram + +``` + ┌──────────────────────────────────┐ + │ Main Thread │ + │ │ + │ SharedCache (orchestrator) │ + │ ├── FilesystemCache │ + │ │ ├── Pool │ + │ │ │ └── SAB segments │ + │ │ └── SegmentRegistry │ + │ │ └── extents/tails │ + │ ├── PlacementSource[] │ + │ └── Watcher │ + └──────────┬───────────────────────┘ + │ + snapshot / file-update / file-delete + │ + ┌──────────────────┼──────────────────┐ + │ │ │ + ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ + │ Worker 1 │ │ Worker 2 │ │ Worker N │ + │ │ │ │ │ │ + │ mode detect │ │ mode detect │ │ mode detect │ + │ project() │ │ project() │ │ project() │ + │ │ │ │ │ │ + │ place.files │ │ place.files │ │ place.files │ + │ (views) │ │ (views) │ │ (views) │ + └─────────────┘ └─────────────┘ └─────────────┘ + │ │ │ + └──── Buffer.from(sab, ...)
──────────┘ + zero-copy data access +``` + +## Integration Guide + +The cache module (`lib/cache/`) is self-contained: it depends only on `metawatch` and `metautil` from the Metarhia ecosystem and has no knowledge of the application framework structure or worker lifecycle. + +### Install dependencies + +```sh +npm install metawatch metautil +``` + +### Create and initialize SharedCache + +```js +const { SharedCache } = require('./lib/cache/SharedCache.js'); +const { Worker } = require('node:worker_threads'); + +// threads Map must be created before SharedCache so the closures capture it +const threads = new Map(); + +const cache = new SharedCache({ + // SAB budget — all optional, defaults shown + limit: '1 gib', + baseSegmentSize: '64 mib', + maxFileSize: '10 mb', + + // DirectoryWatcher debounce timeout, ms — optional + watchTimeout: 2000, + + // Directories to serve under `dir` — optional, default: static + resources + placements: [ + { name: 'static' }, + { name: 'resources' }, + { name: 'assets', ext: ['.png', '.jpg', '.woff2'] }, + ], + + // Application root directory (placements live here) + dir: '/path/to/app', + + // Console-compatible logger + console, + + // Called once per message batch to all active workers + broadcast: (data) => { + for (const thread of threads.values()) thread.postMessage(data); + }, + + // Returns iterable of active worker IDs (used to track pending ACKs) + getWorkerIds: () => threads.keys(), +}); + +await cache.initialize(); // scan placements, load files into SAB +cache.watch(); // start filesystem watcher +``` + +### Deliver snapshot to a new worker + +Pass the snapshot in `workerData` before creating the worker. The snapshot contains SAB references — V8 transfers only descriptors, no data is copied. + +```js +const workerData = { + sharedCache: cache.snapshot(), + // ...
other workerData fields +}; +const worker = new Worker(workerPath, { workerData }); +threads.set(workerId, worker); + +worker.on('message', (msg) => { + if (msg.name === 'ack-update') cache.handleAck(msg.updateId, workerId); +}); + +worker.on('exit', () => { + cache.handleWorkerExit(workerId); + threads.delete(workerId); +}); +``` + +### Worker side + +```js +const { workerData, parentPort } = require('node:worker_threads'); +const { FilesystemCache } = require('./cache/FilesystemCache.js'); + +const { sharedCache } = workerData; + +// Build segment map from initial snapshot +const segmentsMap = new Map(); +for (const seg of sharedCache.segments) segmentsMap.set(seg.id, seg.sab); + +// Project a placement into a files Map (key -> { data: Buffer|null, stat, path? }) +const files = FilesystemCache.project(sharedCache.filesystems['static'], segmentsMap); + +// Send ACK after applying each update +const sendAck = (updateId) => + parentPort.postMessage({ name: 'ack-update', updateId }); + +// Handle delta messages from main thread +parentPort.on('message', (msg) => { + if (msg.name === 'file-update') { + // Register new segments before projecting entries + for (const seg of msg.newSegments) segmentsMap.set(seg.id, seg.sab); + for (const [key, entry] of msg.updates) { + files.set(key, FilesystemCache.projectEntry(entry, segmentsMap)); + } + sendAck(msg.updateId); + } else if (msg.name === 'file-delete') { + for (const key of msg.keys) files.delete(key); + sendAck(msg.updateId); + } +}); +``` + +### Message protocol reference + +| Message (main → worker) | Fields | Notes | +|---|---|---| +| `file-update` | `target`, `updateId`, `updates: [[key, entry], ...]`, `newSegments: [{id, sab}]` | Apply entries then ACK | +| `file-delete` | `target`, `updateId`, `keys: [string]` | Delete keys then ACK | + +| Message (worker → main) | Fields | Notes | +|---|---|---| +| `ack-update` | `updateId` | Sent after applying any message with `updateId` | diff --git a/impress.js b/impress.js 
index 5fa7d2398..d2309d126 100644 --- a/impress.js +++ b/impress.js @@ -188,7 +188,22 @@ const loadApplication = async (root, dir, master) => { impress.planner = await new Planner(tasksPath, tasksConfig, impress); impress.config = config; } - const cacheOptions = { config, dir, console: impress.console }; + const { balancer, ports = [], workers = {} } = config.server; + const { cache = {} } = config; + const threads = new Map(); + const cacheOptions = { + limit: cache.sab?.limit, + baseSegmentSize: cache.sab?.baseSegmentSize, + maxFileSize: cache.maxFileSize, + watchTimeout: config.server.timeouts.watch, + placements: cache.placements, + dir, + console: impress.console, + broadcast: (data) => { + for (const thread of threads.values()) thread.postMessage(data); + }, + getWorkerIds: () => threads.keys(), + }; const sharedCache = new SharedCache(cacheOptions); try { await sharedCache.initialize(); @@ -197,8 +212,6 @@ const loadApplication = async (root, dir, master) => { throw error; } - const { balancer, ports = [], workers = {} } = config.server; - const threads = new Map(); const pool = new Pool({ timeout: workers.wait }); const app = { root, @@ -209,7 +222,7 @@ const loadApplication = async (root, dir, master) => { ready: 0, sharedCache, }; - sharedCache.watch(app); + sharedCache.watch(); if (balancer) await startWorker(app, 'balancer', balancer); for (const port of ports) await startWorker(app, 'server', port); const poolSize = workers.pool || 0; diff --git a/lib/application.js b/lib/application.js index 9ae9d1d6e..7298100e7 100644 --- a/lib/application.js +++ b/lib/application.js @@ -99,9 +99,9 @@ class Application extends EventEmitter { applySharedCache(sharedCache) { const { projectEntry, config } = this; - const { indexes } = sharedCache; - for (const name of Object.keys(indexes)) { - const index = indexes[name]; + const { filesystems } = sharedCache; + for (const name of Object.keys(filesystems)) { + const index = filesystems[name]; const entries = 
index.entries instanceof Map ? index.entries : new Map(index.entries); const files = new Map(); @@ -221,7 +221,7 @@ class Application extends EventEmitter { const timeout = this.config.server.timeouts.watch; this.watcher = new DirectoryWatcher({ timeout }); const shared = this.sharedCache - ? new Set(Object.keys(this.sharedCache.indexes)) + ? new Set(Object.keys(this.sharedCache.filesystems)) : new Set(); this.watcher.on('change', (filePath) => { diff --git a/lib/cache/LimitCache.js b/lib/cache/FilesystemCache.js similarity index 71% rename from lib/cache/LimitCache.js rename to lib/cache/FilesystemCache.js index 9f8da21b6..848923c91 100644 --- a/lib/cache/LimitCache.js +++ b/lib/cache/FilesystemCache.js @@ -9,7 +9,7 @@ class Pool { this.limit = limit; this.baseSegmentSize = baseSegmentSize; this.segments = new Map(); - this.cleanSegmentIds = new Set(); + this.emptySegmentIds = new Set(); this.totalUsed = 0; this.nextSegmentId = 1; } @@ -19,24 +19,24 @@ class Pool { } createBaseSegment() { - for (const id of this.cleanSegmentIds) { - this.cleanSegmentIds.delete(id); + for (const id of this.emptySegmentIds) { + this.emptySegmentIds.delete(id); return this.segments.get(id); } const size = this.baseSegmentSize; if (!this.canAllocate(size)) return null; const id = this.nextSegmentId++; const sab = new SharedArrayBuffer(size); - const segment = { id, sab, size }; + const segment = { id, sab }; this.segments.set(id, segment); this.totalUsed += size; return segment; } - retireSegment(id) { + freeSegment(id) { if (!this.segments.has(id)) return false; - if (this.cleanSegmentIds.has(id)) return true; - this.cleanSegmentIds.add(id); + if (this.emptySegmentIds.has(id)) return true; + this.emptySegmentIds.add(id); return true; } @@ -53,21 +53,21 @@ class Pool { } } -class Registry { +class SegmentRegistry { constructor(pool) { - this.all = segments; - this.freeExtents = new Map(); - this.tails = new Map(); + this.pool = pool; + this.empty = new Map(); + this.partially = new 
Map(); } - registerSegment(segmentId) { - this.freeExtents.set(segmentId, []); - this.tails.set(segmentId, 0); + register(segmentId) { + this.empty.set(segmentId, []); + this.partially.set(segmentId, 0); } - allocate(size, noCreate = false) { + allocate(size, limitReached = false) { let bestFit = null; - for (const [segmentId, extents] of this.freeExtents) { + for (const [segmentId, extents] of this.empty) { for (let i = 0; i < extents.length; i++) { const extent = extents[i]; if (extent.length < size) continue; @@ -79,7 +79,7 @@ class Registry { if (bestFit) { const { segmentId, index, extent } = bestFit; const offset = extent.offset; - const extents = this.freeExtents.get(segmentId); + const extents = this.empty.get(segmentId); if (extent.length === size) { extents.splice(index, 1); } else { @@ -90,31 +90,31 @@ class Registry { } return { segmentId, offset }; } - for (const [segmentId, tail] of this.tails) { + for (const [segmentId, tail] of this.partially) { if (tail + size <= this.pool.baseSegmentSize) { - this.tails.set(segmentId, tail + size); + this.partially.set(segmentId, tail + size); return { segmentId, offset: tail }; } } - if (noCreate) return null; + if (limitReached) return null; const segment = this.pool.createBaseSegment(); if (!segment) return null; - this.registerSegment(segment.id); - this.tails.set(segment.id, size); + this.register(segment.id); + this.partially.set(segment.id, size); return { segmentId: segment.id, offset: 0 }; } free(segmentId, offset, length) { - const extents = this.freeExtents.get(segmentId); + const extents = this.empty.get(segmentId); if (!extents) return; const newExtent = { offset, length }; let insertIndex = extents.findIndex((e) => e.offset > offset); if (insertIndex === -1) insertIndex = extents.length; extents.splice(insertIndex, 0, newExtent); - Registry.mergeAdjacent(extents, insertIndex); + SegmentRegistry.mergeSiblings(extents, insertIndex); } - static mergeAdjacent(extents, index) { + static 
mergeSiblings(extents, index) { if (index + 1 < extents.length) { const current = extents[index]; const next = extents[index + 1]; @@ -133,27 +133,27 @@ class Registry { } } - segmentUsed(segmentId) { - const tail = this.tails.get(segmentId); + isUsed(segmentId) { + const tail = this.partially.get(segmentId); if (!tail) return 0; - const extents = this.freeExtents.get(segmentId); + const extents = this.empty.get(segmentId); if (!extents) return tail; let free = 0; for (const e of extents) free += e.length; return tail - free; } - isSegmentEmpty(segmentId) { - return this.segmentUsed(segmentId) === 0; + isEmpty(segmentId) { + return this.isUsed(segmentId) === 0; } - unregisterSegment(segmentId) { - this.freeExtents.delete(segmentId); - this.tails.delete(segmentId); + unregister(segmentId) { + this.empty.delete(segmentId); + this.partially.delete(segmentId); } } -class LimitCache { +class FilesystemCache { constructor(options = {}) { const limit = options.limit || DEFAULT_LIMIT; const maxFileSize = options.maxFileSize || DEFAULT_MAX_FILE_SIZE; @@ -163,8 +163,8 @@ class LimitCache { this.baseSegmentSize = baseSegmentSize; this.reader = options.reader || null; this.pool = new Pool(limit, baseSegmentSize); - this.registry = new Registry(this.pool); - this.indexes = {}; + this.registry = new SegmentRegistry(this.pool); + this.filesystems = {}; } get totalUsed() { @@ -185,27 +185,27 @@ class LimitCache { entries.set(key, entry); } const index = { entries, segmentIds }; - this.indexes[name] = index; + this.filesystems[name] = index; return index; } async allocate(name, key, file) { - let index = this.indexes[name]; + let index = this.filesystems[name]; if (!index) { index = { entries: new Map(), segmentIds: new Set() }; - this.indexes[name] = index; + this.filesystems[name] = index; } const entry = await this.#allocateEntry(file, index.segmentIds); index.entries.set(key, entry); return entry; } - remove(name, key) { - const index = this.indexes[name]; + remove(fsId, 
filename) { + const index = this.filesystems[fsId]; if (!index) return null; - const entry = index.entries.get(key); + const entry = index.entries.get(filename); if (!entry) return null; - index.entries.delete(key); + index.entries.delete(filename); return entry; } @@ -215,9 +215,9 @@ class LimitCache { const segment = this.pool.getSegment(segmentId); if (!segment) return; this.registry.free(segmentId, entry.offset, entry.length); - if (this.registry.isSegmentEmpty(segmentId)) { - this.registry.unregisterSegment(segmentId); - this.pool.retireSegment(segmentId); + if (this.registry.isEmpty(segmentId)) { + this.registry.unregister(segmentId); + this.pool.freeSegment(segmentId); } } @@ -225,13 +225,13 @@ class LimitCache { let target = null; let minUtil = threshold; let baseCount = 0; - for (const [segmentId, tail] of this.registry.tails) { + for (const [segmentId, tail] of this.registry.partially) { if (tail === 0) continue; const segment = this.pool.getSegment(segmentId); if (!segment) continue; baseCount++; - const used = this.registry.segmentUsed(segmentId); - const util = used / segment.size; + const used = this.registry.isUsed(segmentId); + const util = used / this.baseSegmentSize; if (util < minUtil) { minUtil = util; target = segmentId; @@ -239,19 +239,17 @@ class LimitCache { } if (baseCount < 2 || !target) return null; const items = []; - for (const name of Object.keys(this.indexes)) { - for (const [key, entry] of this.indexes[name].entries) { + for (const name of Object.keys(this.filesystems)) { + for (const [key, entry] of this.filesystems[name].entries) { if (entry.kind === 'shared' && entry.segmentId === target) { items.push({ name, key, entry }); } } } if (items.length === 0) return null; - const savedTail = this.registry.tails.get(target); - const savedExtents = this.registry.freeExtents - .get(target) - .map((e) => ({ ...e })); - this.registry.unregisterSegment(target); + const savedTail = this.registry.partially.get(target); + const savedExtents = 
this.registry.empty.get(target).map((e) => ({ ...e })); + this.registry.unregister(target); const moved = []; let success = true; for (const { name, key, entry } of items) { @@ -285,23 +283,23 @@ class LimitCache { newEntry.length, ); } - this.registry.registerSegment(target); - this.registry.freeExtents.set(target, savedExtents); - this.registry.tails.set(target, savedTail); + this.registry.register(target); + this.registry.empty.set(target, savedExtents); + this.registry.partially.set(target, savedTail); return null; } const updates = []; const oldEntries = []; const newSegmentIds = new Set(); for (const { name, key, oldEntry, newEntry } of moved) { - this.indexes[name].entries.set(key, newEntry); - this.indexes[name].segmentIds.add(newEntry.segmentId); + this.filesystems[name].entries.set(key, newEntry); + this.filesystems[name].segmentIds.add(newEntry.segmentId); newSegmentIds.add(newEntry.segmentId); updates.push({ name, key, entry: newEntry, oldEntry }); oldEntries.push(oldEntry); } - for (const name of Object.keys(this.indexes)) { - this.indexes[name].segmentIds.delete(target); + for (const name of Object.keys(this.filesystems)) { + this.filesystems[name].segmentIds.delete(target); } const newSegments = []; for (const id of newSegmentIds) { @@ -313,12 +311,29 @@ class LimitCache { snapshot() { const segments = this.pool.getSegmentsSnapshot(); - const indexes = {}; - for (const name of Object.keys(this.indexes)) { - const { entries } = this.indexes[name]; - indexes[name] = { entries: [...entries] }; + const filesystems = {}; + for (const name of Object.keys(this.filesystems)) { + const { entries } = this.filesystems[name]; + filesystems[name] = { entries: [...entries] }; } - return { segments, indexes }; + return { segments, filesystems }; + } + + stats() { + const segs = [...this.pool.segments.values()]; + const cleanCount = this.pool.emptySegmentIds.size; + const lines = segs.map((s) => { + const used = this.registry.isUsed(s.id); + const pct = ((used / 
this.baseSegmentSize) * 100).toFixed(1); + const mark = this.pool.emptySegmentIds.has(s.id) ? ' [empty]' : ''; + return ` seg ${s.id}: ${used}/${this.baseSegmentSize} (${pct}%)${mark}`; + }); + return { + segmentCount: segs.length, + cleanCount, + totalUsed: this.pool.totalUsed, + lines, + }; } async #allocateEntry(file, segmentIds) { @@ -361,7 +376,7 @@ class LimitCache { const entries = index.entries instanceof Map ? index.entries : new Map(index.entries); for (const [key, entry] of entries) { - files.set(key, LimitCache.projectEntry(entry, segmentsMap)); + files.set(key, FilesystemCache.projectEntry(entry, segmentsMap)); } return files; } @@ -380,4 +395,4 @@ class LimitCache { } } -module.exports = { LimitCache }; +module.exports = { FilesystemCache }; diff --git a/lib/cache/PerFileCache.js b/lib/cache/PerFileCache.js deleted file mode 100644 index aa7061956..000000000 --- a/lib/cache/PerFileCache.js +++ /dev/null @@ -1,108 +0,0 @@ -'use strict'; - -class PerFileCache { - constructor(options = {}) { - this.maxFileSize = options.maxFileSize || 10 * 1024 * 1024; - this.reader = options.reader || null; - this.indexes = {}; - } - - get totalUsed() { - let total = 0; - for (const name of Object.keys(this.indexes)) { - for (const entry of this.indexes[name].entries.values()) { - if (entry.kind === 'shared') total += entry.length; - } - } - return total; - } - - async load(name, filesMap) { - const entries = new Map(); - for (const [key, file] of filesMap) { - const entry = await this.#allocateEntry(file); - entries.set(key, entry); - } - const index = { entries }; - this.indexes[name] = index; - return index; - } - - async allocate(name, key, file) { - let index = this.indexes[name]; - if (!index) { - index = { entries: new Map() }; - this.indexes[name] = index; - } - const entry = await this.#allocateEntry(file); - index.entries.set(key, entry); - return entry; - } - - remove(name, key) { - const index = this.indexes[name]; - if (!index) return null; - const entry = 
index.entries.get(key); - if (!entry) return null; - index.entries.delete(key); - return entry; - } - - free() {} - - compact() { - return null; - } - - snapshot() { - const indexes = {}; - for (const name of Object.keys(this.indexes)) { - const { entries } = this.indexes[name]; - indexes[name] = { entries: [...entries] }; - } - return { segments: null, indexes }; - } - - async #allocateEntry(file) { - const { data, stat, path: filePath } = file; - const size = stat?.size || 0; - if (size > this.maxFileSize) { - return { kind: 'disk', path: filePath, stat, data: null }; - } - if (!data && !this.reader) { - return { kind: 'disk', path: filePath, stat, data: null }; - } - if (size === 0) { - const sab = new SharedArrayBuffer(0); - return { kind: 'shared', sab, length: 0, stat }; - } - const sab = new SharedArrayBuffer(size); - if (data) { - new Uint8Array(sab).set(data); - } else if (this.reader) { - await this.reader(filePath, sab, 0, size); - } - return { kind: 'shared', sab, length: size, stat }; - } - - static project(index) { - const files = new Map(); - const entries = - index.entries instanceof Map ? 
index.entries : new Map(index.entries); - for (const [key, entry] of entries) { - files.set(key, PerFileCache.projectEntry(entry)); - } - return files; - } - - static projectEntry(entry) { - if (entry.kind === 'shared') { - const { sab, length } = entry; - const data = Buffer.from(sab, 0, length); - return { data, stat: entry.stat }; - } - return { data: null, stat: entry.stat, path: entry.path }; - } -} - -module.exports = { PerFileCache }; diff --git a/lib/cache/PlacementSource.js b/lib/cache/PlacementSource.js index 815dea0c1..2ec09148d 100644 --- a/lib/cache/PlacementSource.js +++ b/lib/cache/PlacementSource.js @@ -1,33 +1,54 @@ 'use strict'; -const { node, metarhia } = require('../deps.js'); -const { Place } = require('../place.js'); +const path = require('node:path'); +const fsp = require('node:fs/promises'); +const metautil = require('metautil'); const WIN = process.platform === 'win32'; const toKey = WIN ? (filePath, base) => { const key = filePath.substring(base.length); - return metarhia.metautil.replace(key, node.path.sep, '/'); + return metautil.replace(key, path.sep, '/'); } : (filePath, base) => filePath.substring(base.length); -class PlacementSource extends Place { - constructor(name, application, options = {}) { - super(name, application); +class PlacementSource { + constructor(name, dir, watcher, options = {}) { + this.name = name; + this.path = path.join(dir, name); + this.watcher = watcher; this.files = new Map(); - this.ext = options.ext; + this.ext = options.ext || null; } getKey(filePath) { return toKey(filePath, this.path); } + async load(targetPath = this.path) { + this.watcher.watch(targetPath); + let entries; + try { + entries = await fsp.readdir(targetPath, { withFileTypes: true }); + } catch { + return; + } + for (const entry of entries) { + const filePath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + await this.load(filePath); + } else { + await this.change(filePath); + } + } + } + async change(filePath) { - const 
ext = metarhia.metautil.fileExt(filePath); + const ext = metautil.fileExt(filePath); if (this.ext && !this.ext.includes(ext)) return; try { - const stat = await node.fsp.stat(filePath); + const stat = await fsp.stat(filePath); const key = this.getKey(filePath); this.files.set(key, { stat, path: filePath }); } catch { diff --git a/lib/cache/SharedCache.js b/lib/cache/SharedCache.js index 0b2b992d3..ca7bcc1ee 100644 --- a/lib/cache/SharedCache.js +++ b/lib/cache/SharedCache.js @@ -1,36 +1,18 @@ 'use strict'; -const { node, metarhia } = require('../deps.js'); -const { LimitCache } = require('./LimitCache.js'); -const { PerFileCache } = require('./PerFileCache.js'); +const path = require('node:path'); +const fsp = require('node:fs/promises'); +const { DirectoryWatcher } = require('metawatch'); +const metautil = require('metautil'); +const { FilesystemCache } = require('./FilesystemCache.js'); const { PlacementSource } = require('./PlacementSource.js'); const DEFAULT_PLACEMENTS = [{ name: 'static' }, { name: 'resources' }]; -const NOOP = () => {}; - -const PREFIXES = { k: 1, m: 2, g: 3, t: 4 }; - -const sizeToBytes = (value) => { - if (typeof value === 'number') return value; - const str = value.trim().toLowerCase(); - const binary = str.endsWith('ib'); - const suffix = binary ? 3 : 2; - const unit = str.slice(-suffix); - const num = parseInt(str.slice(0, -suffix)); - const base = binary ? 
1024 : 1000; - const exp = PREFIXES[unit[0]]; - if (!exp) throw new Error(`Unknown unit: ${unit}`); - return num * base ** exp; -}; class SharedCache { - constructor({ config, dir, console }) { - const cacheConfig = config.cache || {}; - const mode = cacheConfig.mode || 'limit'; - const sabConfig = cacheConfig.sab || {}; - const maxFileSize = sizeToBytes(cacheConfig.maxFileSize || '10 mb'); + constructor({ limit, baseSegmentSize, maxFileSize, watchTimeout, placements, dir, console, broadcast, getWorkerIds }) { const reader = async (filePath, sab, offset, size) => { - const fh = await node.fsp.open(filePath, 'r'); + const fh = await fsp.open(filePath, 'r'); try { const buf = Buffer.from(sab, offset, size); await fh.read(buf, 0, size, 0); @@ -39,63 +21,38 @@ class SharedCache { } }; - if (mode === 'limit') { - const limit = sizeToBytes(sabConfig.limit || '1 gib'); - const baseSegmentSize = sizeToBytes( - sabConfig.baseSegmentSize || '64 mib', - ); - this.cache = new LimitCache({ - limit, - baseSegmentSize, - maxFileSize, - reader, - }); - } else { - this.cache = new PerFileCache({ maxFileSize, reader }); - } + this.cache = new FilesystemCache({ + limit: metautil.sizeToBytes(limit || '1 gib'), + baseSegmentSize: metautil.sizeToBytes(baseSegmentSize || '64 mib'), + maxFileSize: metautil.sizeToBytes(maxFileSize || '10 mb'), + reader, + }); - this.placements = cacheConfig.placements || DEFAULT_PLACEMENTS; + this.placements = placements || DEFAULT_PLACEMENTS; this.dir = dir; - this.config = config; + this.watchTimeout = watchTimeout; this.console = console; + this.broadcast = broadcast; + this.getWorkerIds = getWorkerIds; this.sources = {}; - this.app = null; this.watcher = null; this.nextUpdateId = 0; - - // Pre-initialize ACK-dependent functions - const needsAck = mode === 'limit'; - if (needsAck) { - this.pendingFrees = new Map(); - this.#afterAck = (pending) => { - this.#freeEntries(pending); - }; - } else { - this.pendingFrees = null; - this.#afterAck = NOOP; - } - 
this.needsAck = needsAck; + this.pendingFrees = new Map(); + this.#afterAck = (pending) => { + this.#freeEntries(pending); + }; } #afterAck; async initialize() { - const appObj = { - path: this.dir, - config: this.config, - watcher: null, - console: this.console, - absolute: (relative) => node.path.join(this.dir, relative), - }; - const timeout = this.config.server.timeouts.watch; - const { DirectoryWatcher } = metarhia.metawatch; - this.watcher = new DirectoryWatcher({ timeout }); - appObj.watcher = this.watcher; + this.watcher = new DirectoryWatcher({ timeout: this.watchTimeout }); for (const placement of this.placements) { const opts = placement.ext ? { ext: placement.ext } : {}; this.sources[placement.name] = new PlacementSource( placement.name, - appObj, + this.dir, + this.watcher, opts, ); } @@ -132,17 +89,16 @@ class SharedCache { } } - watch(app) { - this.app = app; - const { sources, cache, needsAck } = this; + watch() { + const { sources, cache } = this; const sourcesByName = new Map(Object.entries(sources)); const findSource = (filePath) => { - const relPath = node.path.relative(this.dir, filePath); + const relPath = path.relative(this.dir, filePath); if (!relPath || relPath.startsWith('..')) return null; - if (node.path.isAbsolute(relPath)) return null; - const sepIndex = relPath.indexOf(node.path.sep); + if (path.isAbsolute(relPath)) return null; + const sepIndex = relPath.indexOf(path.sep); const name = sepIndex === -1 ? relPath : relPath.substring(0, sepIndex); const source = sourcesByName.get(name); return source ? 
{ name, source } : null; @@ -151,7 +107,7 @@ class SharedCache { let epoch = null; const processChange = async (ep, name, source, filePath) => { - const stat = await node.fsp.stat(filePath).catch(() => null); + const stat = await fsp.stat(filePath).catch(() => null); if (!stat) return; if (stat.isDirectory()) { const before = new Set(source.files.keys()); @@ -173,7 +129,7 @@ class SharedCache { const key = source.getKey(filePath); const file = source.files.get(key); if (!file) return; - const oldEntry = cache.indexes[name]?.entries.get(key); + const oldEntry = cache.filesystems[name]?.entries.get(key); const newEntry = await cache.allocate(name, key, file); const group = ep.updates[name] || @@ -237,16 +193,11 @@ class SharedCache { .then(() => this.#flushEpoch(current)) .catch((err) => this.console.error(`[cache] epoch: ${err.message}`)); }); - - // Pre-build flush function for the mode - if (needsAck) { - this.#flushEpoch = this.#flushEpochWithAck; - } else { - this.#flushEpoch = this.#flushEpochFireAndForget; - } } - #flushEpoch; + #flushEpoch(epoch) { + return this.#flushEpochWithAck(epoch); + } #flushEpochWithAck(epoch) { const { updates, deletes, oldEntries } = epoch; @@ -284,51 +235,19 @@ class SharedCache { } } - #flushEpochFireAndForget(epoch) { - const { updates, deletes } = epoch; - for (const name of Object.keys(updates)) { - const { entries } = updates[name]; - if (entries.length === 0) continue; - this.#broadcast({ - name: 'file-update', - target: name, - updates: entries, - }); - } - for (const name of Object.keys(deletes)) { - const keys = deletes[name]; - if (keys.length === 0) continue; - this.#broadcast({ - name: 'file-delete', - target: name, - keys, - }); - } - } - #broadcast(data) { - for (const thread of this.app.threads.values()) { - thread.postMessage(data); - } + this.broadcast(data); } #freeEntries(pending) { if (!pending) return; for (const entry of pending.entries) this.cache.free(entry); - const { pool, registry } = this.cache; - const 
segs = [...pool.segments.values()]; - const cleanCount = pool.cleanSegmentIds.size; - const info = segs.map((s) => { - const used = registry.segmentUsed(s.id); - const pct = ((used / s.size) * 100).toFixed(1); - const mark = pool.cleanSegmentIds.has(s.id) ? ' [clean]' : ''; - return ` seg ${s.id}: ${used}/${s.size} (${pct}%)${mark}`; - }); + const { segmentCount, cleanCount, totalUsed, lines } = this.cache.stats(); const count = pending.entries.length; - this.console.info( + this.console.debug( `[cache] freeEntries: ${count} entries freed, ` + - `${segs.length} segments (${cleanCount} clean), ` + - `totalUsed=${this.cache.totalUsed}\n${info.join('\n')}`, + `${segmentCount} segments (${cleanCount} clean), ` + + `totalUsed=${totalUsed}\n${lines.join('\n')}`, ); this.#tryCompact(); } @@ -336,7 +255,7 @@ class SharedCache { #tryCompact() { const result = this.cache.compact(); if (!result) { - this.console.info('[cache] compact: no target found'); + this.console.debug('[cache] compact: no target found'); return; } this.console.info( @@ -365,9 +284,9 @@ class SharedCache { } #trackUpdate(updateId, entries) { - const workerIds = new Set(this.app.threads.keys()); + const workerIds = new Set(this.getWorkerIds()); this.pendingFrees.set(updateId, { workerIds, entries }); } } -module.exports = { SharedCache, sizeToBytes }; +module.exports = { SharedCache }; diff --git a/lib/static.js b/lib/static.js index 5ca1e2715..f221be6d8 100644 --- a/lib/static.js +++ b/lib/static.js @@ -170,9 +170,8 @@ class Static { const absPath = join(this.path, filePath); if (absPath.startsWith(this.path)) { const fsStat = await node.fsp.stat(absPath).catch(() => null); - const diskStat = file?.stat || fsStat; - if (diskStat && (!diskStat.isFile || diskStat.isFile())) { - const { size } = diskStat; + if (fsStat && fsStat.isFile()) { + const { size } = file?.stat ?? 
fsStat; const options = { size }; let code = 200; const { headers } = transport.req; diff --git a/lib/worker.js b/lib/worker.js index 89e4e4c83..9e544338f 100644 --- a/lib/worker.js +++ b/lib/worker.js @@ -10,24 +10,18 @@ const segmentsMap = new Map(); const sharedCache = workerData.sharedCache; let projectEntry = null; -if (sharedCache && sharedCache.segments) { - // Limit mode: build segmentsMap, import LimitCache for projection - const { LimitCache } = require('./cache/LimitCache.js'); +if (sharedCache) { + const { FilesystemCache } = require('./cache/FilesystemCache.js'); for (const seg of sharedCache.segments) segmentsMap.set(seg.id, seg.sab); - projectEntry = (entry) => LimitCache.projectEntry(entry, segmentsMap); -} else if (sharedCache) { - // Per-file mode: no segments, import PerFileCache for projection - const { PerFileCache } = require('./cache/PerFileCache.js'); - projectEntry = (entry) => PerFileCache.projectEntry(entry); + projectEntry = (entry) => FilesystemCache.projectEntry(entry, segmentsMap); } application.segmentsMap = segmentsMap; application.projectEntry = projectEntry; -// Pre-initialize ACK: real function for limit mode, noop for per-file -const sendAck = - sharedCache && sharedCache.segments - ? (updateId) => parentPort.postMessage({ name: 'ack-update', updateId }) - : () => {}; +// Send ACK after worker applies a cache update +const sendAck = sharedCache + ? (updateId) => parentPort.postMessage({ name: 'ack-update', updateId }) + : () => {}; const logError = (type) => async (err) => { const error = metarhia.metautil.isError(err) ? 
err : new Error('Unknown'); diff --git a/schemas/config/cache.js b/schemas/config/cache.js index 0657cf25f..56245606c 100644 --- a/schemas/config/cache.js +++ b/schemas/config/cache.js @@ -1,5 +1,4 @@ ({ - mode: { type: 'string', required: false }, size: 'size', maxFileSize: 'size', streamThreshold: { type: 'size', required: false }, diff --git a/test/cache-shared.js b/test/cache-shared.js index 3bdfb4a85..8ae4dd765 100644 --- a/test/cache-shared.js +++ b/test/cache-shared.js @@ -4,8 +4,7 @@ const { test } = require('node:test'); const assert = require('node:assert'); const path = require('node:path'); const { Static } = require('../lib/static.js'); -const { LimitCache } = require('../lib/cache/LimitCache.js'); -const { PerFileCache } = require('../lib/cache/PerFileCache.js'); +const { FilesystemCache } = require('../lib/cache/FilesystemCache.js'); const root = process.cwd(); @@ -17,28 +16,28 @@ const application = { }, }; -// --- LimitCache (limit backend) --- +// --- FilesystemCache (limit backend) --- -test('LimitCache - should load files into segments', async () => { +test('FilesystemCache - should load files into segments', async () => { const seg = 1024; const options = { limit: seg, maxFileSize: seg, baseSegmentSize: seg }; - const cache = new LimitCache(options); + const cache = new FilesystemCache(options); const filesMap = new Map(); const data = Buffer.from('hello world'); const stat = { size: data.byteLength }; filesMap.set('/test.js', { data, stat, path: '/test.js' }); await cache.load('static', filesMap); - const index = cache.indexes.static; + const index = cache.filesystems.static; assert.ok(index); const entry = index.entries.get('/test.js'); assert.strictEqual(entry.kind, 'shared'); assert.strictEqual(entry.length, data.byteLength); }); -test('LimitCache - project creates Buffer view', async () => { +test('FilesystemCache - project creates Buffer view', async () => { const seg = 1024; const options = { limit: seg, maxFileSize: seg, 
baseSegmentSize: seg }; - const cache = new LimitCache(options); + const cache = new FilesystemCache(options); const data = Buffer.from('test data'); const stat = { size: data.byteLength }; const filesMap = new Map([['/f.js', { data, stat, path: '/f.js' }]]); @@ -46,21 +45,21 @@ test('LimitCache - project creates Buffer view', async () => { const snap = cache.snapshot(); const segmentsMap = new Map(); for (const seg of snap.segments) segmentsMap.set(seg.id, seg.sab); - const files = LimitCache.project(snap.indexes.static, segmentsMap); + const files = FilesystemCache.project(snap.filesystems.static, segmentsMap); const file = files.get('/f.js'); assert.ok(file.data instanceof Buffer); assert.deepStrictEqual(file.data, data); assert.strictEqual(file.stat, stat); }); -test('LimitCache - zero-byte files stay shared without segments', async () => { +test('FilesystemCache - zero-byte files stay shared without segments', async () => { const seg = 1024; const options = { limit: seg, maxFileSize: seg, baseSegmentSize: seg }; - const cache = new LimitCache(options); + const cache = new FilesystemCache(options); const stat = { size: 0 }; const filesMap = new Map([['/empty.txt', { data: Buffer.alloc(0), stat, path: '/empty.txt' }]]); await cache.load('static', filesMap); - const entry = cache.indexes.static.entries.get('/empty.txt'); + const entry = cache.filesystems.static.entries.get('/empty.txt'); assert.deepStrictEqual(entry, { kind: 'shared', segmentId: 0, @@ -72,9 +71,9 @@ test('LimitCache - zero-byte files stay shared without segments', async () => { assert.deepStrictEqual(snapshot.segments, []); }); -test('LimitCache - projectEntry returns empty Buffer for zero-byte files', () => { +test('FilesystemCache - projectEntry returns empty Buffer for zero-byte files', () => { const stat = { size: 0 }; - const file = LimitCache.projectEntry( + const file = FilesystemCache.projectEntry( { kind: 'shared', segmentId: 0, offset: 0, length: 0, stat }, new Map(), ); @@ -83,65 +82,6 
@@ test('LimitCache - projectEntry returns empty Buffer for zero-byte files', () => assert.strictEqual(file.stat, stat); }); -// --- PerFileCache (per-file backend) --- - -test('PerFileCache - should load files into individual SABs', async () => { - const cache = new LimitCache({ limit: 1024, maxFileSize: 10 }); - const data = Buffer.alloc(20); - const stat = { size: 20 }; - const filePath = '/tmp/big.bin'; - const file = { data, stat, path: filePath }; - const fm = new Map([['/big.bin', file]]); - await cache.load('static', fm); - const entry = cache.indexes.static.entries.get('/big.bin'); - assert.strictEqual(entry.kind, 'disk'); - assert.strictEqual(entry.data, null); -}); - -test('LimitCache - disk fallback for oversized files', async () => { - const cache = new PerFileCache({ maxFileSize: 1024 * 1024 }); - const data = Buffer.from('hello world'); - const stat = { size: data.byteLength }; - const filesMap = new Map([['/test.js', { data, stat, path: '/test.js' }]]); - await cache.load('static', filesMap); - const entry = cache.indexes.static.entries.get('/test.js'); - assert.strictEqual(entry.kind, 'shared'); - assert.ok(entry.sab instanceof SharedArrayBuffer); - assert.strictEqual(entry.length, data.byteLength); -}); - -test('PerFileCache - project creates Buffer view over SAB', async () => { - const cache = new PerFileCache({ maxFileSize: 1024 * 1024 }); - const data = Buffer.from('test data'); - const stat = { size: data.byteLength }; - const filesMap = new Map([['/f.js', { data, stat, path: '/f.js' }]]); - await cache.load('static', filesMap); - const snap = cache.snapshot(); - assert.strictEqual(snap.segments, null); - const files = PerFileCache.project(snap.indexes.static); - const file = files.get('/f.js'); - assert.ok(file.data instanceof Buffer); - assert.deepStrictEqual(file.data, data); -}); - -test('PerFileCache - disk fallback for oversized', async () => { - const cache = new PerFileCache({ maxFileSize: 10 }); - const data = Buffer.alloc(20); - 
const stat = { size: 20 }; - const filePath = '/tmp/big.bin'; - const file = { data, stat, path: filePath }; - const fm = new Map([['/big.bin', file]]); - await cache.load('static', fm); - const entry = cache.indexes.static.entries.get('/big.bin'); - assert.strictEqual(entry.kind, 'disk'); -}); - -test('PerFileCache - free and compact are no-ops', () => { - const cache = new PerFileCache(); - cache.free({ kind: 'shared', sab: new SharedArrayBuffer(4), length: 4 }); - assert.strictEqual(cache.compact(), null); -}); - // --- Static (worker side) --- test('Static setFiles - populate from projected entries', () => {