diff --git a/.changeset/agents-runtime-sandbox-primitive.md b/.changeset/agents-runtime-sandbox-primitive.md new file mode 100644 index 0000000000..89f3d01077 --- /dev/null +++ b/.changeset/agents-runtime-sandbox-primitive.md @@ -0,0 +1,16 @@ +--- +'@electric-ax/agents-runtime': patch +'@electric-ax/agents': patch +'@electric-ax/agents-server-conformance-tests': patch +'@electric-ax/agents-desktop': patch +'@electric-ax/agents-server-ui': patch +'@electric-ax/agents-server': patch +--- + +Adds the `Sandbox` primitive (`@electric-ax/agents-runtime/sandbox`) for isolating LLM-driven tool calls. Three providers ship: `unrestrictedSandbox()` (explicit pass-through), `remoteSandbox({provider: 'e2b'})` (E2B as an optional peer dep), and `dockerSandbox()` (container isolation via `dockerode` as an optional peer dep). + +Built-in entities (Horton, Worker) default to `unrestrictedSandbox` via the new `chooseDefaultSandbox(workingDirectory)` helper. Stronger isolation is opt-in by constructing `dockerSandbox` or `remoteSandbox` directly — `dockerSandbox` is the recommended path for multi-entity hosting. + +Behavior changes folded in: bash no longer forwards `process.env` to children (removes the trivial `env`-dump leak of secrets like `$ANTHROPIC_API_KEY` — note the host-sharing `unrestricted` provider still can't fully contain secrets, e.g. via `/proc/<pid>/environ`, so use `docker`/`remote` for untrusted or multi-tenant entities), tool descriptions corrected, and read/write/edit reject symlink escapes from the workspace. + +Runtimes advertise named **sandbox profiles** (e.g. `local`, `docker`) to the agents-server; spawn requests pick a profile by name, the server validates the choice against the target runner's advertised set, and the new-session UI surfaces a picker. Internally, the built-in tool factories (`createBashTool`, `createFetchUrlTool`, etc.) now route their filesystem and network access through the active `Sandbox`. 
diff --git a/packages/agents-desktop/package.json b/packages/agents-desktop/package.json index 6cfe78f05c..8c8b907a2d 100644 --- a/packages/agents-desktop/package.json +++ b/packages/agents-desktop/package.json @@ -30,6 +30,8 @@ "@electric-sql/client": "^1.5.20", "@mixmark-io/domino": "^2.2.0", "better-sqlite3": "^12.9.0", + "dockerode": "^5.0.0", + "e2b": ">=2.0.0", "fix-path": "^4.0.0", "jsdom": "^28.1.0", "pino": "^10.3.1", diff --git a/packages/agents-desktop/src/credentials/api-keys.ts b/packages/agents-desktop/src/credentials/api-keys.ts index 66930e26ea..2084832911 100644 --- a/packages/agents-desktop/src/credentials/api-keys.ts +++ b/packages/agents-desktop/src/credentials/api-keys.ts @@ -8,6 +8,7 @@ export const EMPTY_API_KEYS: ApiKeys = { deepseek: null, moonshot: null, brave: null, + e2b: null, } export const GLOBAL_API_KEYS_REF = `api-keys:global` @@ -19,6 +20,7 @@ export function captureEnvApiKeys(env: NodeJS.ProcessEnv): ApiKeys { deepseek: env.DEEPSEEK_API_KEY?.trim() || null, moonshot: env.MOONSHOT_API_KEY?.trim() || null, brave: env.BRAVE_SEARCH_API_KEY?.trim() || null, + e2b: env.E2B_API_KEY?.trim() || null, } } @@ -36,6 +38,7 @@ export function normalizeApiKeys(value: unknown): ApiKeys { deepseek: pick(maybe.deepseek), moonshot: pick(maybe.moonshot), brave: pick(maybe.brave), + e2b: pick(maybe.e2b), } } @@ -45,7 +48,8 @@ export function hasAnyApiKey(keys: ApiKeys): boolean { keys.openai || keys.deepseek || keys.moonshot || - keys.brave + keys.brave || + keys.e2b ) } @@ -93,6 +97,7 @@ export function applyApiKeysToEnv( | `DEEPSEEK_API_KEY` | `MOONSHOT_API_KEY` | `BRAVE_SEARCH_API_KEY` + | `E2B_API_KEY` ): void => { const next = value ?? 
fallback if (next) { @@ -106,6 +111,7 @@ export function applyApiKeysToEnv( resolveSlot(saved.deepseek, launchEnv.deepseek, `DEEPSEEK_API_KEY`) resolveSlot(saved.moonshot, launchEnv.moonshot, `MOONSHOT_API_KEY`) resolveSlot(saved.brave, launchEnv.brave, `BRAVE_SEARCH_API_KEY`) + resolveSlot(saved.e2b, launchEnv.e2b, `E2B_API_KEY`) } export type ApiKeyStatusDeps = { @@ -119,8 +125,8 @@ export async function getApiKeysStatus( deps: ApiKeyStatusDeps ): Promise { const saved = deps.apiKeys - // Brave is optional (falls back to Anthropic built-in search), so it doesn't - // count toward "the app is configured". + // Brave and E2B are optional: search falls back to Anthropic's built-in tool, + // and E2B only enables the remote sandbox profile. const hasAnyKey = Boolean( saved.anthropic || saved.openai || saved.deepseek || saved.moonshot ) @@ -130,6 +136,7 @@ export async function getApiKeysStatus( deepseek: saved.deepseek ? null : deps.launchEnv.deepseek, moonshot: saved.moonshot ? null : deps.launchEnv.moonshot, brave: saved.brave ? null : deps.launchEnv.brave, + e2b: saved.e2b ? 
null : deps.launchEnv.e2b, } const codex = await deps.getCodexStatus() const modelPicker = createModelPickerStatus({ @@ -167,7 +174,8 @@ export async function setApiKeys( normalized.openai !== deps.apiKeys.openai || normalized.deepseek !== deps.apiKeys.deepseek || normalized.moonshot !== deps.apiKeys.moonshot || - normalized.brave !== deps.apiKeys.brave + normalized.brave !== deps.apiKeys.brave || + normalized.e2b !== deps.apiKeys.e2b Object.assign(deps.apiKeys, normalized) await saveApiKeysToSecret(deps.secretStore, deps.apiKeysRef(), deps.apiKeys) applyApiKeysToEnv(deps.apiKeys, deps.launchEnv, deps.env) diff --git a/packages/agents-desktop/src/shared/types.ts b/packages/agents-desktop/src/shared/types.ts index 51ac96cabb..0ec601f6d6 100644 --- a/packages/agents-desktop/src/shared/types.ts +++ b/packages/agents-desktop/src/shared/types.ts @@ -99,6 +99,7 @@ export type ApiKeys = { deepseek: string | null moonshot: string | null brave: string | null + e2b: string | null } export type ModelPickerChoice = { diff --git a/packages/agents-desktop/vite.config.ts b/packages/agents-desktop/vite.config.ts index ccc65b4d2a..eb491d56df 100644 --- a/packages/agents-desktop/vite.config.ts +++ b/packages/agents-desktop/vite.config.ts @@ -17,6 +17,15 @@ const MUST_EXTERNALIZE = new Set([ `jsdom`, `pino`, `pino-pretty`, + // `inlineDynamicImports` would inline the lazy `dockerode` import (and its + // native `ssh2`/`cpu-features` deps), which rollup can't bundle. Externalize + // the chain: it's an optional runtime dep, gracefully absent otherwise. + `dockerode`, + `ssh2`, + `cpu-features`, + // Same treatment for the lazy `e2b` import behind the remote sandbox + // profile — an optional runtime dep, required from node_modules at runtime. 
+ `e2b`, ]) function externalizeBareImports( diff --git a/packages/agents-runtime/package.json b/packages/agents-runtime/package.json index 95af806e05..898290bdc2 100644 --- a/packages/agents-runtime/package.json +++ b/packages/agents-runtime/package.json @@ -64,10 +64,32 @@ "default": "./dist/tools.cjs" } }, + "./sandbox": { + "import": { + "types": "./dist/sandbox.d.ts", + "default": "./dist/sandbox.js" + }, + "require": { + "types": "./dist/sandbox.d.cts", + "default": "./dist/sandbox.cjs" + } + }, + "./sandbox/docker": { + "import": { + "types": "./dist/sandbox-docker.d.ts", + "default": "./dist/sandbox-docker.js" + }, + "require": { + "types": "./dist/sandbox-docker.d.cts", + "default": "./dist/sandbox-docker.cjs" + } + }, "./package.json": "./package.json" }, "peerDependencies": { "@tanstack/react-db": ">=0.1.78", + "dockerode": ">=5.0.0", + "e2b": ">=2.0.0", "react": ">=18" }, "peerDependenciesMeta": { @@ -76,6 +98,12 @@ }, "@tanstack/react-db": { "optional": true + }, + "dockerode": { + "optional": true + }, + "e2b": { + "optional": true } }, "dependencies": { @@ -102,10 +130,12 @@ }, "devDependencies": { "@durable-streams/server": "^0.3.5", + "@types/dockerode": "^4.0.1", "@types/jsdom": "^27.0.0", "@types/node": "^22.19.15", "@types/turndown": "^5.0.6", "@vitest/coverage-v8": "^3.2.4", + "dockerode": "^5.0.0", "tsdown": "^0.9.0", "typescript": "^5.9.3", "vitest": "^3.2.4" diff --git a/packages/agents-runtime/src/context-factory.ts b/packages/agents-runtime/src/context-factory.ts index 35ba9131fa..0ec31eeba7 100644 --- a/packages/agents-runtime/src/context-factory.ts +++ b/packages/agents-runtime/src/context-factory.ts @@ -16,6 +16,7 @@ import { CACHE_TIERS } from './types' import { composeToolsWithProviders } from './tool-providers' import type { HydratedEventSourceWake } from './event-sources' import type { ChangeEvent } from '@durable-streams/state' +import type { Sandbox } from './sandbox/types' import type { AgentConfig, AgentHandle, @@ -71,6 +72,7 @@ 
export interface HandlerContextConfig { state: TState actions: Record) => unknown> electricTools: Array + sandbox: Sandbox events: Array writeEvent: (event: ChangeEvent) => void wakeSession: WakeSession @@ -708,6 +710,7 @@ export function createHandlerContext( actions: config.actions, electricTools: config.electricTools, signal: config.runSignal ?? new AbortController().signal, + sandbox: config.sandbox, useAgent(cfg) { agentConfig = cfg return agent diff --git a/packages/agents-runtime/src/create-handler.ts b/packages/agents-runtime/src/create-handler.ts index 012ba7a81e..30373ba6f8 100644 --- a/packages/agents-runtime/src/create-handler.ts +++ b/packages/agents-runtime/src/create-handler.ts @@ -11,6 +11,7 @@ import { passthrough } from './entity-schema' import { runtimeLog } from './log' import { appendPathToUrl } from './url' import { verifyWebhookSignature } from './webhook-signature' +import type { SandboxProfile } from './sandbox/types' import type { EntityRegistry } from './define-entity' import type { IncomingMessage, ServerResponse } from 'node:http' import type { WebhookSignatureVerifierConfig } from './webhook-signature' @@ -105,6 +106,15 @@ export interface RuntimeRouterConfig { onWakeError?: (error: Error) => boolean | void /** Max number of concurrent entity-type registrations (default: 8). */ registrationConcurrency?: number + /** + * Sandbox profiles registered by this runtime. Each profile is a + * `(name, label, description?, factory)` tuple — the factory stays + * local to the runtime; only the descriptive fields are advertised + * to the agents-server (via the runner registration) and surfaced + * in the UI picker. Spawn payloads pass `sandbox.profile` and the + * server validates against the target runner's advertised set. + */ + sandboxProfiles?: ReadonlyArray /** * Public URL of this runtime, forwarded to the agents-server so it can be * included in GET /api/runtimes. 
If omitted the runtime is registered but @@ -161,6 +171,20 @@ export interface RuntimeRouter { /** Names of all registered entity types */ readonly typeNames: Array + /** + * Wire-shape descriptors for sandbox profiles registered on this + * runtime. Used by the runner registration to advertise the profile + * set to the agents-server (factory closures are intentionally not + * included). + */ + readonly sandboxProfileDescriptors: Array<{ + name: string + label: string + description?: string + /** True for off-host (remote-provider) profiles; see SandboxProfile.remote. */ + remote?: boolean + }> + /** Register all entity types with the durable streams server */ registerTypes: () => Promise } @@ -201,17 +225,31 @@ export function createRuntimeRouter( webhookSignature, } = normalized + const getRegisteredType = (name: string) => + registry ? registry.get(name) : getEntityType(name) + const getRegisteredTypes = () => + registry ? registry.list() : listEntityTypes() + + // Index the runtime's profiles by name. Duplicate names are a + // configuration bug — fail fast rather than silently dropping one. + const sandboxProfiles = new Map() + for (const profile of config.sandboxProfiles ?? []) { + if (sandboxProfiles.has(profile.name)) { + throw new Error( + `[agent-runtime] duplicate sandbox profile name "${profile.name}" registered on createRuntimeRouter` + ) + } + sandboxProfiles.set(profile.name, profile) + } + const wakeConfig: ProcessWakeConfig = { baseUrl, registry, createElectricTools, idleTimeout, heartbeatInterval, + sandboxProfiles, } - const getRegisteredType = (name: string) => - registry ? registry.get(name) : getEntityType(name) - const getRegisteredTypes = () => - registry ? 
registry.list() : listEntityTypes() const debugRegistrationTiming = process.env.ELECTRIC_AGENTS_DEBUG_REGISTRATION_TIMING === `1` const pendingWakes = new Set>() @@ -541,6 +579,17 @@ export function createRuntimeRouter( } } + const sandboxProfileDescriptors = [...sandboxProfiles.values()].map( + (profile) => ({ + name: profile.name, + label: profile.label, + ...(profile.description !== undefined && { + description: profile.description, + }), + ...(profile.remote !== undefined && { remote: profile.remote }), + }) + ) + return { handleRequest, handleWebhookRequest, @@ -553,6 +602,7 @@ export function createRuntimeRouter( get typeNames() { return getRegisteredTypes().map((entry) => entry.name) }, + sandboxProfileDescriptors, registerTypes, } } @@ -600,6 +650,7 @@ export function createRuntimeHandler( get typeNames() { return router.typeNames }, + sandboxProfileDescriptors: router.sandboxProfileDescriptors, registerTypes: router.registerTypes, } } diff --git a/packages/agents-runtime/src/process-wake.ts b/packages/agents-runtime/src/process-wake.ts index df6d0a11a9..c8b92d3629 100644 --- a/packages/agents-runtime/src/process-wake.ts +++ b/packages/agents-runtime/src/process-wake.ts @@ -9,6 +9,8 @@ import { createHandlerContext } from './context-factory' import { createSetupContext } from './setup-context' import { createEntityLogPrefix, runtimeLog } from './log' import { createRuntimeServerClient } from './runtime-server-client' +import { unrestrictedSandbox } from './sandbox/unrestricted' +import { resolveSandboxIdentity } from './sandbox/identity' import { appendPathToUrl } from './url' import { manifestChildKey } from './manifest-helpers' import { @@ -17,6 +19,8 @@ import { } from './event-sources' import { webhookObservationCollections } from './observation-sources' import type { HydratedEventSourceWake } from './event-sources' +import { SandboxError } from './sandbox/types' +import type { Sandbox } from './sandbox/types' import type { CronObservationSource, 
EntitiesObservationSource, @@ -34,6 +38,7 @@ import type { ProcessWakeConfig, SendResult, SharedStateSchemaMap, + SpawnSandboxOption, Wake, WakeEvent, WakeMessage, @@ -469,6 +474,17 @@ export async function processWake( let finalError: Error | AggregateError | null = null let shutdownRequested = shutdownSignal?.aborted ?? false let ackCurrentWakeOnFailure = false + // Sandbox is acquired once per wake-session (after entityArgs is known) + // and released/disposed in the outer finally. Lives at function scope so + // both the try and finally can see it. + let sandbox: Sandbox | null = null + // The sandbox identity this wake resolved to (profile + resolved key + + // persistent), captured so the spawn glue can propagate it to an `inherit` + // child as explicit values — a per-wake parent's live key is never stored on + // the entity, so only the running wake can hand it down. + let resolvedSandboxSelection: + | { profile: string; key: string; persistent: boolean } + | undefined // Live event handler — wired after preload, processes child_status + inbox let idleTimer: ReturnType | null = null @@ -483,6 +499,14 @@ export async function processWake( let signalAbortRequested = false let pauseRequested = false let resumeRequested = false + // Set when this wake observes its entity transition to a terminal status + // (SIGKILL ⇒ killed, SIGTERM ⇒ stopped): the entity is gone for good, so its + // sandbox should be reclaimed (wiped) on dispose rather than preserved. A + // mere runner shutdown (the entity lives on) must NOT set this. Seeded from + // the incoming status in case the wake is delivered already-terminal. 
+ let entityTerminated = + notification.entity?.status === `killed` || + notification.entity?.status === `stopped` const pendingSignalHandlers: Array> = [] const secondaryDbs: Array<{ drainPendingWrites?: () => Promise @@ -767,6 +791,7 @@ export async function processWake( case `SIGKILL`: log.info(`SIGKILL received, aborting active run and closing wake`) signalAbortRequested = true + entityTerminated = true runAbortController?.abort() requestShutdown() if (!alreadyHandled) { @@ -793,6 +818,7 @@ export async function processWake( case `SIGTERM`: log.info(`SIGTERM received, closing wake after cleanup`) requestShutdown() + entityTerminated = true invokeSignalHandler(value, (outcome) => { markSignalHandled( event, @@ -1137,6 +1163,63 @@ export async function processWake( const entityArgs = Object.freeze(notification.entity?.spawnArgs ?? {}) + // Sandbox is a per-runner concern: profiles live on the runner's + // advertisement (validated server-side at spawn time). The + // wake-time job is just to look up the chosen profile by name. + // When no profile was picked at spawn we fall back to an + // in-process unrestricted sandbox at the host's cwd — matches the + // pre-profiles default and keeps tests/dev simple. + const sandboxConfig = notification.entity?.sandbox + const requestedProfileName = sandboxConfig?.profile + if (requestedProfileName) { + const profile = config.sandboxProfiles?.get(requestedProfileName) + if (!profile) { + // Validated against the runner's advertisement at spawn time, so this + // is normally a transient mismatch — the runner re-registered (dropping + // the profile) between spawn and this wake. The wake is rejected before + // it's acked, so the server reclaims it on timeout and redrives onto a + // runner that still advertises the profile; failure is isolated to this + // one wake (see dispatchWake) and does not take the runner down. 
+ throw new SandboxError( + `unavailable`, + `[agent-runtime] sandbox profile "${requestedProfileName}" requested for entity "${entityUrl}" is not currently registered on this runtime (likely a transient runner re-registration race; the wake will be redriven). Available profiles: ${[...(config.sandboxProfiles?.keys() ?? [])].join(`, `) || `(none)`}.` + ) + } + // Resolve identity (explicit / per-entity / per-wake key), durability + // (`persistent`), and ownership (`owner`) here, upstream of the provider. + // The provider only ever sees a resolved key + persistent + owner — + // "full isolation" is purely a unique per-wake key, never a separate path. + const resolved = resolveSandboxIdentity( + { + key: sandboxConfig.key, + scope: sandboxConfig.scope, + persistent: sandboxConfig.persistent, + owner: sandboxConfig.owner, + }, + { entityUrl, wakeId } + ) + resolvedSandboxSelection = { + profile: requestedProfileName, + key: resolved.sandboxKey, + persistent: resolved.persistent, + } + sandbox = await profile.factory({ + sandboxKey: resolved.sandboxKey, + persistent: resolved.persistent, + owner: resolved.owner, + entityUrl, + entityType: typeName, + args: entityArgs, + }) + } else { + sandbox = await unrestrictedSandbox({ workingDirectory: process.cwd() }) + } + // Record which sandbox each wake actually resolved to — the isolation + // boundary is security-relevant, so keep it legible in the runtime log. + log.info( + `[sandbox] entity=${entityUrl} requested=${requestedProfileName ?? 
`(none)`} resolved=${sandbox.name} cwd=${sandbox.workingDirectory}` + ) + // ---- Send executor — ctx.send() calls this directly (no queue) ---- const executeSend = (send: { targetUrl: string @@ -1183,6 +1266,7 @@ export async function processWake( initialMessage?: unknown wake?: Wake tags?: Record + sandbox?: SpawnSandboxOption } ): Promise<{ entityUrl: string; streamPath: string }> => { const wakeOpt = opts?.wake @@ -1209,6 +1293,29 @@ export async function processWake( manifestKey: manifestChildKey(childType, childId), } : undefined + // `inherit` means "adopt this wake's RESOLVED sandbox" (profile + + // resolved key + persistent). We send them as EXPLICIT values rather + // than `inherit: true` so a per-wake parent's live key — which is never + // stored on the entity — propagates to the child; the child then shares + // this wake's exact container/workspace. If this wake has no sandbox, + // inherit gracefully yields none. + const requestedInherit = + opts?.sandbox === `inherit` || + (typeof opts?.sandbox === `object` && opts.sandbox.inherit === true) + const sandbox = requestedInherit + ? resolvedSandboxSelection + ? { + profile: resolvedSandboxSelection.profile, + key: resolvedSandboxSelection.key, + persistent: resolvedSandboxSelection.persistent, + // The child ATTACHES to this wake's sandbox — it never owns or + // tears down the owner's container/workspace. + owner: false, + } + : undefined + : opts?.sandbox === `inherit` + ? 
undefined + : opts?.sandbox return serverClient.spawnEntity({ type: childType, id: childId, @@ -1216,6 +1323,7 @@ export async function processWake( parentUrl, initialMessage: opts?.initialMessage, tags: opts?.tags, + sandbox, wake: wakeOpt, }) }, @@ -1896,6 +2004,10 @@ export async function processWake( events: currentWakeEvents, actions: setupCtx.actions, electricTools, + // Non-null at this point: the sandbox was acquired earlier in + // this try block (after entityArgs); TS narrowing doesn't survive + // the surrounding for-loop, so assert. + sandbox: sandbox!, writeEvent, wakeSession, wakeEvent: currentWakeEvent, @@ -2146,6 +2258,16 @@ export async function processWake( } } db.close() + if (sandbox) { + try { + // When the entity reached a terminal status this wake, reclaim (wipe) + // its sandbox; otherwise release the lease and let the owner's idle + // policy (stop/remove) apply. Attacher leases ignore `reclaim`. + await sandbox.dispose({ reclaim: entityTerminated }) + } catch (err) { + cleanupErrors.push(toError(err)) + } + } if (claimedWake) { log.info( doneOffset === `-1` diff --git a/packages/agents-runtime/src/runtime-server-client.ts b/packages/agents-runtime/src/runtime-server-client.ts index 3061fad26c..b4ac4cdcc7 100644 --- a/packages/agents-runtime/src/runtime-server-client.ts +++ b/packages/agents-runtime/src/runtime-server-client.ts @@ -49,6 +49,18 @@ export interface SpawnEntityOptions { parentUrl?: string initialMessage?: unknown tags?: Record + /** + * Sandbox selection — a `profile` with optional `scope` / `persistent`, an + * explicit shared `key`, or `inherit` the parent's resolved sandbox. 
+ */ + sandbox?: { + profile?: string + key?: string + scope?: `entity` | `wake` + persistent?: boolean + owner?: boolean + inherit?: boolean + } dispatch_policy?: DispatchPolicy wake?: { subscriberUrl: string @@ -385,6 +397,7 @@ export function createRuntimeServerClient( parentUrl, initialMessage, tags, + sandbox, dispatch_policy, wake, }: SpawnEntityOptions): Promise => { @@ -393,6 +406,7 @@ export function createRuntimeServerClient( if (parentUrl !== undefined) body.parent = parentUrl if (initialMessage !== undefined) body.initialMessage = initialMessage if (tags && Object.keys(tags).length > 0) body.tags = tags + if (sandbox !== undefined) body.sandbox = sandbox if (dispatch_policy !== undefined) body.dispatch_policy = dispatch_policy if (wake !== undefined) body.wake = wake diff --git a/packages/agents-runtime/src/sandbox-docker.ts b/packages/agents-runtime/src/sandbox-docker.ts new file mode 100644 index 0000000000..c1c39dc6cd --- /dev/null +++ b/packages/agents-runtime/src/sandbox-docker.ts @@ -0,0 +1,16 @@ +/** + * Docker sandbox provider as a separate subpath export so callers that + * only need the in-process `unrestrictedSandbox` (e.g. desktop renderers + * bundled by Vite) don't pull `dockerode` and its native dependencies + * (`cpufeatures.node`, etc.) into their bundle. Import from + * `@electric-ax/agents-runtime/sandbox/docker` only when actually using + * the docker provider. 
+ */ + +export { + dockerSandbox, + sweepOrphanedDockerSandboxes, + __resetPersistentRegistryForTests, +} from './sandbox/docker' +export type { DockerSandboxOpts } from './sandbox/docker' +export { isDockerAvailable } from './sandbox/docker/loader' diff --git a/packages/agents-runtime/src/sandbox.ts b/packages/agents-runtime/src/sandbox.ts new file mode 100644 index 0000000000..9e1bc7ffe4 --- /dev/null +++ b/packages/agents-runtime/src/sandbox.ts @@ -0,0 +1,23 @@ +export { KNOWN_ADAPTERS } from './sandbox/types' +export type { KnownAdapter } from './sandbox/types' + +export { unrestrictedSandbox } from './sandbox/unrestricted' +export type { UnrestrictedSandboxOpts } from './sandbox/unrestricted' +export { remoteSandbox } from './sandbox/remote' +export type { RemoteProvider, RemoteSandboxOpts } from './sandbox/remote' +export type { RemoteSandboxClient } from './sandbox/remote/types' +export { isE2BAvailable } from './sandbox/remote/e2b' +export { chooseDefaultSandbox } from './sandbox/default' +export { SandboxError } from './sandbox/types' +export type { + Sandbox, + SandboxExecOpts, + SandboxExecResult, + SandboxFactory, + SandboxFactoryParams, + SandboxProfile, + DirEntry, + FileStat, + NetworkPolicy, + SandboxErrorKind, +} from './sandbox/types' diff --git a/packages/agents-runtime/src/sandbox/default.ts b/packages/agents-runtime/src/sandbox/default.ts new file mode 100644 index 0000000000..80ecef70b7 --- /dev/null +++ b/packages/agents-runtime/src/sandbox/default.ts @@ -0,0 +1,21 @@ +import { unrestrictedSandbox } from './unrestricted' +import type { Sandbox } from './types' + +/** + * Pick the default Sandbox provider for built-in entities (Horton, Worker). + * Always returns `unrestrictedSandbox`; stronger isolation is opt-in by + * constructing `dockerSandbox` or `remoteSandbox` directly. + * + * The unrestricted provider shares the host filesystem and process namespace, + * so it is a single-tenant, trusted-code default — NOT a containment boundary. 
+ * Tool-layer policy shrinks the blast radius (workspace + symlink-escape + * containment on reads/writes; bash drops host env so secrets aren't trivially + * dumped) but cannot stop host-level access (e.g. reading `/proc/<pid>/environ` + * for secrets) or SSRF from `fetch_url`. Use `dockerSandbox`/`remoteSandbox` to + * actually contain untrusted or multi-tenant entities. + */ +export async function chooseDefaultSandbox( + workingDirectory: string +): Promise { + return unrestrictedSandbox({ workingDirectory }) +} diff --git a/packages/agents-runtime/src/sandbox/docker.ts b/packages/agents-runtime/src/sandbox/docker.ts new file mode 100644 index 0000000000..b9ff139a11 --- /dev/null +++ b/packages/agents-runtime/src/sandbox/docker.ts @@ -0,0 +1,1197 @@ +import { PassThrough } from 'node:stream' +import { realpathSync } from 'node:fs' +import { createHash, randomUUID } from 'node:crypto' +import { fetchInSandbox } from './exec-fetch' +import { + SandboxError, + type DirEntry, + type FileStat, + type NetworkPolicy, + type Sandbox, + type SandboxExecOpts, + type SandboxExecResult, +} from './types' +import { + loadDockerode, + type Dockerode, + type DockerodeContainer, +} from './docker/loader' +import { + getFile, + makeDir, + pathExists, + putFile, + readDir, + removePath, + statPath, +} from './docker/fs' +import { hostAllowedByPolicy, isPrivateOrLinkLocal } from './docker/net-policy' +import { sandboxWipesOnDispose } from './identity' +import { absoluteSandboxPath, isPathWithinSandbox } from './path-containment' + +export interface DockerSandboxOpts { + /** Absolute path inside the container (NOT a host path). Default `/work`. */ + readonly workingDirectory?: string + /** + * Docker image. By default we pin a known small image; callers can override + * to bake in tooling but must supply a digest pin unless `allowFloatingTag` + * is set, to keep images reproducible across machines. 
+ */ + readonly image?: string + readonly allowFloatingTag?: boolean + readonly env?: Readonly> + readonly initialNetworkPolicy?: NetworkPolicy + readonly resources?: { + readonly memoryBytes?: number + readonly cpus?: number + readonly pidsLimit?: number + } + /** `'runc'` (default, broad compat) or `'runsc'` (gVisor, hardened). */ + readonly runtime?: `runc` | `runsc` + /** + * Container ports to publish to the host (bound to loopback). The host port + * mapping can be read back via `docker inspect`; the sandbox does not expose + * a URL-lookup primitive. Requires a network policy that grants egress + * (deny-all gives the container no interface to bind). + */ + readonly exposedPorts?: ReadonlyArray + readonly extraMounts?: ReadonlyArray<{ + readonly hostPath: string + readonly containerPath: string + /** + * Defaults to `true`. Set `false` to bind read-write (e.g. when the + * caller wants the entity to write to the host's working directory). + * The docker-socket safety check still applies regardless. + */ + readonly readOnly?: boolean + }> + readonly dockerSocket?: string + readonly labels?: Readonly> + /** + * Stable identity for the container. The container is named deterministically + * from this key and reattached to (rather than recreated) while it's alive, + * so callers — sibling wakes, collaborators, an inheriting subagent — that + * resolve to the same key converge on one container and filesystem. Resolved + * upstream (per-entity URL, per-wake `url#wakeId`, or an explicit shared key). + * When omitted (direct/test callers) a random key is synthesized so a + * one-off still flows through the single registry path. + */ + readonly sandboxKey?: string + /** + * Idle-teardown durability. `true` ⇒ STOP the container when idle (its + * writable layer survives, so a later acquire restarts it with the + * filesystem intact); `false` (default) ⇒ REMOVE it when idle (wiped). 
Either + * way the container is named-by-key and reattachable while alive — this flag + * only selects what the debounced idle teardown does. + */ + readonly persistent?: boolean + /** + * Ownership of the keyed container. `true` (default) ⇒ OWNER: create the + * container if it's absent, and this lease's teardown governs the container's + * lifecycle (idle ⇒ stop/remove per `persistent`; `dispose({reclaim})` ⇒ + * wipe). `false` ⇒ ATTACHER: reattach to an already-live container with this + * `sandboxKey` and reject with `SandboxError('unavailable')` if none exists (it + * never creates a fresh, empty one); disposing only releases the lease and + * never tears the owner's container down. + */ + readonly owner?: boolean + /** + * How long the container is kept alive after its last lease disposes before + * the idle teardown (stop or remove) runs. A re-acquire within this window (a + * sibling wake, an inheriting subagent, ongoing collaboration) cancels the + * teardown, so the container — and any dev server running in it — survives + * active use; for an ephemeral container it's the window in which in-flight + * collaborators can still reattach before it's wiped. + * Defaults to {@link DEFAULT_IDLE_GRACE_MS}. + */ + readonly sharedIdleGraceMs?: number + /** + * Observability only — never affects identity or reattach. The entity type + * that spawned this sandbox (e.g. `horton`); recorded as a label. + */ + readonly entityType?: string + /** + * Observability only. The entity URL this wake belongs to; recorded as a + * label so `docker ps` shows what it belongs to. The container *name* is + * always derived from `sandboxKey` (so callers that resolve to the same key + * converge on one container), not from this — collaborators may differ. + */ + readonly entityUrl?: string + /** + * If true (default), pulls the image when it's not present locally. Set + * to false in CI where you'd rather fail fast and pre-pull. 
+ */ + readonly pullIfMissing?: boolean + /** Optional progress callback during image pull. */ + readonly onPullProgress?: (event: unknown) => void +} + +/** + * Default image: small Node-capable alpine variant pinned by digest. We + * deliberately don't ship a custom image — operators can override. + * + * Update procedure: pull the latest node:20-alpine, run `docker inspect + * --format='{{index .RepoDigests 0}}' node:20-alpine`, paste here. + */ +const DEFAULT_IMAGE = `node:20-alpine@sha256:fb4cd12c85ee03686f6af5362a0b0d56d50c58a04632e6c0fb8363f609372293` +// The digest above tracks node:20-alpine at branch-build time and works +// across linux/amd64 and linux/arm64 (it's the manifest list digest). +// Override via DockerSandboxOpts.image to pin to a different version / +// pre-provisioned image. + +/** Marks every container this module creates. */ +const SANDBOX_LABEL = `com.electric.sandbox` +/** The container's resolved identity key (always set; see `sandboxKey`). */ +const SANDBOX_KEY_LABEL = `com.electric.sandbox.key` +/** Entity type that spawned the sandbox (observability). */ +const SANDBOX_ENTITY_TYPE_LABEL = `com.electric.sandbox.entity-type` +/** Entity URL the spawning wake belonged to (observability). */ +const SANDBOX_ENTITY_LABEL = `com.electric.sandbox.entity` +/** + * `'true'` when the OWNER created the container as persistent. Read by the boot + * sweep so a restarted process preserves persistent workspaces (to reattach by + * key) and only reclaims ephemeral leftovers. + */ +const SANDBOX_PERSISTENT_LABEL = `com.electric.sandbox.persistent` + +/** Common prefix for every container name this module assigns. */ +const NAME_PREFIX = `electric-sbx` + +/** Default warm window before an idle container is torn down (stop/remove). */ +const DEFAULT_IDLE_GRACE_MS = 2 * 60 * 1000 + +/** + * Process-local registry of live sandbox containers, keyed by container name. 
/**
 * One entry in the process-local registry of live sandbox containers, which is
 * keyed by container name. Every lease (owner or attacher) gets its own
 * `DockerSandbox` wrapper, but all wrappers over one container share this entry.
 *
 * - `refs` counts the live leases, so the container isn't torn down while a
 *   sibling lease still uses it.
 * - `idleTimer` is the debounced teardown scheduled when the last lease
 *   disposes; a re-acquire within the grace cancels it
 *   (see {@link scheduleIdleTeardown}).
 * - `persistent` (set by the OWNER at creation) records the idle action:
 *   STOP (preserve) vs REMOVE (wipe).
 * - `reclaim` is set when an OWNER lease disposes with `{reclaim:true}` (its
 *   entity went terminal): the container is then wiped once the last lease
 *   drains, regardless of `persistent`.
 * - `container` is retained so the timer can act after all wrappers are gone.
 */
interface ContainerEntry {
  refs: number
  container: DockerodeContainer
  persistent: boolean
  idleGraceMs: number
  reclaim?: boolean
  idleTimer?: ReturnType<typeof setTimeout>
}

/** Process-local registry: container name → shared {@link ContainerEntry}. */
const sandboxContainers = new Map<string, ContainerEntry>()

/**
 * Tail of the pending work per key. Acquire (reattach + register) and the
 * debounced idle teardown both run through {@link withKeyLock}, so a re-acquire
 * can never interleave with a teardown that's already in flight.
 */
const keyLocks = new Map<string, Promise<void>>()

/**
 * Run `fn` after every previously queued task for `key` has settled.
 *
 * @param key Serialization key (the container name).
 * @param fn  Task to run; it starts whether the previous task for this key
 *            resolved or rejected.
 * @returns The promise of `fn` itself, so it still rejects to the current
 *          caller when `fn` fails.
 */
function withKeyLock<T>(key: string, fn: () => Promise<T>): Promise<T> {
  const prev = keyLocks.get(key) ?? Promise.resolve()
  const run = prev.then(fn, fn)
  // Swallow errors on the stored tail so one failure doesn't poison the lock
  // for later callers.
  const tail: Promise<void> = run.then(
    () => undefined,
    () => undefined
  )
  keyLocks.set(key, tail)
  // Drop the entry once this task settles and nothing newer is queued behind
  // it. Without this the map grows by one settled promise per key forever.
  void tail.then(() => {
    if (keyLocks.get(key) === tail) keyLocks.delete(key)
  })
  return run
}
/** Lowercase, DNS-safe slug from an arbitrary identity string (≤24 chars). */
function slugify(s: string): string {
  const slug = s
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, `-`)
    .replace(/^-+/, ``)
    .slice(0, 24)
    // Truncation can leave a dangling separator; strip it after slicing.
    .replace(/-+$/, ``)
  return slug || `sbx`
}

/**
 * Deterministic, DNS-safe name for a container. Derived *only* from the
 * resolved key, so every caller that resolves to the same key computes the
 * same string and converges on one container. A readable slug of the key is
 * included for `docker ps` legibility; the trailing hash keeps names distinct
 * when two keys slugify alike.
 */
function containerNameForKey(sandboxKey: string): string {
  const hash = createHash(`sha256`)
    .update(sandboxKey)
    .digest(`hex`)
    .slice(0, 12)
  return `${NAME_PREFIX}-${slugify(sandboxKey)}-${hash}`
}

/**
 * Schedule a debounced teardown of a now-idle container.
 *
 * The teardown runs under the per-key lock and re-checks `refs` before acting,
 * so a lease that returned during the grace keeps the container. `'stop'`
 * stops the container, `'remove'` force-removes it with its volumes. Either
 * way the registry entry is dropped afterwards, even when the Docker call
 * throws.
 *
 * @param name    Container name (also the lock / registry key).
 * @param graceMs Delay before the teardown runs; `0` still goes through the
 *                timer and the lock.
 * @param action  `'stop'` or `'remove'`.
 * @returns The (unref'd) timer handle, so the caller can cancel it.
 */
function scheduleIdleTeardown(
  name: string,
  graceMs: number,
  action: `stop` | `remove`
): ReturnType<typeof setTimeout> {
  const timer = setTimeout(() => {
    void withKeyLock(name, async () => {
      const entry = sandboxContainers.get(name)
      if (!entry || entry.refs > 0) return // re-acquired during the grace
      try {
        if (action === `stop`) {
          // `t: 0` → straight to SIGKILL. PID 1 (`sh`) ignores SIGTERM, so a
          // graceful stop would just wait the full timeout, and the container
          // holds no state outside its (preserved) filesystem.
          await entry.container.stop({ t: 0 })
        } else {
          await entry.container.remove({ force: true, v: true })
        }
      } catch {
        /* already stopped / removed / gone */
      }
      sandboxContainers.delete(name)
    })
  }, graceMs)
  // Never keep the process alive just for a pending teardown.
  timer.unref?.()
  return timer
}
/**
 * Acquire a lease on the Docker sandbox container identified by
 * `opts.sandboxKey` (a random key is synthesized when omitted).
 *
 * Every container is named-by-key, registered in `sandboxContainers`, and
 * refcounted. `persistent` only selects the idle-teardown action (stop vs
 * remove); `owner` gates creation: an owner creates-or-reattaches, an attacher
 * may only reattach.
 *
 * @param opts Sandbox options; see `DockerSandboxOpts`.
 * @returns A fresh per-lease wrapper over the (possibly shared) container.
 * @throws SandboxError `unavailable` when the daemon cannot be reached within
 *         2 s; `runtime` for a non-absolute `workingDirectory`, for
 *         `exposedPorts` combined with a deny-all policy, or when the
 *         container fails to start. Errors from the reattach helpers and the
 *         image pull propagate unchanged.
 */
export async function dockerSandbox(
  opts: DockerSandboxOpts = {}
): Promise<Sandbox> {
  const Docker = await loadDockerode()
  const docker: Dockerode = opts.dockerSocket
    ? new Docker({ socketPath: opts.dockerSocket })
    : new Docker()

  // Probe the daemon so we surface "unavailable" cleanly instead of a
  // dockerode error deep in createContainer.
  let pingTimeout: ReturnType<typeof setTimeout> | undefined
  try {
    await Promise.race([
      docker.ping(),
      new Promise<never>((_, rej) => {
        pingTimeout = setTimeout(
          () => rej(new Error(`docker ping timeout`)),
          2000
        )
      }),
    ])
  } catch (err) {
    throw new SandboxError(
      `unavailable`,
      `dockerSandbox: cannot reach the Docker daemon (${
        err instanceof Error ? err.message : String(err)
      }). Is Docker Desktop / OrbStack running?`
    )
  } finally {
    // Clear the watchdog: left armed, it keeps the event loop alive for the
    // rest of the 2 s window after a successful ping.
    if (pingTimeout !== undefined) clearTimeout(pingTimeout)
  }

  // Single lifecycle path: every container is named-by-key, registered, and
  // refcount + debounce managed. `persistent` only selects the idle-teardown
  // action (stop vs remove); `owner` gates creation. A direct caller that omits
  // `sandboxKey` gets a synthesized one so a one-off still flows through the path.
  const sandboxKey = opts.sandboxKey ?? randomUUID()
  const persistent = opts.persistent === true
  const owner = opts.owner !== false
  // Named deterministically from the resolved key so callers that resolve to
  // the same key converge on one container; the entity is recorded in labels.
  const containerName = containerNameForKey(sandboxKey)
  const idleGraceMs = opts.sharedIdleGraceMs ?? DEFAULT_IDLE_GRACE_MS

  const containerCwd = opts.workingDirectory ?? `/work`
  if (!containerCwd.startsWith(`/`)) {
    throw new SandboxError(
      `runtime`,
      `dockerSandbox.workingDirectory must be an absolute container path, got "${containerCwd}"`
    )
  }

  const initialPolicy: NetworkPolicy = opts.initialNetworkPolicy ?? {
    mode: `deny-all`,
  }

  // Per-lease wrapper over the shared registry entry. Each acquire (owner or
  // attacher) gets its own wrapper carrying its own `owner` flag, so an attacher
  // disposing can never reclaim the owner's container.
  const buildWrapper = (container: DockerodeContainer): DockerSandbox =>
    new DockerSandbox({
      container,
      containerCwd,
      policy: initialPolicy,
      runtime: opts.runtime ?? `runc`,
      containerName,
      owner,
    })

  // Fast path: a concurrent sibling lease in this process already holds the
  // container live (refs > 0 ⇒ running, no pending idle teardown). Bump the
  // refcount and hand back a fresh wrapper over the shared container.
  {
    const entry = sandboxContainers.get(containerName)
    if (entry && entry.refs > 0) {
      entry.refs += 1
      return buildWrapper(entry.container)
    }
  }

  // Network is granted purely by the egress policy. deny-all → no interface
  // at all (NetworkMode=none), which is the hard isolation guarantee. Any
  // other policy → bridge: the container issues requests directly, and the
  // allowlist is enforced host-side in `fetch()` rather than via an
  // in-container proxy.
  const networkMode =
    initialPolicy.mode === `deny-all` ? (`none` as const) : (`bridge` as const)

  // Publishing a port requires a network interface, which deny-all
  // (NetworkMode=none) doesn't have — surface the contradiction up front
  // rather than failing later when the binding turns out to be absent.
  if (networkMode === `none` && (opts.exposedPorts?.length ?? 0) > 0) {
    throw new SandboxError(
      `runtime`,
      `dockerSandbox: exposedPorts requires a network policy that grants egress; deny-all gives the container no network interface.`
    )
  }

  // Caller-supplied env wins over the default HOME.
  const baseEnv: Record<string, string> = {
    HOME: `/work`,
    ...opts.env,
  }

  const portBindings = makePortBindings(opts.exposedPorts ?? [])
  const exposedPorts = makeExposedPortsObject(opts.exposedPorts ?? [])

  const memoryBytes = opts.resources?.memoryBytes ?? 2 * 1024 * 1024 * 1024
  const nanoCpus = Math.floor((opts.resources?.cpus ?? 2) * 1_000_000_000)
  const pidsLimit = opts.resources?.pidsLimit ?? 1024

  // Hardened HostConfig — caller cannot override (no surface area).
  // NB: ReadonlyRootfs is *not* enabled by default because dockerode's
  // putArchive (the primitive backing writeFile / mkdir / readFile)
  // operates at the storage-driver layer, which Docker treats as a rootfs
  // write and rejects when the rootfs is RO — even when the target path
  // is a tmpfs / volume mount. The remaining flags below are the load-
  // bearing hardening: drop all caps, no new privileges, no docker socket,
  // strict ulimits, resource caps. Operators who want RO rootfs should also
  // stop using sandbox.writeFile / mkdir and do all writes via sandbox.exec
  // (echo > /work/...) which goes through the container's own mount namespace
  // and respects the tmpfs.
  //
  // No AutoRemove: every container lingers for the idle grace after its last
  // lease (so an in-window collaborator can reattach) and is then torn down by
  // the debounced teardown — STOP if persistent, REMOVE if ephemeral. Crash
  // leftovers are reclaimed by the boot sweep, not AutoRemove.
  const HostConfig = {
    Tmpfs: {
      '/tmp': `rw,size=64m,mode=1777`,
    },
    CapDrop: [`ALL`],
    CapAdd: [],
    SecurityOpt: [`no-new-privileges:true`],
    Privileged: false,
    PidsLimit: pidsLimit,
    Memory: memoryBytes,
    MemorySwap: memoryBytes, // disables swap
    NanoCpus: nanoCpus,
    NetworkMode: networkMode,
    PortBindings: portBindings,
    Runtime: opts.runtime === `runsc` ? `runsc` : undefined,
    Binds: makeBinds(opts.extraMounts),
    Ulimits: [
      { Name: `nofile`, Soft: 1024, Hard: 2048 },
      { Name: `nproc`, Soft: 1024, Hard: 1024 },
    ],
    IpcMode: `none`,
  }

  // Caller-supplied `labels` are spread last and therefore win on conflict.
  const labels: Record<string, string> = {
    [SANDBOX_LABEL]: `1`,
    [SANDBOX_KEY_LABEL]: sandboxKey,
    [SANDBOX_PERSISTENT_LABEL]: persistent ? `true` : `false`,
    ...(opts.entityType
      ? { [SANDBOX_ENTITY_TYPE_LABEL]: opts.entityType }
      : {}),
    ...(opts.entityUrl ? { [SANDBOX_ENTITY_LABEL]: opts.entityUrl } : {}),
    ...(opts.labels ?? {}),
  }

  // Create-and-start a fresh container. Image is pulled here only (skipped
  // entirely when reattaching to an existing container).
  const createStarted = async (): Promise<DockerodeContainer> => {
    const image = resolveImage(opts)
    await ensureImage(docker, image, opts)
    const c = await docker.createContainer({
      // Spread (rather than a literal `name:`) so the `name` query param —
      // which dockerode accepts but doesn't declare on its create-opts type —
      // doesn't trip excess-property checking.
      ...{ name: containerName },
      Image: image,
      Cmd: [`sh`, `-c`, `while true; do sleep 3600; done`],
      WorkingDir: containerCwd,
      Env: Object.entries(baseEnv).map(([k, v]) => `${k}=${v}`),
      Labels: labels,
      ExposedPorts: exposedPorts,
      HostConfig,
    })
    try {
      await c.start()
    } catch (err) {
      // Don't leave a created-but-unstartable container behind under the name.
      await c.remove({ force: true, v: true }).catch(() => {})
      throw new SandboxError(
        `runtime`,
        `dockerSandbox: container start failed: ${
          err instanceof Error ? err.message : String(err)
        }`
      )
    }
    // Make sure the caller-supplied working directory exists. (Only `/tmp` is
    // mounted as tmpfs in HostConfig above; the working directory lives on the
    // container's writable layer.) The mkdir result is not checked.
    await runOneOff(c, [`mkdir`, `-p`, containerCwd])
    return c
  }

  // Serialize reattach + registration against the debounced idle teardown
  // (which holds the same per-key lock), so a re-acquire can't race a teardown
  // that's already in flight.
  return withKeyLock(containerName, async () => {
    const live = sandboxContainers.get(containerName)
    if (live) {
      // An entry exists. If a teardown was pending (refs hit 0 during the
      // grace) cancel it and re-lease the still-live container.
      if (live.idleTimer) {
        clearTimeout(live.idleTimer)
        live.idleTimer = undefined
      }
      live.refs += 1
      return buildWrapper(live.container)
    }
    // No entry: an owner creates-or-reattaches; an attacher may only reattach to
    // an already-live container and never creates a fresh, empty one.
    const container = owner
      ? await reattachOrCreate(docker, containerName, createStarted)
      : await reattachExisting(docker, containerName)
    // The entry's `persistent` is the OWNER's idle intent. An attacher that
    // builds the entry (reattaching a container the owner left between wakes)
    // forces `persistent: true` so it can never wipe the owner's filesystem.
    sandboxContainers.set(containerName, {
      refs: 1,
      container,
      persistent: owner ? persistent : true,
      idleGraceMs,
    })
    return buildWrapper(container)
  })
}
/**
 * Probe a container handle: `true`/`false` for its `State.Running`, or `null`
 * when `inspect()` throws (treated as "no such container").
 */
async function probeRunning(
  container: DockerodeContainer
): Promise<boolean | null> {
  try {
    return (await container.inspect()).State.Running
  } catch {
    return null
  }
}

/**
 * Resolve a container by name: reattach to an existing one (starting it if it
 * had been stopped) or create it fresh. Handles the race where a concurrent
 * caller creates the named container first (409).
 *
 * @param docker        Docker client.
 * @param name          Deterministic container name.
 * @param createStarted Creates and starts a fresh container under `name`.
 * @returns The existing or newly created container.
 * @throws Whatever `createStarted` throws, unless it is a name conflict.
 */
async function reattachOrCreate(
  docker: Dockerode,
  name: string,
  createStarted: () => Promise<DockerodeContainer>
): Promise<DockerodeContainer> {
  const existing = docker.getContainer(name)
  const running = await probeRunning(existing)
  if (running !== null) {
    if (!running) {
      // A persistent container that was STOPPED on idle: its writable layer
      // (and thus the filesystem) survives, so restarting resumes the keepalive
      // and exec/fs round-trips work again. Start failures are ignored here.
      await existing.start().catch(() => {})
    }
    return existing
  }
  try {
    return await createStarted()
  } catch (err) {
    // Lost a create race — another lease just made it. Attach to theirs.
    if (isNameConflict(err)) {
      const theirs = docker.getContainer(name)
      await theirs.start().catch(() => {})
      return theirs
    }
    throw err
  }
}

/**
 * Reattach to an existing container WITHOUT creating one — the attacher path.
 * A non-owner can only ever join an existing owner container; if none exists
 * (the owner never created it, or it was already torn down) we reject with
 * `unavailable` rather than conjuring a fresh, empty sandbox under the shared
 * key. A stopped (idle-preserved) owner container is restarted.
 *
 * @throws SandboxError `unavailable` when no container named `name` exists.
 */
async function reattachExisting(
  docker: Dockerode,
  name: string
): Promise<DockerodeContainer> {
  const existing = docker.getContainer(name)
  const running = await probeRunning(existing)
  if (running === null) {
    throw new SandboxError(
      `unavailable`,
      `dockerSandbox: cannot attach — no owner sandbox is live for this key (container "${name}" does not exist). The owning entity must create it first.`
    )
  }
  if (!running) await existing.start().catch(() => {})
  return existing
}
/**
 * Whether `err` looks like Docker's "container name already in use" conflict:
 * an HTTP 409 carried on `statusCode` or `status`, or a message mentioning the
 * conflict.
 */
function isNameConflict(err: unknown): boolean {
  const shaped = err as
    | { statusCode?: number; status?: number }
    | null
    | undefined
  // Accept either property name for the HTTP status.
  if (shaped?.statusCode === 409 || shaped?.status === 409) return true
  return /already in use|Conflict/i.test(
    err instanceof Error ? err.message : String(err)
  )
}

/**
 * One-shot startup cleanup of *ephemeral* sandbox leftovers from a previous
 * process (a crash or restart before disposes ran). Call once at runner boot.
 *
 * Two kinds of containers are deliberately left untouched:
 *  - RUNNING containers — they may belong to a live sibling runner (or a
 *    concurrent test run) sharing this Docker daemon; force-removing those
 *    would wipe another process's in-use sandbox. Reboot/crash leftovers are
 *    `Exited` once the daemon restarts, so the common case is still reclaimed;
 *    a still-running ephemeral orphan is left for a manual labelled prune
 *    rather than risk a live peer.
 *  - PERSISTENT containers — `persistent: true` exists precisely so a restarted
 *    process can reattach to the warm workspace by key, so they must survive a
 *    boot (only ephemeral leftovers are reclaimed here; a manual labelled prune
 *    reclaims truly-abandoned persistent ones).
 *
 * @param opts Optional Docker socket path override.
 * @returns The names removed (the container id when it has no name). Empty
 *          when listing containers fails.
 */
export async function sweepOrphanedDockerSandboxes(opts?: {
  dockerSocket?: string
}): Promise<ReadonlyArray<string>> {
  const Docker = await loadDockerode()
  const docker: Dockerode = opts?.dockerSocket
    ? new Docker({ socketPath: opts.dockerSocket })
    : new Docker()

  let listed: ReadonlyArray<{
    Id: string
    Names?: ReadonlyArray<string>
    State?: string
    Labels?: Record<string, string>
  }> = []
  try {
    listed = await docker.listContainers({
      all: true,
      filters: { label: [SANDBOX_LABEL] },
    })
  } catch {
    // Best-effort: an unreachable daemon just means nothing is swept.
    return []
  }

  const removed: Array<string> = []
  for (const c of listed) {
    // Docker reports names with a leading slash.
    const name = c.Names?.[0]?.replace(/^\//, ``) ?? c.Id
    // Never touch a running container (possibly a live peer) or a persistent
    // one (meant to be reattached by key).
    if (c.State === `running`) continue
    if (c.Labels?.[SANDBOX_PERSISTENT_LABEL] === `true`) continue
    try {
      await docker.getContainer(c.Id).remove({ force: true, v: true })
      removed.push(name)
    } catch {
      /* already gone */
    }
  }
  return removed
}
/**
 * Test-only: drop the in-process container registry bookkeeping. Cancels any
 * pending idle-teardown timers, then clears the registry and the key locks. It
 * does not touch the Docker containers themselves.
 */
export function __resetPersistentRegistryForTests(): void {
  for (const entry of sandboxContainers.values()) {
    if (entry.idleTimer) clearTimeout(entry.idleTimer)
  }
  sandboxContainers.clear()
  keyLocks.clear()
}

/**
 * Pick the image to run: `opts.image` or the default.
 *
 * @throws SandboxError `runtime` when the image has no `@sha256:` digest pin
 *         and `allowFloatingTag` is not set.
 */
function resolveImage(opts: DockerSandboxOpts): string {
  const image = opts.image ?? DEFAULT_IMAGE
  if (!opts.allowFloatingTag && !image.includes(`@sha256:`)) {
    throw new SandboxError(
      `runtime`,
      `dockerSandbox: image "${image}" lacks a digest pin. Either supply a digest (\`image@sha256:...\`) or pass allowFloatingTag: true.`
    )
  }
  return image
}

/**
 * Pull `image` unless `opts.pullIfMissing === false`.
 *
 * Note that this always issues a pull when allowed; it does not first check
 * whether the image is already present locally. With pulling disabled, a
 * missing image is left for `createContainer` to report.
 *
 * @throws Whatever `docker.pull` or the pull progress stream reports.
 */
async function ensureImage(
  docker: Dockerode,
  image: string,
  opts: DockerSandboxOpts
): Promise<void> {
  if (opts.pullIfMissing === false) return
  // dockerode's `pull` is idempotent; the daemon dedupes by digest.
  const stream = await docker.pull(image)
  // `pull` resolves once the stream opens; wait for the stream to finish.
  await new Promise<void>((resolve, reject) => {
    docker.modem.followProgress(
      stream,
      (err: unknown) => (err ? reject(err) : resolve()),
      opts.onPullProgress
    )
  })
}
/**
 * Build the `HostConfig.PortBindings` map: one `<port>/tcp` entry per port,
 * with an empty `HostPort`.
 */
function makePortBindings(
  ports: ReadonlyArray<number>
): Record<string, ReadonlyArray<{ HostIp?: string; HostPort?: string }>> {
  const out: Record<
    string,
    ReadonlyArray<{ HostIp?: string; HostPort?: string }>
  > = {}
  for (const p of ports) {
    // Bind to loopback only — on a dev laptop `0.0.0.0` would expose the
    // sandboxed service across the LAN, which is unexpected for an
    // isolation primitive.
    out[`${p}/tcp`] = [{ HostIp: `127.0.0.1`, HostPort: `` }]
  }
  return out
}

/** Build the container-config `ExposedPorts` map (`<port>/tcp` → `{}`). */
function makeExposedPortsObject(
  ports: ReadonlyArray<number>
): Record<string, Record<string, never>> {
  const out: Record<string, Record<string, never>> = {}
  for (const p of ports) out[`${p}/tcp`] = {}
  return out
}

/**
 * Translate `extraMounts` into Docker bind strings
 * (`hostPath:containerPath:ro|rw`). Mounts are read-only unless
 * `readOnly: false` is passed explicitly.
 *
 * @throws SandboxError `policy` when a mount's literal path or its realpath
 *         points at the Docker socket.
 */
function makeBinds(
  mounts: DockerSandboxOpts[`extraMounts`]
): ReadonlyArray<string> {
  if (!mounts || mounts.length === 0) return []
  const isDockerSock = (p: string): boolean => /docker\.sock(?:[/]|$)/.test(p)
  return mounts.map((m) => {
    // Check the literal path *and* its realpath: a symlink like
    // `/tmp/innocent -> /var/run/docker.sock` passes the string match but
    // resolves to the socket, handing the container an instant escape
    // primitive. realpath throws ENOENT for a not-yet-created mount path —
    // which can't be a symlink right now, so the literal check below stands.
    //
    // This is best-effort defense-in-depth: `extraMounts` is operator-supplied
    // config (not agent-controlled), and the resolved path isn't pinned — we
    // hand docker the literal `hostPath`, which the daemon re-resolves at mount
    // time. So if the path is materialized as a symlink to the socket in the
    // window between here and createContainer, safety rests on docker's own
    // resolution, not on this check. Negligible in practice: the spec is built
    // and consumed synchronously, and exploiting it needs host write access.
    let resolved = m.hostPath
    try {
      resolved = realpathSync(m.hostPath)
    } catch {
      // Path doesn't exist yet; docker would create it as an empty dir.
    }
    if (isDockerSock(m.hostPath) || isDockerSock(resolved)) {
      const via = resolved !== m.hostPath ? ` (resolves to "${resolved}")` : ``
      throw new SandboxError(
        `policy`,
        `dockerSandbox: refusing to mount Docker socket "${m.hostPath}"${via} — that would let sandboxed code create new containers and escape.`
      )
    }
    const readOnly = m.readOnly !== false
    return `${m.hostPath}:${m.containerPath}:${readOnly ? `ro` : `rw`}`
  })
}
/** Collected result of a one-off exec inside the container. */
interface RunOneOffResult {
  exitCode: number | null
  stdout: Buffer
  stderr: Buffer
}

/**
 * Read an exec's *final* exit code. The Docker daemon can still report
 * `Running: true` / `ExitCode: null` for a brief window after the output
 * stream closes but before the exec task is reaped, so a single `inspect()`
 * right after the stream ends intermittently yields a null exit code for a
 * command that exited cleanly. Poll until the exec is no longer running
 * (bounded) instead of trusting the first read.
 *
 * @param ex   Exec handle exposing `inspect()`.
 * @param opts `timeoutMs` (default 2000) bounds the polling; `intervalMs`
 *             (default 20) is the delay between polls.
 * @returns The last inspect result — which may still say `Running: true` if
 *          the deadline passed — or `null` if `inspect()` throws (e.g. the
 *          container vanished).
 */
async function reapExec(
  ex: { inspect: () => Promise<{ ExitCode: number | null; Running: boolean }> },
  opts: { timeoutMs?: number; intervalMs?: number } = {}
): Promise<{ ExitCode: number | null; Running: boolean } | null> {
  const deadline = Date.now() + (opts.timeoutMs ?? 2000)
  const intervalMs = opts.intervalMs ?? 20
  for (;;) {
    let info: { ExitCode: number | null; Running: boolean }
    try {
      info = await ex.inspect()
    } catch {
      return null
    }
    if (!info.Running || Date.now() >= deadline) return info
    await new Promise<void>((r) => {
      setTimeout(r, intervalMs)
    })
  }
}
/**
 * Run a command in the container to completion (no stdin, no TTY) and collect
 * its demultiplexed output.
 *
 * @param container Container to exec in.
 * @param cmd       Argv to run (not passed through a shell).
 * @returns Exit code (`null` when it could not be determined) plus the full
 *          stdout / stderr buffers. Output is not size-capped.
 */
async function runOneOff(
  container: DockerodeContainer,
  cmd: ReadonlyArray<string>
): Promise<RunOneOffResult> {
  const ex = await container.exec({
    Cmd: cmd,
    AttachStdout: true,
    AttachStderr: true,
    Tty: false,
  })
  const stream = await ex.start({ hijack: true, stdin: false })
  const stdout = new PassThrough()
  const stderr = new PassThrough()
  const stdoutChunks: Array<Buffer> = []
  const stderrChunks: Array<Buffer> = []
  stdout.on(`data`, (b: Buffer) => stdoutChunks.push(b))
  stderr.on(`data`, (b: Buffer) => stderrChunks.push(b))
  // dockerode demuxes the framed Docker stream into stdout/stderr.
  await new Promise<void>((resolve) => {
    // `modem` is reached through a cast; the container type used here does
    // not declare it.
    const containerAny = container as unknown as {
      modem: {
        demuxStream: (
          s: NodeJS.ReadableStream,
          o: NodeJS.WritableStream,
          e: NodeJS.WritableStream
        ) => void
      }
    }
    containerAny.modem.demuxStream(stream, stdout, stderr)
    // Whichever of the two events arrives first finishes the wait.
    stream.on(`end`, () => resolve())
    stream.on(`close`, () => resolve())
  })
  const info = await reapExec(ex)
  return {
    exitCode: info?.ExitCode ?? null,
    stdout: Buffer.concat(stdoutChunks),
    stderr: Buffer.concat(stderrChunks),
  }
}
/**
 * Per-lease handle over a (possibly shared) sandbox container.
 *
 * File-system methods only accept paths inside `workingDirectory`; `fetch`
 * enforces the network policy host-side. Every method except `dispose` throws
 * a `runtime` `SandboxError` once the lease is disposed.
 *
 * Return types that mirror the interface are written as
 * `ReturnType<Sandbox[...]>` so the signatures track `Sandbox` exactly.
 */
class DockerSandbox implements Sandbox {
  readonly name: string
  readonly workingDirectory: string
  private container: DockerodeContainer
  private policy: NetworkPolicy
  private disposed = false
  private containerName: string
  /**
   * Whether this lease owns the container. Only an owner's dispose can reclaim
   * (wipe) it; an attacher's dispose merely releases its refcount. The
   * persistent/idle intent lives on the shared registry entry, not here, so
   * every lease agrees on the teardown action regardless of who disposes last.
   */
  private isOwner: boolean

  constructor(deps: {
    container: DockerodeContainer
    containerCwd: string
    policy: NetworkPolicy
    runtime: `runc` | `runsc`
    containerName: string
    owner: boolean
  }) {
    this.container = deps.container
    this.workingDirectory = deps.containerCwd
    this.policy = deps.policy
    this.name = `docker:${deps.runtime}`
    this.containerName = deps.containerName
    this.isOwner = deps.owner
  }

  /**
   * Run `opts.command` through `sh -c` inside the container.
   *
   * Output is collected per stream up to `opts.maxOutputBytes` (unbounded when
   * omitted). On timeout or abort only this exec's process tree is killed; the
   * call still resolves, with `timedOut` / `aborted` set. `exitCode` is `null`
   * when it could not be determined and `signal` is always `null`.
   */
  async exec(opts: SandboxExecOpts): ReturnType<Sandbox[`exec`]> {
    this.assertLive()
    // Unique per-exec marker. Children of this exec inherit it via the
    // environment, which lets a timeout/abort kill *only* this exec's process
    // tree — see `killExecTree`. Crucial for shared containers, where sibling
    // execs and background servers share the PID namespace.
    const execId = randomUUID()
    const env: Record<string, string> = {
      ...opts.env,
      // Set last so a caller-supplied env can't override the marker.
      __SBX_EXEC_ID: execId,
    }
    const ex = await this.container.exec({
      Cmd: [`sh`, `-c`, opts.command],
      WorkingDir: opts.cwd ?? this.workingDirectory,
      AttachStdin: opts.stdin !== undefined,
      AttachStdout: true,
      AttachStderr: true,
      Tty: false,
      Env: Object.entries(env).map(([k, v]) => `${k}=${v}`),
    })

    const stream = (await ex.start({
      hijack: true,
      stdin: opts.stdin !== undefined,
    })) as NodeJS.ReadableStream & { end?: (data?: Buffer | string) => void }
    if (opts.stdin !== undefined && stream.end) {
      stream.end(opts.stdin)
    }

    const stdout = new PassThrough()
    const stderr = new PassThrough()
    const stdoutChunks: Array<Buffer> = []
    const stderrChunks: Array<Buffer> = []
    let stdoutBytes = 0
    let stderrBytes = 0
    let truncated = false
    const max = opts.maxOutputBytes ?? Number.POSITIVE_INFINITY

    // Per-stream collector: keeps at most `max` bytes and flags truncation.
    const collect =
      (
        target: Array<Buffer>,
        getBytes: () => number,
        setBytes: (n: number) => void
      ) =>
      (chunk: Buffer) => {
        const bytes = getBytes()
        if (bytes >= max) {
          truncated = true
          return
        }
        const remaining = max - bytes
        if (chunk.length > remaining) {
          target.push(chunk.subarray(0, remaining))
          setBytes(bytes + remaining)
          truncated = true
        } else {
          target.push(chunk)
          setBytes(bytes + chunk.length)
        }
      }

    stdout.on(
      `data`,
      collect(
        stdoutChunks,
        () => stdoutBytes,
        (n) => (stdoutBytes = n)
      )
    )
    stderr.on(
      `data`,
      collect(
        stderrChunks,
        () => stderrBytes,
        (n) => (stderrBytes = n)
      )
    )

    const containerAny = this.container as unknown as {
      modem: {
        demuxStream: (
          s: NodeJS.ReadableStream,
          o: NodeJS.WritableStream,
          e: NodeJS.WritableStream
        ) => void
      }
    }
    containerAny.modem.demuxStream(stream, stdout, stderr)

    let aborted = false
    let timedOut = false

    const killExecTree = async () => {
      // Kill only *this* exec's process tree. Every process in it inherited the
      // unique `__SBX_EXEC_ID` we put in the exec's environment, so we find the
      // tree by scanning each /proc/<pid>/environ for the marker — leaving PID
      // 1, background servers, and sibling execs (different marker) untouched.
      // That's what makes a shared, multi-tenant container safe. We tag-and-find
      // rather than kill by PID because dockerode reports the exec's host-
      // namespace PID, which is meaningless inside the container. Killing the
      // tree includes the exec's root `sh`, so the hijacked stream unblocks.
      try {
        await runOneOff(this.container, [
          `sh`,
          `-c`,
          // environ is NUL-separated, so translate NUL→newline before matching
          // the marker as a whole line.
          `for p in $(ls /proc 2>/dev/null | grep -E '^[0-9]+$'); do ` +
            `[ "$p" = 1 ] && continue; ` +
            `tr '\\0' '\\n' < /proc/$p/environ 2>/dev/null | ` +
            `grep -qxF "__SBX_EXEC_ID=${execId}" && kill -KILL "$p" 2>/dev/null; ` +
            `done`,
        ])
      } catch {
        /* container may already be gone */
      }
    }

    let timer: NodeJS.Timeout | undefined
    if (opts.timeoutMs !== undefined) {
      timer = setTimeout(() => {
        timedOut = true
        void killExecTree()
      }, opts.timeoutMs)
    }

    const onAbort = () => {
      aborted = true
      void killExecTree()
    }
    if (opts.signal) {
      if (opts.signal.aborted) onAbort()
      else opts.signal.addEventListener(`abort`, onAbort, { once: true })
    }

    // Race the natural stream close against a hard cutoff a few seconds
    // past the kill — dockerode occasionally leaks the connection.
    let resolveClosed: () => void = () => undefined
    const closed = new Promise<void>((resolve) => {
      resolveClosed = resolve
    })
    const settle = (): void => resolveClosed()
    const cutoffTimers: Array<NodeJS.Timeout> = []
    const armCutoff = (ms: number): void => {
      const cutoff = setTimeout(settle, ms)
      cutoff.unref()
      cutoffTimers.push(cutoff)
    }
    const forceOnAbort = (): void => armCutoff(3000)
    stream.on(`end`, settle)
    stream.on(`close`, settle)
    if (opts.timeoutMs !== undefined) armCutoff(opts.timeoutMs + 5000)
    if (opts.signal) {
      if (opts.signal.aborted) forceOnAbort()
      else opts.signal.addEventListener(`abort`, forceOnAbort, { once: true })
    }
    await closed

    // Release everything this exec registered. Left behind, the abort
    // listeners accumulate on a signal that outlives many execs.
    if (timer) clearTimeout(timer)
    for (const cutoff of cutoffTimers) clearTimeout(cutoff)
    if (opts.signal) {
      opts.signal.removeEventListener(`abort`, onAbort)
      opts.signal.removeEventListener(`abort`, forceOnAbort)
    }

    // Poll until the exec is reaped so we don't read a transient null exit
    // code for a command that actually exited (see `reapExec`).
    const inspected = await reapExec(ex)
    return {
      exitCode: inspected?.ExitCode ?? null,
      signal: null,
      stdout: Buffer.concat(stdoutChunks),
      stderr: Buffer.concat(stderrChunks),
      timedOut,
      aborted,
      outputTruncated: truncated,
    }
  }

  /** Read a file inside the working directory. */
  async readFile(path: string): ReturnType<Sandbox[`readFile`]> {
    this.assertLive()
    this.assertReadable(path)
    try {
      return await getFile(this.container, this.absolute(path))
    } catch (err) {
      throw wrapFsError(err, `readFile`, path)
    }
  }

  /** Write a file inside the working directory. */
  async writeFile(path: string, content: Buffer | string): Promise<void> {
    this.assertLive()
    this.assertWritable(path)
    try {
      await putFile(this.container, this.absolute(path), content)
    } catch (err) {
      throw wrapFsError(err, `writeFile`, path)
    }
  }

  /** Create a directory inside the working directory. */
  async mkdir(path: string, opts?: { recursive?: boolean }): Promise<void> {
    this.assertLive()
    this.assertWritable(path)
    try {
      await makeDir(this.container, this.absolute(path), opts)
    } catch (err) {
      throw wrapFsError(err, `mkdir`, path)
    }
  }

  /** List a directory inside the working directory. */
  async readdir(path: string): ReturnType<Sandbox[`readdir`]> {
    this.assertLive()
    this.assertReadable(path)
    try {
      return await readDir(
        (cmd) => runOneOff(this.container, cmd),
        this.absolute(path)
      )
    } catch (err) {
      throw wrapFsError(err, `readdir`, path)
    }
  }

  /** Whether `path` exists; `false` (never a throw) for policy-denied paths. */
  async exists(path: string): Promise<boolean> {
    this.assertLive()
    // Safe-probe semantics: false for missing AND policy-denied paths,
    // matching native/unrestricted. We don't expose the policy boundary
    // through this primitive.
    if (!this.isReadable(path)) return false
    try {
      return await pathExists(
        (cmd) => runOneOff(this.container, cmd),
        this.absolute(path)
      )
    } catch (err) {
      throw wrapFsError(err, `exists`, path)
    }
  }

  /** Remove a file or directory inside the working directory. */
  async remove(path: string, opts?: { recursive?: boolean }): Promise<void> {
    this.assertLive()
    this.assertWritable(path)
    try {
      await removePath(
        (cmd) => runOneOff(this.container, cmd),
        this.absolute(path),
        opts
      )
    } catch (err) {
      throw wrapFsError(err, `remove`, path)
    }
  }

  /** Stat a path inside the working directory. */
  async stat(path: string): ReturnType<Sandbox[`stat`]> {
    this.assertLive()
    this.assertReadable(path)
    try {
      return await statPath(
        (cmd) => runOneOff(this.container, cmd),
        this.absolute(path)
      )
    } catch (err) {
      throw wrapFsError(err, `stat`, path)
    }
  }

  /**
   * Fetch `input` from inside the container after a host-side policy check.
   *
   * @throws SandboxError `policy` for private / link-local / metadata hosts
   *         and for hosts the network policy does not allow.
   */
  async fetch(input: string | URL, init?: RequestInit): Promise<Response> {
    this.assertLive()
    const url = typeof input === `string` ? new URL(input) : input
    // Enforce the egress policy here, at the tool boundary on the host,
    // *before* dispatching. Literal private / link-local / metadata IPs are
    // always refused (SSRF guard); otherwise the host must pass the policy.
    if (isPrivateOrLinkLocal(url.hostname)) {
      throw new SandboxError(
        `policy`,
        `dockerSandbox: host "${url.hostname}" denied — private / link-local / metadata addresses are not permitted.`
      )
    }
    if (!hostAllowedByPolicy(this.policy, url.hostname)) {
      throw new SandboxError(
        `policy`,
        `dockerSandbox: host "${url.hostname}" is not permitted by the sandbox network policy (mode: ${this.policy.mode}).`
      )
    }
    // The request is then issued directly from inside the container (no
    // in-container proxy). This host-side check governs the fetch tool only;
    // code run via exec has direct bridge egress unless the policy is
    // deny-all (NetworkMode=none, no interface).
    return fetchInSandbox((opts) => this.exec(opts), url, init)
  }

  /**
   * Release this lease (idempotent). Teardown is only scheduled once the LAST
   * lease on the shared container drains.
   *
   * @param opts `reclaim: true` from an OWNER lease marks the container to be
   *             wiped once leases hit zero; it is ignored for attachers.
   */
  async dispose(opts?: { reclaim?: boolean }): Promise<void> {
    if (this.disposed) return
    this.disposed = true

    // Release this lease. Each lease is its own wrapper but shares the registry
    // entry's refcount, so teardown only happens once the LAST lease drains —
    // a sibling wake, an inheriting subagent, or ongoing collaboration keeps it
    // warm meanwhile. The teardown ACTION is owner-governed:
    //  - an OWNER disposing with `reclaim` (its entity went terminal) marks the
    //    entry so the container is WIPED — even if persistent — once leases hit
    //    zero (an attacher can never set this);
    //  - otherwise the entry's owner-set `persistent` decides STOP vs REMOVE.
    const entry = sandboxContainers.get(this.containerName)
    if (!entry) return
    if (this.isOwner && opts?.reclaim) entry.reclaim = true
    entry.refs = Math.max(0, entry.refs - 1)
    if (entry.refs > 0) return
    if (entry.idleTimer) clearTimeout(entry.idleTimer)
    // Reclaim removes immediately (grace 0, still lock-serialized); otherwise
    // the owner's persistent intent picks stop (preserve) vs remove (wipe). The
    // owner gate is already folded into `entry.reclaim`, and an ephemeral
    // container wipes on last-lease-drain regardless of the last holder.
    const action: `stop` | `remove` = sandboxWipesOnDispose(
      entry.reclaim ?? false,
      entry.persistent
    )
      ? `remove`
      : `stop`
    const grace = entry.reclaim ? 0 : entry.idleGraceMs
    entry.idleTimer = scheduleIdleTeardown(this.containerName, grace, action)
  }

  /** Resolve `path` against the working directory. */
  private absolute(path: string): string {
    return absoluteSandboxPath(this.workingDirectory, path)
  }

  private isReadable(path: string): boolean {
    return isPathWithinSandbox(this.workingDirectory, path)
  }

  private assertReadable(path: string): void {
    if (!this.isReadable(path)) {
      throw new SandboxError(
        `policy`,
        `dockerSandbox: read access to "${path}" is denied (outside working directory ${this.workingDirectory}).`
      )
    }
  }

  private assertWritable(path: string): void {
    if (!isPathWithinSandbox(this.workingDirectory, path)) {
      throw new SandboxError(
        `policy`,
        `dockerSandbox: write access to "${path}" is denied (outside working directory ${this.workingDirectory}).`
      )
    }
  }

  private assertLive(): void {
    if (this.disposed) {
      throw new SandboxError(
        `runtime`,
        `dockerSandbox: operation called after dispose().`
      )
    }
  }
}

/**
 * Normalize a file-system failure into a `SandboxError`. An existing
 * `SandboxError` passes through untouched; anything else becomes a `runtime`
 * error carrying the errno code (when present) and the message.
 */
function wrapFsError(err: unknown, op: string, path: string): Error {
  if (err instanceof SandboxError) return err
  const e = err as NodeJS.ErrnoException
  return new SandboxError(
    `runtime`,
    `dockerSandbox.${op}("${path}") failed: ${e.code ?? ``} ${e.message ?? String(err)}`.trim()
  )
}
/**
 * Minimal in-memory tar writer / reader for shipping single files and
 * directory entries through dockerode's `putArchive` / `getArchive`. We do
 * not depend on a tar npm library because (a) we only need the ustar variant
 * for our small payloads and (b) avoiding the dep keeps the package install
 * graph slim for users who don't opt into docker.
 */

/** Tar record size: headers and padded payloads are multiples of this. */
const BLOCK = 512

/** One decoded archive member. */
type TarEntry = { name: string; type: `file` | `directory`; content: Buffer }

/** Zero-pad `buf` up to the next multiple of `BLOCK` (no copy when aligned). */
function pad(buf: Buffer): Buffer {
  const overhang = buf.length % BLOCK
  return overhang === 0
    ? buf
    : Buffer.concat([buf, Buffer.alloc(BLOCK - overhang)])
}

/** Plain byte sum of a header block, as the tar checksum field requires. */
function checksum(header: Buffer): number {
  let sum = 0
  for (const byte of header) sum += byte
  return sum
}

/**
 * Write `value` as a zero-padded, NUL-terminated octal field of `len` bytes.
 *
 * @throws RangeError when `value` is not a non-negative integer or needs more
 *   than `len - 1` octal digits — writing it anyway would silently truncate
 *   the digits and produce a corrupt header.
 */
function writeOctal(
  buf: Buffer,
  offset: number,
  len: number,
  value: number
): void {
  const digits = Number.isInteger(value) && value >= 0 ? value.toString(8) : ``
  if (digits === `` || digits.length > len - 1) {
    throw new RangeError(
      `dockerSandbox: tar header value ${value} does not fit in a ${len}-byte octal field`
    )
  }
  buf.write(digits.padStart(len - 1, `0`) + `\0`, offset, len, `ascii`)
}

/**
 * Build one 512-byte ustar header.
 *
 * @throws Error when `name` exceeds the 100-byte name field (the ustar
 *   `prefix` field is not used by this writer).
 */
function buildHeader(opts: {
  name: string
  size: number
  mode: number
  mtimeSec: number
  typeflag: `0` | `5`
}): Buffer {
  const header = Buffer.alloc(BLOCK)
  // The checksum is computed with its own field (bytes 148..155) as spaces.
  header.fill(0x20, 148, 156)

  const nameBuf = Buffer.from(opts.name, `utf-8`)
  if (nameBuf.length > 100) {
    throw new Error(
      `dockerSandbox: file path "${opts.name}" exceeds the 100-byte tar limit. Split via mkdir + writeFile or use a shorter path.`
    )
  }
  header.set(nameBuf, 0)
  writeOctal(header, 100, 8, opts.mode & 0o7777)
  writeOctal(header, 108, 8, 0) // uid
  writeOctal(header, 116, 8, 0) // gid
  writeOctal(header, 124, 12, opts.size)
  writeOctal(header, 136, 12, opts.mtimeSec)
  header.write(opts.typeflag, 156, 1, `ascii`)
  header.write(`ustar\0`, 257, 6, `ascii`)
  header.write(`00`, 263, 2, `ascii`)

  // Six octal digits + NUL land in bytes 148..154; byte 155 keeps the space
  // written above. The prefix field (345..499) is left zeroed.
  writeOctal(header, 148, 7, checksum(header))

  return header
}

/**
 * Build a tar archive containing a single regular file named `name`. The
 * name is relative to the archive root; `putArchive`'s `path` option supplies
 * the destination directory.
 */
function buildSingleFileTar(name: string, content: Buffer): Buffer {
  const header = buildHeader({
    name,
    size: content.length,
    mode: 0o644,
    mtimeSec: Math.floor(Date.now() / 1000),
    typeflag: `0`,
  })
  // Two zero blocks signal end-of-archive.
  return Buffer.concat([header, pad(content), Buffer.alloc(BLOCK * 2)])
}

/** Build a tar archive containing a single directory entry (`name/`). */
function buildSingleDirTar(name: string): Buffer {
  const header = buildHeader({
    name: name.endsWith(`/`) ? name : `${name}/`,
    size: 0,
    mode: 0o755,
    mtimeSec: Math.floor(Date.now() / 1000),
    typeflag: `5`,
  })
  return Buffer.concat([header, Buffer.alloc(BLOCK * 2)])
}

/** Decode a header field, stopping at the first NUL if there is one. */
function readField(block: Buffer, start: number, len: number): string {
  const raw = block.subarray(start, start + len)
  const nul = raw.indexOf(0)
  return raw.subarray(0, nul === -1 ? raw.length : nul).toString(`utf-8`)
}

/**
 * Parse pax extended-header records (`"<len> <key>=<value>\n"`, where `len`
 * is the byte length of the whole record). Parsing stops at the first
 * malformed record.
 */
function parsePaxRecords(body: Buffer): Map<string, string> {
  const records = new Map<string, string>()
  let pos = 0
  while (pos < body.length) {
    const space = body.indexOf(0x20, pos)
    if (space === -1) break
    const len = Number(body.subarray(pos, space).toString(`ascii`))
    const end = pos + len
    if (!Number.isInteger(len) || end <= space || end > body.length) break
    const record = body
      .subarray(space + 1, end)
      .toString(`utf-8`)
      .replace(/\n$/, ``)
    const eq = record.indexOf(`=`)
    if (eq > 0) records.set(record.slice(0, eq), record.slice(eq + 1))
    pos = end
  }
  return records
}

/**
 * Minimal tar reader: buffers the stream, walks the headers and returns
 * `{name, type, content}` records. Used for `getFile` (`getArchive` returns a
 * tar stream wrapping the requested path).
 *
 * pax extended headers (`x`, `g`) and GNU long-name records (`L`) are
 * metadata, not members: they are not returned, and a pax `path` / GNU long
 * name replaces the name of the member that follows. Any other non-directory
 * typeflag is reported as `file`.
 */
async function readTarStream(
  stream: NodeJS.ReadableStream
): Promise<ReadonlyArray<TarEntry>> {
  const chunks: Array<Buffer> = []
  for await (const chunk of stream as AsyncIterable<Buffer>) {
    chunks.push(chunk)
  }
  const buf = Buffer.concat(chunks)
  const entries: Array<TarEntry> = []
  let pendingName: string | undefined
  let offset = 0
  while (offset + BLOCK <= buf.length) {
    const header = buf.subarray(offset, offset + BLOCK)
    // End-of-archive (zero blocks). Stop scanning.
    if (header.readUInt8(0) === 0) break

    const size = parseInt(readField(header, 124, 12).trim(), 8) || 0
    const typeflag = String.fromCharCode(header.readUInt8(156))
    const bodyStart = offset + BLOCK
    const content = Buffer.from(buf.subarray(bodyStart, bodyStart + size))
    // Payloads are padded to a whole number of blocks.
    offset = bodyStart + Math.ceil(size / BLOCK) * BLOCK

    if (typeflag === `x`) {
      pendingName = parsePaxRecords(content).get(`path`) ?? pendingName
      continue
    }
    if (typeflag === `g`) continue
    if (typeflag === `L`) {
      pendingName = readField(content, 0, content.length)
      continue
    }
    entries.push({
      name: pendingName ?? readField(header, 0, 100),
      type: typeflag === `5` ? `directory` : `file`,
      content,
    })
    pendingName = undefined
  }
  return entries
}

/**
 * Write `content` to `absolutePath` inside the container. The path's
 * directory must already exist — we do not create parents implicitly. Use
 * `makeDir` first if needed.
 *
 * @throws Error when `absolutePath` has no final component (e.g. `/`).
 */
export async function putFile(
  container: DockerodeContainer,
  absolutePath: string,
  content: Buffer | string
): Promise<void> {
  const name = posix.basename(absolutePath)
  if (!name) {
    throw new Error(
      `dockerSandbox: cannot write to bare directory "${absolutePath}"`
    )
  }
  const payload = Buffer.isBuffer(content) ? content : Buffer.from(content)
  await container.putArchive(Readable.from(buildSingleFileTar(name, payload)), {
    path: posix.dirname(absolutePath),
  })
}

/**
 * Read the file at `absolutePath` out of the container.
 *
 * @throws an `ENOENT`-coded error when the archive holds no usable member,
 *   and a plain Error when the member is a directory.
 */
export async function getFile(
  container: DockerodeContainer,
  absolutePath: string
): Promise<Buffer> {
  const stream = await container.getArchive({ path: absolutePath })
  const entries = await readTarStream(stream)
  // Container paths are POSIX regardless of the host platform.
  const wanted = posix.basename(absolutePath)
  const hit =
    entries.find((e) => e.name === wanted || e.name === `${wanted}/`) ??
    entries.find((e) => e.type === `file`)
  if (!hit) {
    const err = new Error(`ENOENT: ${absolutePath}`) as NodeJS.ErrnoException
    err.code = `ENOENT`
    throw err
  }
  if (hit.type !== `file`) {
    throw new Error(`dockerSandbox.readFile: "${absolutePath}" is not a file`)
  }
  return hit.content
}

/**
 * Create a directory by `putArchive`-ing a zero-size directory entry, which
 * creates the leaf only. With `recursive`, one archive is issued per path
 * component from the root down (for every component, not only missing ones).
 *
 * The path is normalised first so `.` / `..` segments never become archive
 * member names.
 *
 * NOTE(review): existing directories along the path presumably get the
 * header's mode / owner / mtime re-applied by the daemon — confirm.
 */
export async function makeDir(
  container: DockerodeContainer,
  absolutePath: string,
  opts?: { recursive?: boolean }
): Promise<void> {
  const components = posix.normalize(absolutePath).split(`/`).filter(Boolean)
  // /a/b/c → 'a' into '/', 'b' into '/a', 'c' into '/a/b'
  const first = opts?.recursive ? 0 : components.length - 1
  for (const [i, name] of components.entries()) {
    if (i < first) continue
    await container.putArchive(Readable.from(buildSingleDirTar(name)), {
      path: `/` + components.slice(0, i).join(`/`),
    })
  }
}
/** Result of one command run inside the container. */
type OneOffResult = {
  exitCode: number | null
  stdout: Buffer
  stderr: Buffer
}

/** Runs an argv inside the container and resolves with its captured output. */
type OneOffExec = (cmd: ReadonlyArray<string>) => Promise<OneOffResult>

/**
 * List the direct children of `absolutePath`.
 *
 * Runs three `find -type X -print0` passes (directories, files, symlinks);
 * the `-mindepth` / `-maxdepth` / `-print0` options work on both GNU find and
 * BusyBox find (alpine). Sections are separated by an EMPTY NUL-terminated
 * record, which `find -print0` can never emit itself, so no file name can be
 * mistaken for a section boundary.
 *
 * @returns Entries named by basename (no directory prefix).
 * @throws an `ENOENT`-coded error when the script exits non-zero (`set -e`
 *   aborts on the first failing `find`, e.g. a missing directory).
 */
export async function readDir(
  exec: OneOffExec,
  absolutePath: string
): Promise<ReadonlyArray<DirEntry>> {
  const quoted = shellQuote(absolutePath)
  const pass = (flag: string): string =>
    `find ${quoted} -mindepth 1 -maxdepth 1 -type ${flag} -print0 2>/dev/null`
  // `printf '\0'` (not `echo -n`, whose behaviour is implementation-defined).
  const script = [
    `set -e`,
    pass(`d`),
    `printf '\\0'`,
    pass(`f`),
    `printf '\\0'`,
    pass(`l`),
  ].join(`\n`)
  const r = await exec([`sh`, `-c`, script])
  if (r.exitCode !== 0) {
    const err = new Error(
      r.stderr.toString(`utf-8`) || `readdir failed: ${absolutePath}`
    ) as NodeJS.ErrnoException
    err.code = `ENOENT`
    throw err
  }

  const kinds: ReadonlyArray<DirEntry[`type`]> = [
    `directory`,
    `file`,
    `symlink`,
  ]
  const entries: Array<DirEntry> = []
  let section = 0
  for (const record of r.stdout.toString(`utf-8`).split(`\0`)) {
    if (record === ``) {
      // Empty record: section separator (or the tail after the last NUL).
      section += 1
      continue
    }
    const type = kinds[section]
    if (type === undefined) break
    entries.push({ name: posix.basename(record), type })
  }
  return entries
}

/**
 * Stat `absolutePath` inside the container. Tries GNU/BusyBox `stat -c`
 * first and falls back to the BSD `stat -f` spelling.
 *
 * @throws an `ENOENT`-coded error when neither form succeeds or the output
 *   is not the expected three `|`-separated fields.
 */
export async function statPath(
  exec: OneOffExec,
  absolutePath: string
): Promise<FileStat> {
  const target = shellQuote(absolutePath)
  const r = await exec([
    `sh`,
    `-c`,
    `(stat -c '%F|%s|%Y' ${target} 2>/dev/null || stat -f '%HT|%z|%m' ${target} 2>/dev/null)`,
  ])
  const fields = r.stdout.toString(`utf-8`).trim().split(`|`)
  if (r.exitCode !== 0 || fields.length !== 3) {
    const err = new Error(
      r.stderr.toString(`utf-8`) || `stat: no such file: ${absolutePath}`
    ) as NodeJS.ErrnoException
    err.code = `ENOENT`
    throw err
  }

  // Both stat flavours describe the kind in words; match on the keyword.
  const kind = (fields[0] ?? ``).toLowerCase()
  let type: FileStat[`type`] = `other`
  if (kind.includes(`directory`)) type = `directory`
  else if (kind.includes(`symbolic`)) type = `symlink`
  else if (kind.includes(`regular`) || kind === `file`) type = `file`

  const mtimeSec = Number(fields[2])
  return {
    type,
    size: Number(fields[1]) || 0,
    mtimeMs: Number.isFinite(mtimeSec) ? mtimeSec * 1000 : 0,
  }
}

/** Whether `test -e` succeeds for `absolutePath` inside the container. */
export async function pathExists(
  exec: OneOffExec,
  absolutePath: string
): Promise<boolean> {
  const r = await exec([`test`, `-e`, absolutePath])
  return r.exitCode === 0
}

/**
 * Remove `absolutePath` with `rm` (`rm -r` when `recursive`).
 *
 * @throws an errno-coded error on failure; the code is derived from `rm`'s
 *   stderr text (`ENOENT`, `EACCES`, `EISDIR`, otherwise `EIO`).
 */
export async function removePath(
  exec: OneOffExec,
  absolutePath: string,
  opts?: { recursive?: boolean }
): Promise<void> {
  const r = await exec(
    opts?.recursive ? [`rm`, `-r`, absolutePath] : [`rm`, absolutePath]
  )
  if (r.exitCode === 0) return

  const stderr = r.stderr.toString(`utf-8`)
  const err = new Error(
    stderr || `remove failed: ${absolutePath}`
  ) as NodeJS.ErrnoException
  if (/No such file/i.test(stderr)) err.code = `ENOENT`
  else if (/Permission denied/i.test(stderr)) err.code = `EACCES`
  else if (/Is a directory|directory not empty/i.test(stderr))
    err.code = `EISDIR`
  else err.code = `EIO`
  throw err
}

/** Single-quote `arg` for POSIX sh, escaping embedded single quotes. */
function shellQuote(arg: string): string {
  return `'` + arg.replace(/'/g, `'\\''`) + `'`
}

void dirname // keep imported for readability when extending
/**
 * Strongly-typed surface of `dockerode` we depend on. We avoid importing the
 * package type-side because it's an optional peer dependency and we don't
 * want our consumers' typecheckers to fail when dockerode is absent.
 *
 * NOTE(review): several generic type arguments in this file were missing
 * from the text it was recovered from (bare `Promise`, `ReadonlyArray`,
 * `Record`). Arguments marked NOTE(review) below are reconstructions and
 * must be confirmed against the original source.
 */
export interface Dockerode {
  // NOTE(review): result type unknown from here; `unknown` is a placeholder.
  ping(): Promise<unknown>
  version(): Promise<{ ApiVersion?: string; Version?: string }>
  // NOTE(review): presumably resolves to the created container — confirm.
  createContainer(opts: DockerContainerCreateOpts): Promise<DockerodeContainer>
  getContainer(id: string): DockerodeContainer
  listContainers(opts?: {
    all?: boolean
    // NOTE(review): reconstructed as label/name filter lists — confirm.
    filters?: Record<string, ReadonlyArray<string>>
    // NOTE(review): the element shape was lost; only `Id` is assumed here.
  }): Promise<ReadonlyArray<{ Id: string }>>
  // NOTE(review): presumably the progress stream fed to `followProgress`.
  pull(image: string, opts?: unknown): Promise<NodeJS.ReadableStream>
  modem: {
    followProgress(
      stream: NodeJS.ReadableStream,
      onFinished: (err: Error | null) => void,
      onProgress?: (event: unknown) => void
    ): void
    demuxStream(
      raw: NodeJS.ReadableStream,
      stdout: NodeJS.WritableStream,
      stderr: NodeJS.WritableStream
    ): void
  }
}

/** Subset of the container-create payload this package sets. */
export interface DockerContainerCreateOpts {
  Image: string
  Cmd?: ReadonlyArray<string>
  WorkingDir?: string
  Env?: ReadonlyArray<string>
  Labels?: Record<string, string>
  // NOTE(review): value type reconstructed (empty-object map) — confirm.
  ExposedPorts?: Record<string, Record<string, never>>
  HostConfig: DockerHostConfig
  Tty?: boolean
}

/** Subset of `HostConfig` this package sets. */
export interface DockerHostConfig {
  AutoRemove?: boolean
  ReadonlyRootfs?: boolean
  // NOTE(review): reconstructed as mount-point → options string — confirm.
  Tmpfs?: Record<string, string>
  CapDrop?: ReadonlyArray<string>
  CapAdd?: ReadonlyArray<string>
  SecurityOpt?: ReadonlyArray<string>
  Privileged?: boolean
  PidsLimit?: number
  Memory?: number
  MemorySwap?: number
  NanoCpus?: number
  NetworkMode?: string
  ExtraHosts?: ReadonlyArray<string>
  PortBindings?: Record<
    string,
    ReadonlyArray<{ HostIp?: string; HostPort?: string }>
  >
  Runtime?: string
  Binds?: ReadonlyArray<string>
  Ulimits?: ReadonlyArray<{ Name: string; Soft: number; Hard: number }>
  IpcMode?: string
}

/** The container handle operations this package calls. */
export interface DockerodeContainer {
  readonly id: string
  // NOTE(review): result types of start/stop/kill/remove/putArchive were
  // lost; `unknown` is a placeholder that callers can still `await`.
  start(): Promise<unknown>
  stop(opts?: { t?: number }): Promise<unknown>
  kill(opts?: { signal?: string }): Promise<unknown>
  remove(opts?: { force?: boolean; v?: boolean }): Promise<unknown>
  // NOTE(review): presumably `DockerInspectResult` — confirm.
  inspect(): Promise<DockerInspectResult>
  exec(opts: {
    Cmd: ReadonlyArray<string>
    WorkingDir?: string
    Env?: ReadonlyArray<string>
    AttachStdin?: boolean
    AttachStdout?: boolean
    AttachStderr?: boolean
    Tty?: boolean
    User?: string
    // NOTE(review): presumably `DockerodeExec` — confirm.
  }): Promise<DockerodeExec>
  // NOTE(review): presumably the tar stream `readTarStream` consumes.
  getArchive(opts: { path: string }): Promise<NodeJS.ReadableStream>
  putArchive(
    tarStream: NodeJS.ReadableStream | Buffer,
    opts: { path: string }
  ): Promise<unknown>
}

/** Handle for one `exec` instance inside a container. */
export interface DockerodeExec {
  readonly id: string
  start(opts: {
    hijack?: boolean
    stdin?: boolean
    Tty?: boolean
  }): Promise<
    NodeJS.ReadableStream & { end?: (data?: Buffer | string) => void }
  >
  inspect(): Promise<{ ExitCode: number | null; Pid: number; Running: boolean }>
}

/** Fields of `container.inspect()` this package reads. */
export interface DockerInspectResult {
  Id: string
  State: { Running: boolean; Pid: number }
  NetworkSettings: {
    Ports: Record<
      string,
      ReadonlyArray<{ HostIp: string; HostPort: string }> | null
    >
  }
  Config?: { Image?: string }
}

/** Constructor signature of the `dockerode` default export. */
type DockerCtor = new (opts?: {
  socketPath?: string
  host?: string
  port?: number
  protocol?: string
}) => Dockerode

/** Memoised result of `isDockerAvailable()`; `null` until first probed. */
let cachedAvailability: boolean | null = null

/**
 * Dynamically import the optional `dockerode` peer dependency.
 *
 * @returns The `Docker` constructor (the module's default export, or the
 *   module itself when there is no default).
 * @throws SandboxError (`unavailable`) when the import fails for any reason.
 */
export async function loadDockerode(): Promise<DockerCtor> {
  try {
    // `@vite-ignore` keeps Vite from resolving/bundling this optional
    // runtime-only dep at build time (callers externalize it).
    const mod = (await import(/* @vite-ignore */ `dockerode`)) as unknown as {
      default?: DockerCtor
    }
    return (mod.default ?? (mod as unknown as DockerCtor)) as DockerCtor
  } catch {
    throw new SandboxError(
      `unavailable`,
      `dockerSandbox requires the "dockerode" package. Install it: pnpm add dockerode @types/dockerode`
    )
  }
}
/**
 * Cheap probe used by tests and `chooseDefaultSandbox`-like helpers. Caches
 * the first result (success or failure) to avoid repeated socket connections
 * during a test run.
 *
 * @returns `true` when `dockerode` loads and the daemon answers `ping()`
 *   within one second; `false` otherwise (the reason is logged once).
 */
export async function isDockerAvailable(): Promise<boolean> {
  if (cachedAvailability !== null) return cachedAvailability
  let timer: ReturnType<typeof setTimeout> | undefined
  let available: boolean
  try {
    const Docker = await loadDockerode()
    const d = new Docker()
    await Promise.race([
      d.ping(),
      new Promise<never>((_, reject) => {
        timer = setTimeout(
          () => reject(new Error(`docker ping timeout`)),
          1000
        )
      }),
    ])
    available = true
  } catch (err) {
    available = false
    // Surface why docker is unavailable (missing dockerode vs. daemon down).
    console.warn(
      `[sandbox] docker unavailable: ${
        err instanceof Error ? err.message : String(err)
      }`
    )
  } finally {
    // Without this the timer stays armed for a full second after a
    // successful ping and keeps the event loop alive for no reason.
    if (timer !== undefined) clearTimeout(timer)
  }
  cachedAvailability = available
  return available
}

/** For tests that need to flip the cache (e.g. simulating daemon-down). */
export function _resetDockerAvailabilityCache(): void {
  cachedAvailability = null
}

/**
 * Host-side egress decision for the docker sandbox's `fetch()`. The docker
 * adapter has no in-container proxy: the request is issued directly from the
 * container, and the allowlist is enforced here, at the tool boundary on the
 * host, *before* the request is dispatched.
 *
 * This governs the `fetch` tool only — code run via `exec` has direct bridge
 * egress when the policy is not deny-all. deny-all is hard-enforced separately
 * by creating the container with `NetworkMode=none`.
 *
 * @returns Whether `host` may be fetched under `policy`.
 */
export function hostAllowedByPolicy(
  policy: NetworkPolicy,
  host: string
): boolean {
  switch (policy.mode) {
    case `allow-all`:
      return true
    case `deny-all`:
      return false
    case `allowlist`:
      return policy.allow.some((pattern) => matchesHost(host, pattern))
  }
}
/**
 * Exact host, `localhost` loopback alias, or `*.suffix` wildcard.
 *
 * Comparison is case-insensitive (DNS names are; `URL.hostname` is already
 * lower-cased but patterns come from configuration). The loopback alias also
 * accepts the bracketed `[::1]` form that `URL.hostname` produces. A `*.`
 * wildcard matches the bare suffix as well as any subdomain of it.
 */
export function matchesHost(host: string, pattern: string): boolean {
  const h = host.toLowerCase()
  const p = pattern.toLowerCase()
  if (p === h) return true
  if (p === `localhost` && (h === `127.0.0.1` || h === `::1` || h === `[::1]`)) {
    return true
  }
  if (p.startsWith(`*.`)) {
    const suffix = p.slice(2)
    // A bare `*.` carries no suffix and must not match anything.
    return suffix.length > 0 && (h === suffix || h.endsWith(`.` + suffix))
  }
  return false
}

/** Render a 32-bit unsigned integer as a dotted quad. */
const toQuad = (n: number): string =>
  [(n >>> 24) & 255, (n >>> 16) & 255, (n >>> 8) & 255, n & 255].join(`.`)

/**
 * Parse the loose IPv4 forms libc's `inet_aton` accepts, returning the
 * canonical dotted quad — or `null` if `h` isn't a valid loose-IPv4 literal
 * (a real hostname, >4 parts, an out-of-range or malformed part). Accepts 1–4
 * dot-separated parts, each decimal, octal (`0`-prefixed) or hex
 * (`0x`-prefixed); a final part shorter than the address absorbs the
 * remaining low-order bytes, so `127.1`, `127.0.1`, `0177.0.0.1` and
 * `2130706433` all fold to `127.0.0.1`. Expects lower-case input.
 */
function parseLooseIPv4(h: string): string | null {
  const parts = h.split(`.`)
  if (parts.length > 4) return null
  const nums: Array<number> = []
  for (const part of parts) {
    let n: number
    if (/^0x[0-9a-f]+$/.test(part)) n = parseInt(part.slice(2), 16)
    else if (/^0[0-7]+$/.test(part)) n = parseInt(part, 8)
    else if (/^(?:0|[1-9]\d*)$/.test(part)) n = parseInt(part, 10)
    else return null // non-numeric label, or malformed (e.g. `08`, `0x`)
    if (!Number.isSafeInteger(n) || n < 0) return null
    nums.push(n)
  }
  // inet_aton packing: every part but the last is a single byte; the last
  // part fills all remaining low-order bytes.
  const last = nums.pop()
  if (last === undefined) return null
  let addr = 0
  for (const [i, byte] of nums.entries()) {
    if (byte > 255) return null
    addr += byte * 256 ** (3 - i)
  }
  if (last >= 256 ** (4 - nums.length)) return null
  addr += last
  if (addr > 0xffffffff) return null
  return toQuad(addr >>> 0)
}

/**
 * Canonicalize a URL hostname for IP classification: lower-cases, drops one
 * trailing root dot, strips IPv6 brackets (`URL.hostname` keeps them, e.g.
 * `[::1]`), unwraps `::ffff:`-mapped IPv4, and folds every
 * `inet_aton`-accepted IPv4 form to a dotted quad. Anything else is returned
 * as the (lower-cased) input.
 */
function canonicalizeHost(host: string): string {
  let h = host.trim().toLowerCase()
  if (h.endsWith(`.`)) h = h.slice(0, -1)
  if (h.startsWith(`[`) && h.endsWith(`]`)) h = h.slice(1, -1)
  // IPv4-mapped IPv6: ::ffff:a.b.c.d (or shorthand) or ::ffff:aabb:ccdd
  const mapped = /^::ffff:(.+)$/.exec(h)
  if (mapped) {
    const tail = mapped[1] ?? ``
    const mappedQuad = parseLooseIPv4(tail)
    if (mappedQuad) return mappedQuad
    const hex = /^([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(tail)
    if (hex) {
      const hi = parseInt(hex[1] ?? ``, 16)
      const lo = parseInt(hex[2] ?? ``, 16)
      return toQuad(((hi << 16) | lo) >>> 0)
    }
  }
  // Any inet_aton-accepted IPv4 literal (dotted, shorthand, integer, octal, hex).
  return parseLooseIPv4(h) ?? h
}

/**
 * Refuse literal private / link-local / loopback / unspecified /
 * cloud-metadata IPs regardless of the allowlist. Encoded literal forms
 * (integer/hex IPv4, `::ffff:`-mapped, bracketed IPv6) are canonicalized
 * first so they can't bypass the checks. DNS names that *resolve* to private
 * space, and redirects to a private host, are NOT caught here (a known gap:
 * closing it would require resolving on the host and pinning the resolved IP
 * per hop); this guard denies direct literal-IP egress only.
 */
export function isPrivateOrLinkLocal(rawHost: string): boolean {
  const host = canonicalizeHost(rawHost)
  const v4 = /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/.exec(host)
  if (v4) {
    const a = Number(v4[1])
    const b = Number(v4[2])
    return (
      a === 0 || // unspecified
      a === 10 ||
      a === 127 || // loopback
      (a === 169 && b === 254) || // link-local + AWS/GCP metadata
      (a === 172 && b >= 16 && b <= 31) ||
      (a === 192 && b === 168) ||
      (a === 100 && b >= 64 && b <= 127) // CGNAT
    )
  }
  // IPv6 literals. Every pattern requires a colon, so a DNS hostname reaching
  // here (e.g. `fc2.com`, `fda.gov`) never matches and stays public.
  if (host === `::1` || host === `::`) return true // loopback, unspecified
  if (/^fe[89ab][0-9a-f]:/.test(host)) return true // fe80::/10 link-local
  if (/^f[cd][0-9a-f]{0,2}:/.test(host)) return true // fc00::/7 ULA
  return false
}
/** Runs a command inside the sandbox and resolves with its captured output. */
type ExecFn = (opts: SandboxExecOpts) => Promise<SandboxExecResult>

const DEFAULT_TIMEOUT_MS = 10_000
/** Cap the in-sandbox response body so a huge page can't blow up exec stdout. */
const DEFAULT_MAX_BODY_BYTES = 5_000_000
const DEFAULT_USER_AGENT = `Mozilla/5.0 (compatible; DurableStreamsAgent/1.0)`
const DEFAULT_ACCEPT = `text/html,application/xhtml+xml,text/plain,*/*`

/**
 * Statuses for which the Fetch `Response` constructor rejects any non-null
 * body (even an empty one) with a `TypeError`.
 */
const NULL_BODY_STATUSES: ReadonlySet<number> = new Set([204, 205, 304])

/**
 * The POSIX `sh` program that performs the request *inside* the sandbox. It
 * auto-detects an HTTP client (curl → node → wget) and emits, on stdout:
 *
 *   line 1:  `<status>\t<content_type>`   (content_type may be empty)
 *   rest:    base64 of the response body  (line-wrapping is tolerated)
 *
 * All request inputs arrive via environment variables (FETCH_URL, FETCH_UA,
 * FETCH_ACCEPT, FETCH_TIMEOUT, FETCH_MAXBYTES) so nothing is interpolated into
 * the command string — there is no shell-injection surface from the URL or
 * headers. A missing client prints the sentinel `NOCLIENT`; a failed request
 * prints status `000`.
 *
 * Only User-Agent + Accept headers and `redirect: follow` are forwarded —
 * that is the full surface `fetch_url` uses. Request bodies and other methods
 * are out of scope (documented limitation).
 */
const FETCH_SCRIPT = `
if command -v curl >/dev/null 2>&1; then
  f="$(mktemp 2>/dev/null || echo /tmp/efetch.$$)"
  if meta="$(curl -sS -L --max-time "$FETCH_TIMEOUT" --max-filesize "$FETCH_MAXBYTES" -A "$FETCH_UA" -H "Accept: $FETCH_ACCEPT" -o "$f" -w '%{http_code}\\t%{content_type}' "$FETCH_URL")"; then
    printf '%s\\n' "$meta"
    base64 "$f" 2>/dev/null
  else
    printf '000\\t\\n'
  fi
  rm -f "$f"
elif command -v node >/dev/null 2>&1; then
  node -e 'const u=process.env.FETCH_URL;const to=(Number(process.env.FETCH_TIMEOUT)||10)*1000;const mb=Number(process.env.FETCH_MAXBYTES)||5000000;const c=new AbortController();const t=setTimeout(function(){c.abort();},to);fetch(u,{redirect:"follow",signal:c.signal,headers:{"User-Agent":process.env.FETCH_UA,"Accept":process.env.FETCH_ACCEPT}}).then(function(r){return r.arrayBuffer().then(function(a){clearTimeout(t);var b=Buffer.from(a);if(b.length>mb)b=b.subarray(0,mb);process.stdout.write(String(r.status)+"\\t"+(r.headers.get("content-type")||"")+"\\n");process.stdout.write(b.toString("base64"));});}).catch(function(){clearTimeout(t);process.stdout.write("000\\t\\n");});'
elif command -v wget >/dev/null 2>&1; then
  f="$(mktemp 2>/dev/null || echo /tmp/efetch.$$)"
  if wget -q -T "$FETCH_TIMEOUT" -U "$FETCH_UA" --header "Accept: $FETCH_ACCEPT" -O "$f" "$FETCH_URL"; then
    printf '200\\t\\n'
  else
    printf '000\\t\\n'
  fi
  base64 "$f" 2>/dev/null
  rm -f "$f"
else
  printf 'NOCLIENT\\n'
fi
`

/**
 * Perform an HTTP request from *inside* a sandbox by running an in-sandbox
 * HTTP client over `exec`, and return a synthesized `Response`. This is how
 * isolated providers (docker, e2b) implement `Sandbox.fetch()` so that the
 * request egresses through the sandbox's network rather than from the host
 * process.
 *
 * @param exec - Runs a shell command inside the sandbox.
 * @param input - Target URL.
 * @param init - Only `headers` (User-Agent, Accept) and `signal` are used.
 * @param opts - Overrides for the request timeout and the body-size cap.
 * @returns A `Response` carrying the status, content-type and decoded body;
 *   the body is `null` for 204 / 205 / 304.
 * @throws SandboxError (`runtime`) when no HTTP client exists in the image,
 *   no response was received, or the reported status is outside 200–599.
 */
export async function fetchInSandbox(
  exec: ExecFn,
  input: string | URL,
  init?: RequestInit,
  opts: { timeoutMs?: number; maxBodyBytes?: number } = {}
): Promise<Response> {
  const url = typeof input === `string` ? input : input.toString()
  const headers = new Headers(init?.headers)
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS
  const maxBodyBytes = opts.maxBodyBytes ?? DEFAULT_MAX_BODY_BYTES

  const result = await exec({
    command: FETCH_SCRIPT,
    env: {
      FETCH_URL: url,
      FETCH_UA: headers.get(`user-agent`) ?? DEFAULT_USER_AGENT,
      FETCH_ACCEPT: headers.get(`accept`) ?? DEFAULT_ACCEPT,
      FETCH_TIMEOUT: String(Math.ceil(timeoutMs / 1000)),
      FETCH_MAXBYTES: String(maxBodyBytes),
    },
    // Let the in-sandbox client's own timeout fire first; this is the
    // backstop if the client wedges. Forward the caller's abort signal so a
    // host-side cancellation tears the exec down too.
    timeoutMs: timeoutMs + 5_000,
    signal: init?.signal ?? undefined,
    // base64 inflates the body ~4/3; leave headroom for that + the meta line.
    maxOutputBytes: Math.ceil(maxBodyBytes * (4 / 3)) + 4_096,
  })

  const stdout = result.stdout.toString(`utf8`)
  const nlIdx = stdout.indexOf(`\n`)
  const metaLine = (nlIdx === -1 ? stdout : stdout.slice(0, nlIdx)).replace(
    /\r$/,
    ``
  )

  if (metaLine === `NOCLIENT`) {
    throw new SandboxError(
      `runtime`,
      `fetchInSandbox: no HTTP client (curl/node/wget) found in the sandbox image — cannot fetch "${url}"`
    )
  }

  const tab = metaLine.indexOf(`\t`)
  const statusStr = tab === -1 ? metaLine : metaLine.slice(0, tab)
  const contentType = tab === -1 ? `` : metaLine.slice(tab + 1).trim()
  const status = Number(statusStr)

  if (!Number.isFinite(status) || status === 0) {
    // `000` (or unparseable): no HTTP response was received. From inside the
    // sandbox a policy-blocked host is indistinguishable from an unreachable
    // one, so this surfaces as a runtime failure mentioning both.
    throw new SandboxError(
      `runtime`,
      `fetchInSandbox: request to "${url}" produced no response — the host is unreachable or blocked by the sandbox's network policy`
    )
  }
  if (status < 200 || status > 599) {
    throw new SandboxError(
      `runtime`,
      `fetchInSandbox: in-sandbox client returned an invalid HTTP status (${statusStr}) for "${url}"`
    )
  }

  const b64 = (nlIdx === -1 ? `` : stdout.slice(nlIdx + 1)).replace(/\s+/g, ``)
  // A null-body status must be constructed with a `null` body, otherwise the
  // `Response` constructor throws instead of returning the 204/205/304.
  const body = NULL_BODY_STATUSES.has(status)
    ? null
    : Buffer.from(b64, `base64`)

  return new Response(body, {
    status,
    headers: contentType ? { 'content-type': contentType } : undefined,
  })
}
/**
 * Pure resolution of a sandbox's lifecycle knobs from the per-entity sandbox
 * config plus the live wake. No provider or IO is involved, so it is trivially
 * unit-testable and shared by the wake path and the spawn `inherit` path.
 *
 * Three independent facts are produced:
 *
 *  - IDENTITY (`sandboxKey`): an explicit `key` (set directly or adopted via
 *    `inherit`) is a cross-entity rendezvous handle and always wins. Without
 *    one, `scope` derives it: `'wake'` ⇒ `${entityUrl}#${wakeId}` (full
 *    per-wake isolation); `'entity'` (the default) ⇒ `entityUrl` (one stable
 *    workspace shared across that entity's wakes).
 *
 *  - DURABILITY (`persistent`): drives the owner's idle-teardown action —
 *    `true` preserves the sandbox (stop / suspend) for later reattach, `false`
 *    wipes it (remove / kill). When unset it defaults by scope: per-wake is
 *    ephemeral; explicit-key and per-entity are persistent.
 *
 *  - OWNERSHIP (`owner`): an owner CREATES the sandbox and its lifecycle
 *    governs teardown. A non-owner can only ATTACH to an already-live sandbox
 *    and never creates or tears down. Defaults to `true`; `inherit` resolves
 *    to `owner: false`.
 *
 * "Full isolation" therefore comes purely from a unique per-wake key, never a
 * separate code path — the provider only ever sees the resolved triple.
 */
export interface SandboxSelectionConfig {
  /**
   * Explicit cross-entity key (set on the entity's sandbox config, or adopted
   * from a parent via `inherit`). When present it is the identity verbatim
   * and `scope` is ignored.
   */
  key?: string
  /** Per-wake or per-entity identity when no explicit `key` is set. */
  scope?: `entity` | `wake`
  /** Idle-teardown durability. Defaults by scope (see module docs). */
  persistent?: boolean
  /**
   * Whether this entity OWNS the sandbox (create + attach + drive teardown)
   * or only ATTACHES to an owner's sandbox. Defaults to `true`; an `inherit`
   * spawn resolves to `false` upstream.
   */
  owner?: boolean
}

/** The resolved triple handed to a sandbox provider. */
export interface ResolvedSandboxIdentity {
  /** The key the provider uses to name / reattach the sandbox. */
  sandboxKey: string
  /** Whether idle teardown preserves (true) or wipes (false) the sandbox. */
  persistent: boolean
  /** Whether this entity owns the sandbox (may create) or only attaches. */
  owner: boolean
}

/**
 * Resolve the sandbox key, persistence and ownership for one wake.
 *
 * @param config - The entity's sandbox selection; every field is optional.
 * @param wake - The live wake; `wakeId` is only read for `scope: 'wake'`.
 */
export function resolveSandboxIdentity(
  config: SandboxSelectionConfig,
  wake: { entityUrl: string; wakeId: string }
): ResolvedSandboxIdentity {
  const perWake = (config.scope ?? `entity`) === `wake`
  const derivedKey = perWake
    ? `${wake.entityUrl}#${wake.wakeId}`
    : wake.entityUrl

  // Throwaway only for a per-wake sandbox without an explicit key; an
  // explicit `persistent` always overrides this default.
  let persistentByDefault = !perWake
  if (config.key !== undefined) persistentByDefault = true

  return {
    sandboxKey: config.key ?? derivedKey,
    persistent: config.persistent ?? persistentByDefault,
    // Only an explicit `owner: false` makes this entity a pure attacher.
    owner: config.owner ?? true,
  }
}
/**
 * Un-gated teardown decision shared by the providers: should releasing the
 * sandbox WIPE it (docker `remove` / remote `kill`) rather than PRESERVE it
 * (docker `stop` / remote `suspend`)?
 *
 * Ownership is deliberately not considered here; each provider applies its own
 * owner gate around this call.
 *
 * @param reclaim - The owning entity went terminal and reclaimed the sandbox.
 * @param persistent - Idle teardown is meant to preserve the sandbox.
 * @returns `true` when the sandbox was reclaimed or is ephemeral.
 */
export function sandboxWipesOnDispose(
  reclaim: boolean,
  persistent: boolean
): boolean {
  // A reclaimed sandbox is wiped regardless of durability.
  if (reclaim) return true
  // Otherwise only an ephemeral sandbox is wiped.
  return !persistent
}
/**
 * Assert that an isolated provider's `workingDirectory` is an absolute POSIX
 * path. The helpers below resolve against it with `posix.resolve`, which would
 * silently fall back to the host process cwd for a relative value, so this is
 * meant to be called once at provider construction to fail loudly instead.
 *
 * @throws Error when `workingDirectory` is not absolute.
 */
export function assertAbsolutePosixWorkingDirectory(
  workingDirectory: string
): void {
  if (posix.isAbsolute(workingDirectory)) return
  throw new Error(
    `sandbox workingDirectory must be an absolute POSIX path, got: ${JSON.stringify(
      workingDirectory
    )}`
  )
}

/**
 * Resolve a user-supplied `path` against `workingDirectory` to an absolute,
 * NORMALIZED posix path.
 *
 * Absolute inputs are normalized too (`.`/`..` segments and duplicate slashes
 * collapsed) so that the string handed to the provider is exactly the string
 * `isPathWithinSandbox` evaluated. Without this, `/work/a/../../etc` would be
 * checked in normalized form but sent to the provider verbatim.
 */
export function absoluteSandboxPath(
  workingDirectory: string,
  path: string
): string {
  // `posix.resolve` ignores `workingDirectory` when `path` is already absolute.
  return posix.resolve(workingDirectory, path)
}

/**
 * Whether `path` resolves to a location inside (or equal to)
 * `workingDirectory`. This is a string-level check: symlinks are not followed.
 *
 * Only a relative result that IS `..` or starts with a `../` segment escapes.
 * A plain `startsWith('..')` test would also reject legitimate in-workspace
 * names such as `..cache` or `...`.
 */
export function isPathWithinSandbox(
  workingDirectory: string,
  path: string
): boolean {
  const rel = posix.relative(
    workingDirectory,
    absoluteSandboxPath(workingDirectory, path)
  )
  return rel !== `..` && !rel.startsWith(`../`)
}
*/ + apiKey?: string + /** Provider-specific workspace template name/id. */ + template?: string + /** + * Stable identity used to reattach to the workspace. The adapter derives a + * provider-side handle from this (e.g. e2b sandbox metadata) so a later wake + * — possibly on a different host — reconnects to the same VM while it's + * alive, regardless of `persistent`. Resolved upstream (per-entity URL, + * per-wake `url#wakeId`, or an explicit shared key). + */ + sandboxKey?: string + /** + * Idle-teardown durability. `true` ⇒ `dispose()` PRESERVES the workspace + * (suspend) so a later wake or collaborator reconnects by `sandboxKey` with + * state intact; `false` (default) ⇒ `dispose()` KILLS it (wiped). Orthogonal + * to identity — a private (per-entity/per-wake) workspace may be persistent. + */ + persistent?: boolean + /** + * Ownership of the keyed workspace. `true` (default) ⇒ OWNER: create the VM + * if absent and let this lease's lifecycle govern teardown. `false` ⇒ + * ATTACHER: reconnect to an already-live VM for this `sandboxKey` and reject + * with `SandboxError('unavailable')` if none exists (never create a fresh, + * empty one); `dispose()` only detaches and never kills the owner's VM. + */ + owner?: boolean + /** + * The provider timeout window (ms). While a wake holds the sandbox the + * adapter heartbeats to keep it within this window; once the wake ends a + * persistent workspace auto-suspends this long after the last heartbeat + * (state preserved for reattach). Kept short to bound the trailing idle cost. + * Provider default applies when omitted. + */ + keepAliveMs?: number + /** + * Hostname allowlist for outbound egress from the workspace. Applied to the + * provider VM at creation (e.g. e2b `network.allowOut`). Default: deny + * everything. + * + * @deprecated prefer `initialNetworkPolicy`. When both are provided + * `initialNetworkPolicy` wins. 
+ */ + allowedHosts?: ReadonlyArray + initialNetworkPolicy?: NetworkPolicy + /** + * Pre-constructed client. Bypasses provider SDK loading — used by tests + * and by customers who want to construct the provider client themselves + * (e.g. with custom retry/observability wrappers). + */ + client?: RemoteSandboxClient +} + +/** + * Creates a Sandbox backed by a remote workspace (microVM or container) at a + * SaaS provider. The working directory lives inside the provider's VM; FS + * methods round-trip to the provider over its SDK. Cost: one network RTT + * per call. Use per-wake, not per `useAgent`. + * + * `sandbox.fetch()` runs *inside* the VM (via an in-sandbox HTTP client over + * `exec`), so outbound requests egress from the workspace and are governed by + * the network policy applied to the VM at creation — not from the host + * process. The policy is declared up front; it cannot be changed mid-session. + */ +export async function remoteSandbox(opts: RemoteSandboxOpts): Promise { + const workingDirectory = opts.workingDirectory ?? `/work` + // Names a location inside the provider VM, so it must be absolute POSIX — + // a relative value would silently join against the host cwd in containment. + assertAbsolutePosixWorkingDirectory(workingDirectory) + const persistent = opts.persistent === true + const owner = opts.owner !== false + const initialPolicy: NetworkPolicy = + opts.initialNetworkPolicy ?? + (opts.allowedHosts && opts.allowedHosts.length > 0 + ? { mode: `allowlist`, allow: [...opts.allowedHosts] } + : { mode: `deny-all` }) + // A caller-supplied `client` (tests, custom wrappers) is responsible for its + // own egress config; the policy is applied by the provider adapter we load. + const client = + opts.client ?? 
/**
 * `Sandbox` implementation backed by a provider-neutral `RemoteSandboxClient`.
 *
 * Relative paths resolve against `workingDirectory`. Writes (`writeFile`,
 * `mkdir`, `remove`) are rejected with `SandboxError('policy')` when they
 * resolve outside it; reads are not gated here. Every operation after
 * `dispose()` rejects with `SandboxError('runtime')`.
 */
class RemoteSandbox implements Sandbox {
  private disposed = false

  constructor(
    readonly name: string,
    readonly workingDirectory: string,
    private readonly client: RemoteSandboxClient,
    private readonly persistent: boolean,
    private readonly owner: boolean
  ) {}

  /**
   * Run a command through the client. `cwd` defaults to the working directory.
   * stdout/stderr are each truncated to `maxOutputBytes` when given, and
   * `outputTruncated` reports whether either stream was cut.
   */
  async exec(opts: SandboxExecOpts): Promise<SandboxExecResult> {
    this.assertLive()
    const r = await this.client.exec({
      command: opts.command,
      cwd: opts.cwd ?? this.workingDirectory,
      env: opts.env,
      timeoutMs: opts.timeoutMs,
      stdin: opts.stdin,
    })
    const max = opts.maxOutputBytes ?? Number.POSITIVE_INFINITY
    const stdout = r.stdout.length > max ? r.stdout.subarray(0, max) : r.stdout
    const stderr = r.stderr.length > max ? r.stderr.subarray(0, max) : r.stderr
    const outputTruncated = r.stdout.length > max || r.stderr.length > max
    return {
      exitCode: r.exitCode,
      signal: r.signal ?? null,
      stdout,
      stderr,
      timedOut: r.timedOut ?? false,
      // Remote providers don't yet propagate caller-side aborts into the
      // VM; the field exists for interface conformance and will become
      // meaningful once the client contract supports forwarding signals.
      aborted: false,
      outputTruncated,
    }
  }

  // NOTE(review): readFile / writeFile / non-recursive mkdir surface the raw
  // client error, while readdir / exists / remove / stat wrap it in a
  // SandboxError. Left as-is because callers may depend on the raw error;
  // confirm whether the asymmetry is intentional.

  /** Read a file; reads outside the working directory are permitted. */
  async readFile(path: string): Promise<Buffer> {
    this.assertLive()
    this.assertReadable(path)
    return this.client.readFile(this.absolute(path))
  }

  /** Write a file inside the working directory. */
  async writeFile(path: string, content: Buffer | string): Promise<void> {
    this.assertLive()
    this.assertWritable(path)
    await this.client.writeFile(this.absolute(path), content)
  }

  /**
   * Create a directory inside the working directory. With `recursive`, missing
   * ancestors are created one segment at a time.
   */
  async mkdir(path: string, opts?: { recursive?: boolean }): Promise<void> {
    this.assertLive()
    this.assertWritable(path)
    const target = this.absolute(path)
    if (opts?.recursive) {
      await this.makeDirRecursive(target)
    } else {
      await this.client.mkdir(target)
    }
  }

  /** List a directory; client failures are wrapped as `SandboxError('runtime')`. */
  async readdir(path: string): Promise<Array<DirEntry>> {
    this.assertLive()
    try {
      return await this.client.readdir(this.absolute(path))
    } catch (err) {
      throw wrapFsError(err, `readdir`, path)
    }
  }

  /** Probe for a path; client failures are wrapped as `SandboxError('runtime')`. */
  async exists(path: string): Promise<boolean> {
    this.assertLive()
    try {
      return await this.client.exists(this.absolute(path))
    } catch (err) {
      throw wrapFsError(err, `exists`, path)
    }
  }

  /** Remove a file or (with `recursive`) a tree inside the working directory. */
  async remove(path: string, opts?: { recursive?: boolean }): Promise<void> {
    this.assertLive()
    this.assertWritable(path)
    try {
      await this.client.remove(this.absolute(path), opts)
    } catch (err) {
      throw wrapFsError(err, `remove`, path)
    }
  }

  /** Stat an entry; client failures are wrapped as `SandboxError('runtime')`. */
  async stat(path: string): Promise<FileStat> {
    this.assertLive()
    try {
      return await this.client.stat(this.absolute(path))
    } catch (err) {
      throw wrapFsError(err, `stat`, path)
    }
  }

  async fetch(input: string | URL, init?: RequestInit): Promise<Response> {
    this.assertLive()
    // Run the request inside the VM. Egress is enforced by the provider's
    // own network controls, configured from the policy at creation; a denied
    // host surfaces as a failed request from the in-sandbox client.
    return fetchInSandbox((opts) => this.exec(opts), input, init)
  }

  /**
   * Release this lease. Idempotent: only the first call acts.
   *
   * - An OWNER whose sandbox must be wiped (entity went terminal via
   *   `reclaim`, or the sandbox is ephemeral) calls `kill()`.
   * - Otherwise the lease detaches via `suspend()` when the client has it.
   * - A client without `suspend()`: an owner falls back to `kill()`, but a
   *   non-owner attacher does nothing, because killing would destroy a VM it
   *   does not own.
   */
  async dispose(opts?: { reclaim?: boolean }): Promise<void> {
    if (this.disposed) return
    this.disposed = true
    const wipe =
      this.owner &&
      sandboxWipesOnDispose(opts?.reclaim === true, this.persistent)
    if (wipe) {
      await this.client.kill()
      return
    }
    if (this.client.suspend) {
      await this.client.suspend()
      return
    }
    // No state-preserving teardown available. Only the owner may fall back to
    // kill(); an attacher must never tear down the owner's workspace.
    if (this.owner) {
      await this.client.kill()
    }
  }

  private absolute(path: string): string {
    return absoluteSandboxPath(this.workingDirectory, path)
  }

  private assertReadable(path: string): void {
    // Reads outside the working directory are allowed (system binaries,
    // language stdlibs etc. live elsewhere in the VM). The remote workspace
    // is already isolated from the host filesystem; no extra TS gate needed.
    void path
  }

  private assertWritable(path: string): void {
    if (!isPathWithinSandbox(this.workingDirectory, path)) {
      throw new SandboxError(
        `policy`,
        `remoteSandbox: write access to "${path}" is denied (outside working directory ${this.workingDirectory})`
      )
    }
  }

  /**
   * Create `path` and any missing ancestors, shallowest first (the provider's
   * own mkdir typically fails on missing parents).
   *
   * A failing mkdir is tolerated only when the segment exists afterwards
   * (i.e. "already exists"). Any other failure is surfaced instead of letting
   * the caller believe the directory was created.
   */
  private async makeDirRecursive(path: string): Promise<void> {
    const parts = path.split(`/`).filter(Boolean)
    let prefix = path.startsWith(`/`) ? `/` : ``
    for (const part of parts) {
      prefix = prefix + (prefix.endsWith(`/`) ? `` : `/`) + part
      try {
        await this.client.mkdir(prefix)
      } catch (err) {
        // If the probe itself fails, treat the segment as absent so the
        // original mkdir error is the one reported.
        const present = await this.client.exists(prefix).catch(() => false)
        if (!present) throw wrapFsError(err, `mkdir`, prefix)
      }
    }
  }

  private assertLive(): void {
    if (this.disposed) {
      throw new SandboxError(
        `runtime`,
        `remoteSandbox: operation called after dispose()`
      )
    }
  }
}

/**
 * Normalize a client filesystem failure into `SandboxError('runtime')`,
 * keeping the errno-style `code` and message when present. An existing
 * `SandboxError` is returned unchanged. Tolerates non-Error rejection values,
 * including `null` / `undefined`.
 */
function wrapFsError(err: unknown, op: string, path: string): Error {
  if (err instanceof SandboxError) return err
  const e = (err ?? {}) as NodeJS.ErrnoException
  return new SandboxError(
    `runtime`,
    `remoteSandbox.${op}("${path}") failed: ${e.code ?? ``} ${e.message ?? String(err)}`.trim()
  )
}
/** CIDR matching every destination; paired with `allowOut` to form an allowlist. */
const E2B_ALL_TRAFFIC = `0.0.0.0/0`

/**
 * Translate the provider-neutral {@link NetworkPolicy} into e2b's create-time
 * egress options.
 *
 * - `allow-all`: leaves internet access enabled.
 * - `deny-all`: disables internet access.
 * - `allowlist`: sets `network.allowOut` to the listed destinations AND
 *   `network.denyOut` to all traffic. An `allowOut` list on its own leaves
 *   every other destination reachable, which would fail open (verify against
 *   the e2b network docs). An empty allowlist is treated as `deny-all`.
 *
 * @throws Error for an unrecognized `mode`, rather than returning `undefined`,
 *   which callers would spread into create options as "no restriction".
 */
export function e2bNetworkCreateOpts(
  policy: NetworkPolicy
): E2BNetworkCreateOpts {
  switch (policy.mode) {
    case `allow-all`:
      return { allowInternetAccess: true }
    case `deny-all`:
      return { allowInternetAccess: false }
    case `allowlist`:
      // Nothing allowed ⇒ nothing reachable.
      if (policy.allow.length === 0) return { allowInternetAccess: false }
      return {
        network: {
          allowOut: [...policy.allow],
          denyOut: [E2B_ALL_TRAFFIC],
        },
      }
    default:
      throw new Error(
        `e2bNetworkCreateOpts: unsupported network policy mode ${JSON.stringify(
          (policy as { mode?: unknown }).mode
        )}`
      )
  }
}
/**
 * Heartbeat period (ms) for a keep-alive window of `keepAliveMs`.
 *
 * The refresh fires at half the window so a late tick cannot let it lapse.
 * The interval must never exceed the window itself: a fixed 15 s floor would
 * do so for windows shorter than 15 s, letting the VM be reaped between two
 * heartbeats. The only floor applied is therefore 1 s, to avoid a tight timer
 * loop for tiny windows.
 *
 * A non-finite or non-positive window falls back to 15 s instead of yielding a
 * `NaN` / zero timer period.
 */
function heartbeatIntervalFor(keepAliveMs: number): number {
  if (!Number.isFinite(keepAliveMs) || keepAliveMs <= 0) return 15_000
  return Math.max(1_000, Math.floor(keepAliveMs / 2))
}
+ * + * The workspace is always tagged with `sandboxKey` and reattachable: a later + * wake (possibly on a different host) reconnects to the same VM while it's + * alive. `persistent` only changes idle reaping — the VM auto-pauses + * (onTimeout:'pause', state preserved for reattach) when persistent, else it's + * killed (onTimeout:'kill'). The adapter heartbeats `setTimeout` while the wake + * is active (keeping the VM alive regardless of durability) and stops on + * dispose; `RemoteSandbox.dispose` then suspends (persistent) or kills. + */ +export async function createE2BClient(opts: { + apiKey?: string + template?: string + workingDirectory: string + persistent?: boolean + owner?: boolean + sandboxKey?: string + keepAliveMs?: number + /** Egress policy applied to the VM at creation. Default: deny everything. */ + initialNetworkPolicy?: NetworkPolicy + /** Optional sink for diagnostics (e.g. swallowed keep-alive failures). */ + log?: (message: string) => void +}): Promise { + let mod: { Sandbox: E2BSandboxClass } + try { + // e2b is an optional peer dependency — resolved at runtime when the + // customer opts into the remote provider. + mod = (await import(`e2b`)) as unknown as typeof mod + } catch { + throw new Error( + `remoteSandbox({provider:'e2b'}) requires the "e2b" package. Install it: pnpm add e2b` + ) + } + const persistent = opts.persistent === true + const keepAliveMs = opts.keepAliveMs ?? DEFAULT_KEEP_ALIVE_MS + const network = e2bNetworkCreateOpts( + opts.initialNetworkPolicy ?? { mode: `deny-all` } + ) + const sbx = await connectOrCreateE2BSandbox(mod.Sandbox, { + apiKey: opts.apiKey, + template: opts.template, + persistent, + owner: opts.owner !== false, + sandboxKey: opts.sandboxKey, + keepAliveMs, + network, + }) + // Ensure the working directory exists in the VM. 
/**
 * Pick the e2b sandbox to operate on.
 *
 * - No `sandboxKey`: always create a fresh, untagged sandbox.
 * - With a key: look for a running/paused sandbox tagged with the key's hash
 *   and `connect()` to it when found. Otherwise an owner creates a tagged
 *   sandbox, while a non-owner (`owner: false`) rejects with
 *   `SandboxError('unavailable')`.
 *
 * `persistent` only selects the timeout lifecycle for newly created sandboxes
 * (`pause` with auto-resume vs `kill`). `network` options are applied only on
 * create; a reattached sandbox is connected to as-is.
 *
 * The SDK class is injected so the decision can be exercised with a fake.
 */
export async function connectOrCreateE2BSandbox(
  Sandbox: E2BSandboxClass,
  opts: {
    apiKey?: string
    template?: string
    persistent: boolean
    owner?: boolean
    sandboxKey?: string
    keepAliveMs: number
    /** Egress policy create-opts; applied to every freshly created VM. */
    network?: E2BNetworkCreateOpts
  }
): Promise<E2BSandboxInstance> {
  const { apiKey, template, keepAliveMs: timeoutMs } = opts
  const egress = opts.network ?? {}
  const mayCreate = opts.owner !== false
  const lifecycle: E2BLifecycle = opts.persistent
    ? { onTimeout: `pause`, autoResume: true }
    : { onTimeout: `kill` }

  // Single create path; `metadata` is only present for keyed sandboxes.
  const createFresh = (
    metadata?: Record<string, string>
  ): Promise<E2BSandboxInstance> => {
    const createOpts: E2BCreateOpts = {
      apiKey,
      ...(metadata === undefined ? {} : { metadata }),
      timeoutMs,
      lifecycle,
      ...egress,
    }
    return template
      ? Sandbox.create(template, createOpts)
      : Sandbox.create(createOpts)
  }

  if (!opts.sandboxKey) return createFresh()

  const keyTag = sandboxKeyTag(opts.sandboxKey)
  const live = await firstSandboxForKey(Sandbox, keyTag)
  if (live) {
    // Egress policy is fixed at creation, so it is not re-applied on connect.
    return Sandbox.connect(live.sandboxId, { apiKey, timeoutMs })
  }

  if (!mayCreate) {
    throw new SandboxError(
      `unavailable`,
      `remoteSandbox: cannot attach — no owner workspace is live for this key. The owning entity must create it first.`
    )
  }

  return createFresh({ [SANDBOX_KEY_METADATA]: keyTag })
}

/** First 32 hex characters of the SHA-256 of `sandboxKey`; used as the metadata tag. */
function sandboxKeyTag(sandboxKey: string): string {
  const digest = createHash(`sha256`).update(sandboxKey).digest(`hex`)
  return digest.substring(0, 32)
}

/**
 * Look up the running or paused sandbox tagged with `keyTag`. Only the first
 * page of results is consulted. When several match, the oldest is chosen (see
 * `compareBySandboxAge`), so every caller picks the same one.
 */
async function firstSandboxForKey(
  Sandbox: E2BSandboxClass,
  keyTag: string
): Promise<E2BSandboxInfo | undefined> {
  const paginator = Sandbox.list({
    query: {
      metadata: { [SANDBOX_KEY_METADATA]: keyTag },
      state: [`running`, `paused`],
    },
  })
  const candidates = await paginator.nextItems()
  let oldest: E2BSandboxInfo | undefined
  for (const candidate of candidates) {
    // Strict `<` keeps the earliest-listed entry among exact ties.
    if (oldest === undefined || compareBySandboxAge(candidate, oldest) < 0) {
      oldest = candidate
    }
  }
  return oldest
}

/**
 * Order sandboxes oldest-first by `startedAt` (a missing value counts as time
 * 0), breaking ties by `sandboxId` so the order is total.
 */
function compareBySandboxAge(a: E2BSandboxInfo, b: E2BSandboxInfo): number {
  const startedA = a.startedAt?.getTime() ?? 0
  const startedB = b.startedAt?.getTime() ?? 0
  if (startedA !== startedB) return startedA - startedB
  if (a.sandboxId < b.sandboxId) return -1
  if (a.sandboxId > b.sandboxId) return 1
  return 0
}
/**
 * Record emitted ahead of the plain (`-print0`) listing so the parser can tell
 * it apart from the typed (`-printf`) listing. A typed record always has a tab
 * at index 1, so it can never equal this marker.
 */
const PLAIN_LISTING_MARKER = `__e2b_plain_listing__`

/**
 * Adapt an e2b sandbox instance to the provider-neutral `RemoteSandboxClient`.
 *
 * A keep-alive timer is started immediately: every `heartbeatIntervalMs`
 * (default derived from `keepAliveMs`) it calls `sbx.setTimeout(keepAliveMs)`.
 * Refresh failures are reported to `opts.log` and otherwise ignored. The timer
 * is `unref`'d and is stopped by `kill()` or `suspend()`.
 *
 * Filesystem operations use the SDK's `files.*` methods when present and fall
 * back to shell commands (`find`, `test`, `rm`, `stat`) otherwise.
 *
 * @param sbx - The connected e2b sandbox.
 * @param defaultCwd - Working directory for `exec` when the call gives none.
 * @param opts - Keep-alive window, heartbeat period and diagnostics sink.
 */
export function adaptE2B(
  sbx: E2BSandboxInstance,
  defaultCwd: string,
  opts?: {
    keepAliveMs?: number
    heartbeatIntervalMs?: number
    log?: (message: string) => void
  }
): RemoteSandboxClient {
  // Refresh the absolute timeout while this wake holds the VM so a long-running
  // wake isn't reaped out from under us — regardless of durability, since e2b's
  // timeout is not idle-based. We deliberately never pause() or shorten the
  // timeout here: a collaborator still heartbeating (possibly on another host)
  // keeps the VM alive, and only once every holder stops does the platform reap
  // it per its lifecycle (pause when persistent, kill when ephemeral). That
  // makes the lifecycle refcount-free without any cross-host coordination.
  const keepAliveMs = opts?.keepAliveMs ?? DEFAULT_KEEP_ALIVE_MS
  const interval =
    opts?.heartbeatIntervalMs ?? heartbeatIntervalFor(keepAliveMs)
  let heartbeat: ReturnType<typeof setInterval> | undefined = setInterval(
    () => {
      void sbx.setTimeout(keepAliveMs).catch((err: unknown) => {
        // Usually benign: the VM was killed/paused elsewhere, so there's
        // nothing to keep alive. But this also swallows SDK/network/auth
        // failures, so leave a debug trail for an operator chasing a stuck
        // reattach. Intentionally non-fatal — a failed keep-alive only means
        // the VM may reap sooner, which the lifecycle already tolerates.
        opts?.log?.(
          `e2b keep-alive refresh failed: ${err instanceof Error ? err.message : String(err)}`
        )
      })
    },
    interval
  )
  // Don't let the keep-alive timer hold the process open.
  heartbeat.unref?.()
  const stopHeartbeat = (): void => {
    if (heartbeat) {
      clearInterval(heartbeat)
      heartbeat = undefined
    }
  }
  return {
    async exec(opts) {
      const r = await sbx.commands.run(opts.command, {
        cwd: opts.cwd ?? defaultCwd,
        envs: opts.env,
        timeoutMs: opts.timeoutMs,
      })
      return {
        stdout: Buffer.from(r.stdout ?? ``),
        stderr: Buffer.from(r.stderr ?? ``),
        exitCode: r.exitCode,
      }
    },
    async readFile(path) {
      const out = await sbx.files.read(path, { format: `bytes` })
      return Buffer.isBuffer(out) ? out : Buffer.from(out as Uint8Array)
    },
    async writeFile(path, content) {
      await sbx.files.write(path, content)
    },
    async mkdir(path) {
      await sbx.files.makeDir(path)
    },
    async readdir(path) {
      if (sbx.files.list) {
        const entries = await sbx.files.list(path)
        return entries.map((e) => ({
          name: e.name,
          type: e.type === `dir` ? (`directory` as const) : (`file` as const),
        }))
      }
      // Shell fallback, NUL-delimited so names with newlines survive.
      //
      // 1. Typed listing: `-printf '%y\t%f\0'` yields `<kind>\t<name>` records
      //    (d/f/l/…), so `type` can be populated, including symlinks.
      // 2. Plain listing, for `find` builds without `-printf` (e.g. BusyBox):
      //    print the marker, then `-print0` full paths. This must not use
      //    `-printf` again or it fails for the same reason as step 1. Entry
      //    kinds are unknown here and reported as `other`. (Assumes the
      //    fallback `find` supports `-print0` — TODO confirm for the image.)
      const target = shellQuote(path)
      const listing = `find ${target} -mindepth 1 -maxdepth 1`
      const r = await sbx.commands.run(
        `${listing} -printf '%y\\t%f\\0' 2>/dev/null || { printf '%s\\0' ${PLAIN_LISTING_MARKER}; ${listing} -print0; }`
      )
      if (r.exitCode !== 0) {
        throwShellError(r.stderr, `readdir`, path)
      }
      const records = r.stdout.split(`\0`).filter((s) => s.length > 0)
      const markerAt = records.indexOf(PLAIN_LISTING_MARKER)
      if (markerAt !== -1) {
        // Anything before the marker is partial output of the failed typed
        // attempt; the plain listing after it is complete on its own.
        return records.slice(markerAt + 1).map((fullPath) => ({
          name: fullPath.slice(fullPath.lastIndexOf(`/`) + 1),
          type: `other` as const,
        }))
      }
      return records.map((rec) => {
        const tab = rec.indexOf(`\t`)
        if (tab === -1) {
          return { name: rec, type: `other` as const }
        }
        const kind = rec.slice(0, tab)
        const name = rec.slice(tab + 1)
        const type: `file` | `directory` | `symlink` | `other` =
          kind === `d`
            ? `directory`
            : kind === `f`
              ? `file`
              : kind === `l`
                ? `symlink`
                : `other`
        return { name, type }
      })
    },
    async exists(path) {
      if (sbx.files.exists) return sbx.files.exists(path)
      const r = await sbx.commands.run(`test -e ${shellQuote(path)}`)
      return r.exitCode === 0
    },
    async remove(path, opts) {
      if (sbx.files.remove && !opts?.recursive) {
        await sbx.files.remove(path)
        return
      }
      // `-f` would swallow missing-path errors; we want the conformance
      // contract of "remove of nonexistent throws". Use plain `rm` (or
      // `rm -r` for recursive) and lift exit codes into typed errors.
      const flag = opts?.recursive ? `-r` : ``
      const r = await sbx.commands.run(`rm ${flag} ${shellQuote(path)}`.trim())
      if (r.exitCode !== 0) {
        throwShellError(r.stderr, `remove`, path)
      }
    },
    async stat(path): Promise<FileStat> {
      if (sbx.files.getInfo) {
        const info = await sbx.files.getInfo(path)
        return {
          type:
            info.type === `dir`
              ? `directory`
              : info.type === `file`
                ? `file`
                : `other`,
          size: info.size ?? 0,
          mtimeMs: info.modifiedTime
            ? new Date(info.modifiedTime).getTime()
            : 0,
        }
      }
      // Fallback: run `stat` once and validate the output shape. GNU/BSD
      // formats both produce three pipe-separated fields; we use `||` to
      // try GNU first then BSD, with stderr suppression so the two attempts
      // don't corrupt each other's output.
      const r = await sbx.commands.run(
        `(stat -c '%F|%s|%Y' ${shellQuote(path)} 2>/dev/null || stat -f '%HT|%z|%m' ${shellQuote(path)} 2>/dev/null)`
      )
      const fields = r.stdout.trim().split(`|`)
      if (r.exitCode !== 0 || fields.length !== 3) {
        const err = new Error(
          r.stderr || `stat: no such file or directory: ${path}`
        ) as NodeJS.ErrnoException
        err.code = `ENOENT`
        throw err
      }
      const [kind, size, mtime] = fields
      const lowerKind = (kind ?? ``).toLowerCase()
      const type: FileStat[`type`] = lowerKind.includes(`directory`)
        ? `directory`
        : lowerKind.includes(`symbolic`)
          ? `symlink`
          : lowerKind.includes(`regular`) || lowerKind === `file`
            ? `file`
            : `other`
      const mtimeNum = Number(mtime)
      return {
        type,
        size: Number(size) || 0,
        // `stat` reports seconds; FileStat carries milliseconds.
        mtimeMs: Number.isFinite(mtimeNum) ? mtimeNum * 1000 : 0,
      }
    },
    async kill() {
      stopHeartbeat()
      await sbx.kill()
    },
    async suspend() {
      // Persistent workspace teardown: stop refreshing the keep-alive and let
      // the platform take over. The VM auto-pauses ~keepAliveMs after our last
      // heartbeat (onTimeout:'pause'), preserving filesystem + memory state
      // for reattach, and is reaped after e2b's paused-retention window. We
      // don't pause() or shorten the timeout here, so a collaborator still
      // heartbeating elsewhere is never disrupted.
      stopHeartbeat()
    },
  }
}

/** Quote `arg` as a single POSIX-shell word (embedded `'` becomes `'\''`). */
function shellQuote(arg: string): string {
  return `'` + arg.replace(/'/g, `'\\''`) + `'`
}

/**
 * Throw an errno-style error for a failed shell command, with `code` guessed
 * from common stderr text: `ENOENT`, `EACCES`, or `EIO` when nothing matches.
 */
function throwShellError(stderr: string, op: string, path: string): never {
  const err = new Error(
    stderr || `${op}: failed for ${path}`
  ) as NodeJS.ErrnoException
  // Best-effort code classification from common stderr substrings; falls
  // back to EIO so consumers don't see an undefined `code` field.
  if (/No such file|cannot stat|cannot access/i.test(stderr))
    err.code = `ENOENT`
  else if (/Permission denied/i.test(stderr)) err.code = `EACCES`
  else err.code = `EIO`
  throw err
}
+ */ +export interface RemoteSandboxClient { + exec(opts: { + command: string + cwd?: string + env?: Record + timeoutMs?: number + stdin?: Buffer | string + }): Promise<{ + stdout: Buffer + stderr: Buffer + exitCode: number | null + signal?: string | null + timedOut?: boolean + }> + readFile(path: string): Promise + writeFile(path: string, content: Buffer | string): Promise + mkdir(path: string, opts?: { recursive?: boolean }): Promise + readdir(path: string): Promise> + exists(path: string): Promise + remove(path: string, opts?: { recursive?: boolean }): Promise + stat(path: string): Promise + kill(): Promise + /** + * State-preserving teardown for a persistent workspace. Rather than killing + * the VM, hand its lifecycle back to the provider (e.g. stop the keep-alive + * heartbeat so the provider auto-suspends it on idle) so its filesystem and, + * where supported, memory/process state survive for a later wake — or a + * collaborator on another host — to reattach by key. `remoteSandbox` calls + * this from `dispose()` only when the sandbox is `persistent`; ephemeral + * sandboxes always `kill()`. Optional: clients that don't distinguish fall + * back to `kill()`. + */ + suspend?(): Promise +} diff --git a/packages/agents-runtime/src/sandbox/types.ts b/packages/agents-runtime/src/sandbox/types.ts new file mode 100644 index 0000000000..bc9971ac12 --- /dev/null +++ b/packages/agents-runtime/src/sandbox/types.ts @@ -0,0 +1,253 @@ +/** + * Sandbox primitive — isolates filesystem, process, and network operations + * performed by LLM-driven tools. Isolation strength varies by provider; + * each provider documents what it does and does not protect against. + */ + +/** + * Stable list of bundled adapter names. The conformance test suite asserts the + * set of providers it exercises equals this list, so adding a new adapter + * without registering it in the conformance suite fails CI. 
/**
 * Stable list of bundled adapter names. The conformance test suite asserts the
 * set of providers it exercises equals this list, so adding a new adapter
 * without registering it in the conformance suite fails CI.
 */
export const KNOWN_ADAPTERS = [`unrestricted`, `remote`, `docker`] as const
export type KnownAdapter = (typeof KNOWN_ADAPTERS)[number]

export interface Sandbox {
  /**
   * Provider identifier, for logs/legibility only — NOT a capability
   * discriminator. Built-ins use a `KnownAdapter`, optionally with a
   * provider-specific suffix (e.g. `docker:runc`); custom providers may use any
   * string. Callers must not branch on this: pass paths/requests straight
   * through and trust the sandbox to serve or reject.
   */
  readonly name: KnownAdapter | (string & {})

  /**
   * Absolute path of the sandbox's primary writable root. The sandbox
   * resolves relative paths passed to FS methods against this; callers use
   * it only to format cwd-relative messages — they do not pre-resolve or
   * pre-validate paths (that's the sandbox's job; see below).
   */
  readonly workingDirectory: string

  /** Run a shell command inside the sandbox and collect its output. */
  exec(opts: SandboxExecOpts): Promise<SandboxExecResult>

  /**
   * FS methods own path resolution and containment, enforced against the
   * filesystem the provider actually owns. A relative `path` resolves
   * against `workingDirectory`. Callers pass user paths straight through and
   * trust the sandbox to serve or reject — they must not stat/realpath in the
   * host process, which would target the wrong filesystem.
   *
   * Containment is provider-dependent, so it is documented per concern
   * rather than promised uniformly:
   *  - WRITES (`writeFile`, `mkdir`, `remove`) are contained on every
   *    provider: a path resolving outside the workspace is rejected with
   *    `SandboxError('policy')`.
   *  - READS (`readFile`, `stat`, `readdir`, `exists`) are contained on
   *    `unrestricted` and `docker`, but `remote` allows reads anywhere in the
   *    VM (system binaries / stdlibs live outside the workspace, and the VM is
   *    already isolated from the host). So a read outside the workspace
   *    rejects with `policy` on unrestricted/docker but may succeed on remote.
   *  - SYMLINK escapes are followed and rejected only by `unrestricted` (it
   *    shares the host FS, so realpath resolution is its sole boundary).
   *    `docker`/`remote` use a string-prefix check and rely on the
   *    container/VM root as the isolation boundary, so an in-sandbox symlink
   *    out of the workspace is not separately rejected there.
   */
  readFile(path: string): Promise<Buffer>
  writeFile(path: string, content: Buffer | string): Promise<void>
  mkdir(path: string, opts?: { recursive?: boolean }): Promise<void>
  /**
   * List entries in a directory. Order is not guaranteed; callers that
   * need a stable order should sort by `name`.
   */
  readdir(path: string): Promise<Array<DirEntry>>
  /**
   * Returns true iff the path exists and is reachable. As a safe-probe
   * primitive, returns `false` both for missing paths and for paths denied
   * by the sandbox's read policy — callers should treat `exists` as
   * least-info and not use it to detect policy boundaries. (Matches the
   * Vercel/Cloudflare/E2B LCD semantics.)
   */
  exists(path: string): Promise<boolean>
  /** Remove a file or (when `recursive: true`) a directory tree. */
  remove(path: string, opts?: { recursive?: boolean }): Promise<void>
  /** Metadata for an entry. Rejects with `SandboxError('runtime')` if missing. */
  stat(path: string): Promise<FileStat>

  /**
   * Perform an HTTP request from *inside* the sandbox. The request egresses
   * through the sandbox's network and is therefore governed by the network
   * policy declared when the sandbox was created (allowlist/deny-all/etc.) —
   * it never runs in the host process. Isolated providers (docker, remote)
   * implement this by running an in-sandbox HTTP client over `exec`; the
   * host-process `unrestricted` provider, which has no isolation boundary,
   * fetches in-process. The returned `Response` is synthesized from the
   * client's output (status, content-type, body).
   */
  fetch(input: string | URL, init?: RequestInit): Promise<Response>

  /**
   * Release this lease on the sandbox. By default an *owner's* release maps to
   * a state-preserving call (pause/stop) when the sandbox is persistent, so the
   * next factory invocation can transparently reattach by `sandboxKey`; an
   * ephemeral owner, or any non-owner (attacher) lease, just detaches.
   *
   * Pass `reclaim: true` to signal the owning entity has reached a terminal
   * state (killed/stopped) so its sandbox should be WIPED now rather than
   * preserved — honoured only for an owner lease (an attacher can never reclaim
   * the owner's sandbox). Removal still waits for any concurrent leases to
   * drain. Not idempotent.
   */
  dispose(opts?: { reclaim?: boolean }): Promise<void>
}

/**
 * Parameters passed to a {@link SandboxFactory}.
 *
 * The factory is invoked by the runtime at the start of each wake-session to
 * construct `ctx.sandbox`. Closures may hold caches as in-process
 * optimizations, but correctness must not depend on the cache
 * surviving a host cold start — provider-side identity must be
 * derivable from `sandboxKey` alone (deterministic name, label, etc.)
 * so a wake delivered to a freshly cold-started ephemeral host
 * (Cloudflare Workers, Lambda) can still reattach to the warm
 * provider-side sandbox.
 */
export interface SandboxFactoryParams {
  /**
   * Resolved identity of the sandbox to construct (or reattach to). Computed
   * upstream from the entity's sandbox config + the live wake (see
   * `resolveSandboxIdentity`): an explicit/inherited key, a per-entity key
   * (the entity URL), or a per-wake key (`entityUrl#wakeId`). Providers derive
   * provider-side identity (container name, workspace id) from this and
   * reattach to a live sandbox with the same key.
   */
  sandboxKey: string
  /**
   * Idle-teardown durability, resolved upstream. `true` ⇒ the provider
   * PRESERVES the sandbox on idle (stop / suspend) so a later wake or
   * collaborator can reattach by `sandboxKey`; `false` ⇒ it is WIPED on idle
   * (remove / kill). Orthogonal to identity — a private (per-entity/per-wake)
   * sandbox may be persistent, and an explicitly-keyed one may be ephemeral.
   */
  persistent: boolean
  /**
   * Ownership, resolved upstream. `true` ⇒ this entity OWNS the sandbox: the
   * provider creates it if absent and this entity's lifecycle governs teardown.
   * `false` ⇒ ATTACH-only: the provider reattaches to an already-live sandbox
   * with this `sandboxKey` and rejects with `SandboxError('unavailable')` if
   * none exists (it never creates a fresh, empty one), and disposing never
   * tears the owner's sandbox down.
   */
  owner: boolean
  /** The entity this wake belongs to. Useful for logs/labels, not identity. */
  entityUrl: string
  entityType: string
  args: Readonly<Record<string, unknown>>
}

/** Builds (or reattaches to) the sandbox described by `params`. */
export type SandboxFactory = (params: SandboxFactoryParams) => Promise<Sandbox>

/**
 * Named sandbox profile registered on a runtime. The runtime advertises
 * its profile names + labels to the agents-server; entity types reference
 * profiles by name; spawn-time picks one of the entity's allowed profiles.
 * The factory closure stays local to the runtime — only the descriptive
 * fields (`name`, `label`, `description`) cross the wire.
 */
export interface SandboxProfile {
  /** Stable wire identifier (e.g. `local`, `docker`). */
  name: string
  /** Human-readable label shown in the UI picker. */
  label: string
  /** Optional longer-form description shown as a tooltip / row subtitle. */
  description?: string
  /**
   * True when the sandbox lives off-host (a remote provider VM) and is
   * therefore reachable from any runner. The agents-server uses this to
   * relax the co-location guard: a shared remote sandbox does not require
   * its collaborators to be pinned to one runner (a shared *local* sandbox
   * does, since the container exists on a single host). Defaults to false —
   * profiles are treated as host-local unless they opt in.
   */
  remote?: boolean
  factory: SandboxFactory
}

/**
 * Egress policy for a sandbox. How strongly each mode is enforced depends on
 * the provider — read the per-mode notes before relying on one as a boundary:
 *
 *  - `deny-all` — a hard boundary on the isolated providers: docker gives the
 *    container no network interface (`NetworkMode=none`); remote denies at the
 *    VM. Nothing inside (exec, fetch, or otherwise) can egress.
 *  - `allow-all` — no egress restriction.
 *  - `allowlist` — only a *surface* protection on docker, NOT a boundary: it is
 *    enforced host-side at the `fetch()` tool path alone (see
 *    `docker/net-policy.ts`). Code run via `exec`/bash has direct bridge egress
 *    and is NOT constrained by the allowlist, so do not treat docker+allowlist
 *    as network isolation — use `deny-all` for that. Remote enforces the
 *    allowlist at the VM boundary (provider-dependent; see the e2b adapter).
 */
export type NetworkPolicy =
  | { mode: `allow-all` }
  | { mode: `deny-all` }
  | { mode: `allowlist`; allow: ReadonlyArray<string> }

export interface SandboxExecOpts {
  /** Shell command line. Sandbox decides how to run it (typically `sh -c`). */
  command: string
  /** Defaults to the sandbox's configured working directory. */
  cwd?: string
  /** Env merged onto the sandbox's allowed-env base. */
  env?: Record<string, string>
  /** Wall-clock timeout. Default is provider-specific. */
  timeoutMs?: number
  stdin?: Buffer | string
  /**
   * Cap captured output at this many bytes, applied to stdout and stderr
   * independently (each stream keeps at most this many bytes). When either
   * stream is cut short the result reports `outputTruncated: true`.
   */
  maxOutputBytes?: number
  /**
   * External cancellation signal. When aborted, the running command is
   * terminated (same escalation as `timeoutMs`) and the result has
   * `aborted: true` and `timedOut: false`, with `signal` set to the signal
   * used. First of `signal` or `timeoutMs` to fire wins.
   */
  signal?: AbortSignal
}

export interface DirEntry {
  name: string
  type: `file` | `directory` | `symlink` | `other`
}

export interface FileStat {
  type: `file` | `directory` | `symlink` | `other`
  size: number
  mtimeMs: number
}

export interface SandboxExecResult {
  exitCode: number | null
  signal: string | null
  stdout: Buffer
  stderr: Buffer
  timedOut: boolean
  /**
   * True iff the command was terminated because the caller's
   * `SandboxExecOpts.signal` fired. Distinct from `timedOut` (timeoutMs
   * elapsed) and from a naturally-delivered `signal` field.
   */
  aborted: boolean
  outputTruncated: boolean
}

/**
 * Failure category carried by {@link SandboxError}. As used by the sandbox
 * contract above: `policy` — the request was refused by a containment or
 * network rule; `runtime` — the operation itself failed (e.g. missing file);
 * `unavailable` — no sandbox could be attached for the requested key.
 */
export type SandboxErrorKind = `policy` | `runtime` | `unavailable`

/** Error type every sandbox provider uses to report failures to callers. */
export class SandboxError extends Error {
  readonly kind: SandboxErrorKind
  constructor(kind: SandboxErrorKind, message: string) {
    super(message)
    this.name = `SandboxError`
    this.kind = kind
  }
}
/** Options accepted by {@link unrestrictedSandbox}. */
export interface UnrestrictedSandboxOpts {
  /** Default cwd for `exec` and containment root for the FS methods. */
  workingDirectory: string
}

/**
 * Create the pass-through provider rooted at `opts.workingDirectory`.
 * Returns a promise only to match the async factory shape; construction
 * itself does no I/O.
 */
export function unrestrictedSandbox(
  opts: UnrestrictedSandboxOpts
): Promise<Sandbox> {
  return Promise.resolve(new UnrestrictedSandbox(opts.workingDirectory))
}

/** Grace period between SIGTERM and the SIGKILL escalation. */
const KILL_GRACE_MS = 500

/**
 * Sandbox that runs in the host process: commands are spawned with `sh -c`
 * on the host and FS methods hit the host filesystem directly. The only
 * boundaries it enforces are (a) FS paths must resolve inside
 * `workingDirectory` after following symlinks and (b) children receive a
 * small fixed set of environment variables instead of the full
 * `process.env`. It is intended for single-tenant, trusted code — see the
 * TOCTOU note in `resolveWithin`.
 */
class UnrestrictedSandbox implements Sandbox {
  readonly name = `unrestricted`
  private disposed = false

  constructor(readonly workingDirectory: string) {}

  /**
   * Run `opts.command` through `sh -c` and collect its output.
   *
   * Never rejects for a failing command: spawn errors are reported with
   * `exitCode: null` and the error message in `stderr`. Rejects only with
   * `SandboxError('runtime')` when called after `dispose()`.
   */
  async exec(opts: SandboxExecOpts): Promise<SandboxExecResult> {
    this.assertLive()
    const cwd = opts.cwd ?? this.workingDirectory
    // Deliberately NOT `...process.env`: only these basics are forwarded so
    // host secrets are not handed to the child as environment variables.
    const env: NodeJS.ProcessEnv = {
      PATH: process.env.PATH,
      HOME: process.env.HOME,
      USER: process.env.USER,
      LANG: process.env.LANG,
      TERM: process.env.TERM,
      ...opts.env,
    }
    const max = opts.maxOutputBytes ?? Number.POSITIVE_INFINITY

    return new Promise<SandboxExecResult>((settle) => {
      const child = spawn(`sh`, [`-c`, opts.command], {
        cwd,
        env,
        stdio: [opts.stdin === undefined ? `ignore` : `pipe`, `pipe`, `pipe`],
        // Run in a new process group so we can signal the whole tree on
        // timeout. Linux's default `child.kill('SIGTERM')` signals only
        // the immediate child (sh), leaving grandchildren (like `sleep`)
        // orphaned with the stdio pipes still held — the `close` event
        // then doesn't fire until the grandchild exits naturally.
        detached: true,
      })

      // The cap applies to each stream separately.
      const out = cappedCollector(max)
      const errOut = cappedCollector(max)
      child.stdout?.on(`data`, (chunk: Buffer) => out.push(chunk))
      child.stderr?.on(`data`, (chunk: Buffer) => errOut.push(chunk))

      if (opts.stdin !== undefined) {
        // A command that exits (or closes stdin) before consuming its input
        // makes the write fail with EPIPE. Without a listener that `error`
        // event would be thrown as an uncaught exception in the host
        // process; the command's own exit status already tells the story.
        child.stdin?.on(`error`, () => {})
        child.stdin?.end(opts.stdin)
      }

      let timer: NodeJS.Timeout | undefined
      let timedOut = false
      let aborted = false
      const killTree = (signal: NodeJS.Signals): void => {
        // Negative PID signals the entire process group. We created the
        // group via `detached: true` above.
        try {
          if (child.pid !== undefined) process.kill(-child.pid, signal)
        } catch {
          // Process group may already be gone; ignore.
        }
      }
      // SIGTERM first, then SIGKILL if the tree doesn't die in time. The
      // escalation timer is unref'd so it never keeps the host alive.
      const terminate = (): void => {
        killTree(`SIGTERM`)
        setTimeout(() => killTree(`SIGKILL`), KILL_GRACE_MS).unref()
      }

      if (opts.timeoutMs !== undefined) {
        timer = setTimeout(() => {
          timedOut = true
          terminate()
        }, opts.timeoutMs)
      }

      const onAbort = (): void => {
        aborted = true
        terminate()
      }
      if (opts.signal) {
        if (opts.signal.aborted) onAbort()
        else opts.signal.addEventListener(`abort`, onAbort, { once: true })
      }

      // Shared tail for both terminal events; a second call is harmless
      // because a promise settles only once.
      const finish = (
        outcome: Pick<SandboxExecResult, `exitCode` | `signal` | `stderr`>
      ): void => {
        if (timer) clearTimeout(timer)
        opts.signal?.removeEventListener(`abort`, onAbort)
        settle({
          ...outcome,
          stdout: out.concat(),
          timedOut,
          aborted,
          outputTruncated: out.truncated() || errOut.truncated(),
        })
      }

      child.on(`error`, (err) => {
        finish({
          exitCode: null,
          signal: null,
          stderr: Buffer.from(err.message),
        })
      })
      child.on(`close`, (code, signal) => {
        finish({ exitCode: code, signal, stderr: errOut.concat() })
      })
    })
  }

  async readFile(path: string): Promise<Buffer> {
    this.assertLive()
    const target = await this.resolveWithin(path)
    try {
      return await readFile(target)
    } catch (err) {
      throw wrapFsError(err, `readFile`, path)
    }
  }

  async writeFile(path: string, content: Buffer | string): Promise<void> {
    this.assertLive()
    const target = await this.resolveWithin(path)
    try {
      await writeFile(target, content)
    } catch (err) {
      throw wrapFsError(err, `writeFile`, path)
    }
  }

  async mkdir(path: string, opts?: { recursive?: boolean }): Promise<void> {
    this.assertLive()
    const target = await this.resolveWithin(path)
    try {
      await mkdir(target, { recursive: opts?.recursive ?? false })
    } catch (err) {
      throw wrapFsError(err, `mkdir`, path)
    }
  }

  async readdir(path: string): Promise<Array<DirEntry>> {
    this.assertLive()
    const target = await this.resolveWithin(path)
    try {
      const entries = await readdir(target, { withFileTypes: true })
      return entries.map((e) => ({ name: e.name, type: dirEntryType(e) }))
    } catch (err) {
      throw wrapFsError(err, `readdir`, path)
    }
  }

  async exists(path: string): Promise<boolean> {
    this.assertLive()
    let target: string
    try {
      target = await this.resolveWithin(path)
    } catch (err) {
      // Safe-probe semantics: a path denied by the workspace boundary reads
      // as "absent" rather than leaking the policy edge (matches docker /
      // remote). Non-policy failures still surface.
      if (err instanceof SandboxError && err.kind === `policy`) return false
      throw err
    }
    try {
      await stat(target)
      return true
    } catch (err) {
      const code = (err as NodeJS.ErrnoException).code
      // ENOTDIR: an intermediate component is a regular file, so the path
      // cannot exist either.
      if (code === `ENOENT` || code === `ENOTDIR`) return false
      throw wrapFsError(err, `exists`, path)
    }
  }

  async remove(path: string, opts?: { recursive?: boolean }): Promise<void> {
    this.assertLive()
    const target = await this.resolveWithin(path)
    try {
      await rm(target, { recursive: opts?.recursive ?? false, force: false })
    } catch (err) {
      throw wrapFsError(err, `remove`, path)
    }
  }

  async stat(path: string): Promise<FileStat> {
    this.assertLive()
    const target = await this.resolveWithin(path)
    try {
      const s = await stat(target)
      return toFileStat(s)
    } catch (err) {
      throw wrapFsError(err, `stat`, path)
    }
  }

  /**
   * Resolve a user-supplied path against the working directory and verify it
   * stays inside, following symlinks. This provider shares the host
   * filesystem, so workspace containment is enforced here — the tools are
   * filesystem-agnostic and trust the sandbox to serve or reject. Defends
   * against the CVE-2025-53109/53110-shape bypass where a path looks clean
   * but a component is a symlink pointing outside the workspace.
   *
   * - For paths that already exist, returns the canonicalized realpath.
   * - For paths that don't yet exist (write/mkdir into a new file), walks up
   *   to the deepest existing ancestor, verifies its realpath is inside the
   *   workspace, and returns the canonicalized ancestor joined with the
   *   non-existing remainder — so the FS target can't be redirected by an
   *   attacker-controlled symlink mid-path.
   *
   * Throws `SandboxError('policy')` if the resolved path escapes the
   * working directory, and `SandboxError('runtime')` if an ancestor cannot
   * be canonicalized for a reason other than "does not exist".
   */
  private async resolveWithin(userPath: string): Promise<string> {
    // The realpath walk below is the authority: it canonicalizes the
    // deepest existing ancestor (following symlinks) and checks containment
    // against the canonical workspace root, so it handles both `..` escapes
    // and symlinked components. We deliberately avoid a pure-string
    // pre-check — comparing a non-canonical absolute path against `cwdReal`
    // false-positives when the workspace sits under a symlink (e.g. macOS
    // /var → /private/var).
    const cwdReal = await realpath(this.workingDirectory)
    let probe = resolve(this.workingDirectory, userPath)
    let suffix = ``
    for (;;) {
      let real: string
      try {
        real = await realpath(probe)
      } catch (err) {
        const code = (err as NodeJS.ErrnoException).code
        // ENOENT: this component doesn't exist yet. ENOTDIR: a component
        // above it is a regular file. Either way keep climbing so the
        // existing ancestor is what gets containment-checked; the eventual
        // FS call reports the precise failure.
        if (code !== `ENOENT` && code !== `ENOTDIR`) {
          throw wrapFsError(err, `resolve`, userPath)
        }
        const parent = dirname(probe)
        if (parent === probe) throw this.denied(userPath)
        // `relative` (not a fixed-offset slice) so the leaf is also right
        // when `parent` is the filesystem root and already ends in a
        // separator.
        const leaf = relative(parent, probe)
        suffix = suffix.length === 0 ? leaf : `${leaf}/${suffix}`
        probe = parent
        continue
      }
      if (escapesRoot(relative(cwdReal, real))) throw this.denied(userPath)
      // TODO(multi-tenant): when `suffix` is non-empty the returned target
      // includes not-yet-existing components, leaving a narrow TOCTOU window
      // — a concurrent writer could materialize an intermediate symlink that
      // escapes the workspace between this check and the caller's FS op. Safe
      // for this provider's single-tenant trusted-code contract; a
      // multi-tenant use would need to re-validate the final resolved target
      // *after* the FS call (e.g. via O_NOFOLLOW or a post-op realpath
      // containment recheck).
      return suffix.length === 0 ? real : resolve(real, suffix)
    }
  }

  private denied(userPath: string): SandboxError {
    return new SandboxError(
      `policy`,
      `unrestrictedSandbox: access to "${userPath}" is denied (outside working directory ${this.workingDirectory}).`
    )
  }

  /** Fetch in-process; this provider has no network boundary to route through. */
  async fetch(input: string | URL, init?: RequestInit): Promise<Response> {
    this.assertLive()
    return globalThis.fetch(input as RequestInfo, init)
  }

  async dispose(): Promise<void> {
    // No teardown to do (this provider shares the host process), but flip the
    // flag so post-dispose use throws — mirrors docker/remote and keeps the
    // cross-provider conformance invariant honest, guarding against a future
    // change that makes dispose meaningful (e.g. cancelling in-flight execs).
    this.disposed = true
  }

  private assertLive(): void {
    if (this.disposed) {
      throw new SandboxError(
        `runtime`,
        `unrestrictedSandbox: operation called after dispose().`
      )
    }
  }
}

/**
 * Accumulates chunks of one output stream, keeping at most `maxBytes` bytes
 * and remembering whether anything had to be dropped.
 */
function cappedCollector(maxBytes: number): {
  push(chunk: Buffer): void
  concat(): Buffer
  truncated(): boolean
} {
  const chunks: Array<Buffer> = []
  let bytes = 0
  let cut = false
  return {
    push(chunk: Buffer): void {
      const remaining = maxBytes - bytes
      if (chunk.length <= remaining) {
        chunks.push(chunk)
        bytes += chunk.length
        return
      }
      // Keep whatever still fits, drop the rest.
      if (remaining > 0) {
        chunks.push(chunk.subarray(0, remaining))
        bytes += remaining
      }
      cut = true
    },
    concat: () => Buffer.concat(chunks),
    truncated: () => cut,
  }
}

/**
 * True when `rel` — the result of `relative(root, target)` — points outside
 * `root`: its first segment is exactly `..`, or it is absolute (which
 * `relative` returns when the two paths share no root, e.g. different Windows
 * drives). Works on whole segments via `dirname`, so an in-root entry whose
 * name merely starts with two dots (`..notes`) is not mistaken for an escape.
 */
function escapesRoot(rel: string): boolean {
  let head = rel
  for (;;) {
    const up = dirname(head)
    // `head` is now the first segment of a relative path.
    if (up === `.`) return head === `..`
    // dirname reached a fixed point other than `.` ⇒ a filesystem root.
    if (up === head) return true
    head = up
  }
}

function dirEntryType(e: {
  isDirectory(): boolean
  isFile(): boolean
  isSymbolicLink(): boolean
}): DirEntry[`type`] {
  if (e.isSymbolicLink()) return `symlink`
  if (e.isDirectory()) return `directory`
  if (e.isFile()) return `file`
  return `other`
}

function toFileStat(s: {
  isFile(): boolean
  isDirectory(): boolean
  isSymbolicLink(): boolean
  size: number
  mtimeMs: number
}): FileStat {
  let type: FileStat[`type`] = `other`
  if (s.isSymbolicLink()) type = `symlink`
  else if (s.isDirectory()) type = `directory`
  else if (s.isFile()) type = `file`
  return { type, size: s.size, mtimeMs: s.mtimeMs }
}

/** Normalize any FS failure into `SandboxError('runtime')`; SandboxErrors pass through. */
function wrapFsError(err: unknown, op: string, path: string): Error {
  if (err instanceof SandboxError) return err
  const e = err as NodeJS.ErrnoException
  return new SandboxError(
    `runtime`,
    `unrestrictedSandbox.${op}("${path}") failed: ${e.code ?? ``} ${e.message ?? String(err)}`.trim()
  )
}
*/ @@ -987,6 +989,7 @@ export function createSetupContext( wake?: Wake tags?: Record observe?: boolean + sandbox?: SpawnSandboxOption } ): Promise { const observeChild = opts?.observe !== false @@ -1117,6 +1120,7 @@ export function createSetupContext( initialMessage: opts?.initialMessage, wake: opts?.wake, tags: opts?.tags, + sandbox: opts?.sandbox, } ) realEntityUrl = childUrl diff --git a/packages/agents-runtime/src/tags.ts b/packages/agents-runtime/src/tags.ts index 976ff58a6f..291b0bdd97 100644 --- a/packages/agents-runtime/src/tags.ts +++ b/packages/agents-runtime/src/tags.ts @@ -81,6 +81,10 @@ export const entityMembershipRowSchema = z.object({ status: z.enum(entityStatuses), tags: z.record(z.string(), z.string()).default({}), spawn_args: z.record(z.string(), z.unknown()).default({}), + // Display-only subset: a tag-membership observer only needs the profile + // name to show which sandbox a sibling runs in. The authoritative selection + // (key/scope/persistent/owner) rides the wake notification, not this row. 
const TIMEOUT_MS = 30_000
const MAX_OUTPUT_BYTES = 50_000

/**
 * Build the `bash` agent tool on top of `sandbox`.
 *
 * Every invocation is delegated to `sandbox.exec` with a fixed timeout
 * (`TIMEOUT_MS`) and output cap (`MAX_OUTPUT_BYTES`). The tool's text result
 * is stdout, followed by stderr when present, followed by bracketed notices
 * when the command timed out or its output was truncated.
 */
export function createBashTool(sandbox: Sandbox): AgentTool {
  return {
    name: `bash`,
    label: `Bash`,
    description: `Execute a shell command and return its output. Commands run with a 30-second timeout and a 50KB output cap. The host process environment is not forwarded, so host secrets (e.g. API keys) are not available as environment variables.`,
    parameters: Type.Object({
      command: Type.String({ description: `The shell command to execute` }),
    }),
    execute: async (_toolCallId, params) => {
      const { command } = params as { command: string }
      const { stdout, stderr, exitCode, timedOut, outputTruncated } =
        await sandbox.exec({
          command,
          timeoutMs: TIMEOUT_MS,
          maxOutputBytes: MAX_OUTPUT_BYTES,
        })

      let text = stdout.toString(`utf-8`)
      const errText = stderr.toString(`utf-8`)
      if (errText) {
        // Label stderr only when there is stdout to tell it apart from.
        text = text ? `${text}\n\nSTDERR:\n${errText}` : errText
      }
      if (timedOut) {
        text = `${text}\n\n[Command timed out after ${TIMEOUT_MS / 1000}s]`
      }
      if (outputTruncated) {
        text = `${text}\n\n[Output truncated at ${MAX_OUTPUT_BYTES} bytes]`
      }

      return {
        content: [{ type: `text` as const, text: text || `(no output)` }],
        // A null exit code (killed by signal / spawn failure) is reported as 1.
        details: { exitCode: exitCode ?? 1, timedOut },
      }
    },
  }
}
+ const key = resolve(sandbox.workingDirectory, filePath) + const rel = relative(sandbox.workingDirectory, key) try { - const resolved = resolve(workingDirectory, filePath) - const rel = relative(workingDirectory, resolved) - if (rel.startsWith(`..`)) { - return { - content: [ - { - type: `text` as const, - text: `Error: Path "${filePath}" is outside the working directory`, - }, - ], - details: { replacements: 0 }, - } - } - - if (!readSet.has(resolved)) { + if (!readSet.has(key)) { return { content: [{ type: `text` as const, text: READ_GUARD_MESSAGE(rel) }], details: { replacements: 0 }, } } - const original = await readFile(resolved, `utf-8`) + const original = (await sandbox.readFile(filePath)).toString(`utf-8`) if (!replace_all) { const first = original.indexOf(old_string) @@ -98,7 +91,7 @@ export function createEditTool( original.slice(0, first) + new_string + original.slice(first + old_string.length) - await writeFile(resolved, updated, `utf-8`) + await sandbox.writeFile(filePath, updated) const patch = createTwoFilesPatch(rel, rel, original, updated) return { content: [ @@ -125,7 +118,7 @@ export function createEditTool( } } const updated = parts.join(new_string) - await writeFile(resolved, updated, `utf-8`) + await sandbox.writeFile(filePath, updated) const patch = createTwoFilesPatch(rel, rel, original, updated) return { content: [ @@ -137,6 +130,17 @@ export function createEditTool( details: { replacements: count, diff: patch }, } } catch (err) { + if (err instanceof SandboxError && err.kind === `policy`) { + return { + content: [ + { + type: `text` as const, + text: `Error: Path "${filePath}" is outside the working directory`, + }, + ], + details: { replacements: 0 }, + } + } runtimeLog.warn( `[edit tool]`, `failed to edit ${filePath}: ${err instanceof Error ? 
err.message : String(err)}` diff --git a/packages/agents-runtime/src/tools/fetch-url.ts b/packages/agents-runtime/src/tools/fetch-url.ts index ecc9f80738..63c6d60cff 100644 --- a/packages/agents-runtime/src/tools/fetch-url.ts +++ b/packages/agents-runtime/src/tools/fetch-url.ts @@ -4,6 +4,8 @@ import { Readability } from '@mozilla/readability' import { JSDOM, VirtualConsole } from 'jsdom' import TurndownService from 'turndown' import { completeWithLowCostModel } from '../model-runner' +import { SandboxError } from '../sandbox/types' +import type { Sandbox } from '../sandbox/types' import type { AgentTool } from '@mariozechner/pi-agent-core' import type { LowCostModelCatalog, LowCostModelConfig } from '../model-runner' @@ -47,6 +49,7 @@ function createPiRunnerExtractor(opts: { } export function createFetchUrlTool( + sandbox: Sandbox, opts: { extractWithLLM?: ExtractWithLLM catalog?: LowCostModelCatalog @@ -69,7 +72,7 @@ export function createFetchUrlTool( execute: async (_toolCallId, params) => { const { url, prompt } = params as { url: string; prompt: string } try { - const res = await fetch(url, { + const res = await sandbox.fetch(url, { headers: { 'User-Agent': `Mozilla/5.0 (compatible; DurableStreamsAgent/1.0)`, Accept: `text/html,application/xhtml+xml,text/plain,*/*`, @@ -106,6 +109,20 @@ export function createFetchUrlTool( details: { charCount: extracted.length, usedLLM: true }, } } catch (err) { + // Surface a network-policy denial (allowlist miss / SSRF guard) as a + // distinct, actionable signal — mirrors the FS tools' policy handling — + // so the model knows the URL was blocked rather than transiently failing. 
+ if (err instanceof SandboxError && err.kind === `policy`) { + return { + content: [ + { + type: `text` as const, + text: `Error: URL "${url}" was blocked by the sandbox network policy (it targets a disallowed or private/link-local address).`, + }, + ], + details: { charCount: 0, usedLLM: false }, + } + } return { content: [ { @@ -119,5 +136,3 @@ export function createFetchUrlTool( }, } } - -export const fetchUrlTool: AgentTool = createFetchUrlTool() diff --git a/packages/agents-runtime/src/tools/read-file.ts b/packages/agents-runtime/src/tools/read-file.ts index 42a1041049..3eba91a1f5 100644 --- a/packages/agents-runtime/src/tools/read-file.ts +++ b/packages/agents-runtime/src/tools/read-file.ts @@ -1,13 +1,14 @@ -import { readFile, stat } from 'node:fs/promises' -import { relative, resolve } from 'node:path' +import { resolve } from 'node:path' import { Type } from '@sinclair/typebox' import { runtimeLog } from '../log' +import { SandboxError } from '../sandbox/types' +import type { Sandbox } from '../sandbox/types' import type { AgentTool } from '@mariozechner/pi-agent-core' const MAX_FILE_SIZE = 512 * 1024 // 512 KB export function createReadFileTool( - workingDirectory: string, + sandbox: Sandbox, readSet?: Set ): AgentTool { return { @@ -22,21 +23,11 @@ export function createReadFileTool( execute: async (_toolCallId, params) => { const { path: filePath } = params as { path: string } try { - const resolved = resolve(workingDirectory, filePath) - const rel = relative(workingDirectory, resolved) - if (rel.startsWith(`..`)) { - return { - content: [ - { - type: `text` as const, - text: `Error: Path "${filePath}" is outside the working directory`, - }, - ], - details: { charCount: 0 }, - } - } - - const fileStat = await stat(resolved) + // Path resolution and workspace containment are the sandbox's job + // (it owns the filesystem); a denied path rejects with + // SandboxError('policy'), handled below. 
We only stat for the size + // gate, which is a tool-level concern. + const fileStat = await sandbox.stat(filePath) if (fileStat.size > MAX_FILE_SIZE) { return { content: [ @@ -49,7 +40,7 @@ export function createReadFileTool( } } - const buffer = await readFile(resolved) + const buffer = await sandbox.readFile(filePath) // Detect binary: check for null bytes in the first 8KB (same heuristic git/grep use). const sample = buffer.subarray(0, 8192) @@ -66,12 +57,23 @@ export function createReadFileTool( } const text = buffer.toString(`utf-8`) - readSet?.add(resolved) + readSet?.add(resolve(sandbox.workingDirectory, filePath)) return { content: [{ type: `text` as const, text }], details: { charCount: text.length }, } } catch (err) { + if (err instanceof SandboxError && err.kind === `policy`) { + return { + content: [ + { + type: `text` as const, + text: `Error: Path "${filePath}" is outside the working directory`, + }, + ], + details: { charCount: 0 }, + } + } runtimeLog.warn( `[read tool]`, `failed to read ${filePath}: ${err instanceof Error ? 
err.message : String(err)}` diff --git a/packages/agents-runtime/src/tools/write.ts b/packages/agents-runtime/src/tools/write.ts index 9ba9079f91..a97fa6f608 100644 --- a/packages/agents-runtime/src/tools/write.ts +++ b/packages/agents-runtime/src/tools/write.ts @@ -1,12 +1,13 @@ -import { mkdir, readFile, writeFile } from 'node:fs/promises' import { dirname, relative, resolve } from 'node:path' import { createTwoFilesPatch } from 'diff' import { Type } from '@sinclair/typebox' import { runtimeLog } from '../log' +import { SandboxError } from '../sandbox/types' +import type { Sandbox } from '../sandbox/types' import type { AgentTool } from '@mariozechner/pi-agent-core' export function createWriteTool( - workingDirectory: string, + sandbox: Sandbox, readSet?: Set ): AgentTool { return { @@ -26,34 +27,23 @@ export function createWriteTool( path: string content: string } + // Containment is enforced by the sandbox (it owns the filesystem); + // an escaping path rejects with SandboxError('policy'), handled below. + // `key`/`rel` are pure-string normalizations for the readSet and the + // diff header — not a security check. 
+ const key = resolve(sandbox.workingDirectory, filePath) + const rel = relative(sandbox.workingDirectory, key) try { - const resolved = resolve(workingDirectory, filePath) - const rel = relative(workingDirectory, resolved) - if (rel.startsWith(`..`)) { - return { - content: [ - { - type: `text` as const, - text: `Error: Path "${filePath}" is outside the working directory`, - }, - ], - details: { bytesWritten: 0 }, - } - } - let original = `` - let existed = true - try { - original = await readFile(resolved, `utf-8`) - } catch (err) { - const code = (err as NodeJS.ErrnoException).code - if (code !== `ENOENT`) throw err - existed = false + const existed = await sandbox.exists(filePath) + if (existed) { + const buf = await sandbox.readFile(filePath) + original = buf.toString(`utf-8`) } - await mkdir(dirname(resolved), { recursive: true }) - await writeFile(resolved, content, `utf-8`) - readSet?.add(resolved) + await sandbox.mkdir(dirname(filePath), { recursive: true }) + await sandbox.writeFile(filePath, content) + readSet?.add(key) const bytesWritten = Buffer.byteLength(content, `utf-8`) const patch = createTwoFilesPatch( @@ -75,6 +65,17 @@ export function createWriteTool( details: { bytesWritten, diff: patch, existed }, } } catch (err) { + if (err instanceof SandboxError && err.kind === `policy`) { + return { + content: [ + { + type: `text` as const, + text: `Error: Path "${filePath}" is outside the working directory`, + }, + ], + details: { bytesWritten: 0 }, + } + } runtimeLog.warn( `[write tool]`, `failed to write ${filePath}: ${err instanceof Error ? 
err.message : String(err)}` diff --git a/packages/agents-runtime/src/types.ts b/packages/agents-runtime/src/types.ts index a3b8b497cb..bcd58d3d97 100644 --- a/packages/agents-runtime/src/types.ts +++ b/packages/agents-runtime/src/types.ts @@ -28,6 +28,8 @@ import type { EntityStreamDB as RuntimeEntityStreamDB, EntityStreamDBWithActions as RuntimeEntityStreamDBWithActions, } from './entity-stream-db' +import type { Sandbox, SandboxProfile } from './sandbox/types' +import type { SandboxSelectionConfig } from './sandbox/identity' import type { ChildStatusEntry, ContextEntryAttrs as EntityContextEntryAttrs, @@ -521,6 +523,7 @@ export interface RuntimeContext { initialMessage?: unknown tags?: Record observe?: boolean + sandbox?: SpawnSandboxOption } ) => Promise observe: (( @@ -650,6 +653,21 @@ export interface WebhookNotification { streams: { main: string; error: string } tags?: Record spawnArgs?: Record + sandbox?: { + profile: string + /** Explicit cross-entity key (set directly or adopted via `inherit`). */ + key?: string + /** Per-entity (default) or per-wake identity when no explicit `key`. */ + scope?: `entity` | `wake` + /** Idle-teardown durability; defaults by scope when unset. */ + persistent?: boolean + /** + * Whether this entity owns the sandbox (create + attach + govern + * teardown) or only attaches to an owner's. Defaults to owner; an + * `inherit` spawn stores `false`. + */ + owner?: boolean + } | null createdBy?: string } principal?: RuntimePrincipal @@ -714,6 +732,15 @@ export interface ProcessWakeConfig { idleTimeout?: number /** Heartbeat interval in ms (default: 10_000) */ heartbeatInterval?: number + /** + * Sandbox profiles registered on this runtime, indexed by profile + * name. Built by `createRuntimeRouter` from the `sandboxProfiles` + * option. processWake looks up the profile named on + * `entity.sandbox.profile` at wake-session start. 
When the entity + * has no profile set, processWake falls back to an in-process + * unrestricted sandbox at the host's cwd. + */ + sandboxProfiles?: ReadonlyMap } export type WakePhase = `setup` | `active` | `closing` | `closed` @@ -773,6 +800,17 @@ export interface SetupCompleteResult { // ── Wake Primitives ────────────────────────────────────────────── +/** + * Sandbox selection when spawning a child entity. + * - `'inherit'` — adopt the parent wake's resolved sandbox (profile + resolved + * key + persistent); gracefully yields none if the parent has no sandbox. + * - object form — pick a `profile`, optionally with `scope` / `persistent`, + * join an explicit shared `key`, or `inherit: true`. + */ +export type SpawnSandboxOption = + | `inherit` + | (SandboxSelectionConfig & { profile?: string; inherit?: boolean }) + export type Wake = | `runFinished` | { on: `runFinished`; includeResponse?: boolean } @@ -904,6 +942,18 @@ export interface HandlerContext< * cancellable work such as fetches or subprocesses. */ signal: AbortSignal + /** + * Sandbox for this wake. Provisioned by the runtime from the + * sandbox profile named on `entity.sandbox.profile` (or an + * unrestricted-at-cwd fallback if nothing was selected) at the + * start of each wake-session, and disposed in `processWake`'s + * outer `finally`. A single wake-session that drains multiple + * queued wakes for the same entity reuses one sandbox; across + * wake-sessions a new sandbox is constructed and inter-wake state + * preservation is the provider's responsibility. Handlers must NOT + * call `sandbox.dispose()` — `processWake` owns disposal. + */ + sandbox: Sandbox useAgent: (config: AgentConfig) => AgentHandle useContext: (config: UseContextConfig) => void timelineMessages: (opts?: TimelineProjectionOpts) => Array @@ -927,6 +977,7 @@ export interface HandlerContext< * parent never awaits child completion. 
*/ observe?: boolean + sandbox?: SpawnSandboxOption } ) => Promise observe: (( diff --git a/packages/agents-runtime/test/bash-tool.test.ts b/packages/agents-runtime/test/bash-tool.test.ts index 82c84b1a24..7ee69b9f91 100644 --- a/packages/agents-runtime/test/bash-tool.test.ts +++ b/packages/agents-runtime/test/bash-tool.test.ts @@ -1,8 +1,9 @@ import { mkdtemp, realpath, rm } from 'node:fs/promises' -import { homedir, tmpdir } from 'node:os' +import { tmpdir } from 'node:os' import { join } from 'node:path' import { afterEach, beforeEach, describe, expect, it } from 'vitest' import { createBashTool } from '../src/tools/bash' +import { unrestrictedSandbox } from '../src/sandbox/unrestricted' describe(`bash tool`, () => { let cwd: string @@ -15,8 +16,9 @@ describe(`bash tool`, () => { await rm(cwd, { recursive: true, force: true }) }) - it(`runs commands in the working directory without overriding HOME`, async () => { - const tool = createBashTool(cwd) + it(`runs commands in the working directory and exposes HOME from the sandbox`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const tool = createBashTool(sandbox) const result = await tool.execute(`call-1`, { command: `node -e "console.log(process.cwd()); console.log(process.env.HOME)"`, }) @@ -25,36 +27,14 @@ describe(`bash tool`, () => { const lines = (result.content[0] as { text: string }).text .trim() .split(`\n`) - expect(lines).toEqual([await realpath(cwd), process.env.HOME ?? homedir()]) + expect(lines[0]).toBe(await realpath(cwd)) + expect(lines[1]).toBe(process.env.HOME ?? ``) + await sandbox.dispose() }) - // Characterization: the bash tool currently passes `env: { ...process.env }` - // wholesale to spawned children (`bash.ts:23`). The two tests below capture - // that behavior so the env-scrubbing change planned for a follow-up PR has - // an explicit regression target. 
- it(`leaks the parent PATH into the child process (no env scrubbing)`, async () => { - const tool = createBashTool(cwd) - const result = await tool.execute(`call-path`, { - command: `printf '%s' "$PATH"`, - }) - expect((result.content[0] as { text: string }).text).toBe( - process.env.PATH ?? `` - ) - }) - - it(`leaks an ANTHROPIC_API_KEY-style env var to the child process`, async () => { - const sentinel = `sk-test-bash-leak-${Date.now()}` - const prev = process.env.ANTHROPIC_API_KEY - process.env.ANTHROPIC_API_KEY = sentinel - try { - const tool = createBashTool(cwd) - const result = await tool.execute(`call-key`, { - command: `printf '%s' "$ANTHROPIC_API_KEY"`, - }) - expect((result.content[0] as { text: string }).text).toBe(sentinel) - } finally { - if (prev === undefined) delete process.env.ANTHROPIC_API_KEY - else process.env.ANTHROPIC_API_KEY = prev - } - }) + // The env-scrubbing characterization tests from #4354 documented the + // pre-fix bash env leak. Those expectations have been inverted by PR 6a's + // env scrub (see sandbox-tool-refactor.test.ts > 'does not forward + // arbitrary process.env to children'). The characterizations are removed + // because their assertions no longer match the fixed behavior. 
}) diff --git a/packages/agents-runtime/test/context-factory.test.ts b/packages/agents-runtime/test/context-factory.test.ts index f2784c4d7c..166ecb96b0 100644 --- a/packages/agents-runtime/test/context-factory.test.ts +++ b/packages/agents-runtime/test/context-factory.test.ts @@ -3,6 +3,7 @@ import { getCronStreamPath } from '../src/cron-utils' import { createHandlerContext } from '../src/context-factory' import { ENTITY_COLLECTIONS } from '../src/entity-schema' import { createLocalOnlyTestCollection } from './helpers/local-only' +import { testSandboxStub } from './helpers/context-test-helpers' import type { EntityStreamDBWithActions } from '../src/types' import type { ChangeEvent } from '@durable-streams/state' @@ -57,6 +58,7 @@ describe(`createHandlerContext`, () => { firstWake: false, args: {}, db, + sandbox: testSandboxStub, state: {}, actions: {}, electricTools: [], @@ -171,6 +173,7 @@ describe(`createHandlerContext`, () => { firstWake: false, args: {}, db, + sandbox: testSandboxStub, state: {}, actions: {}, electricTools: [], diff --git a/packages/agents-runtime/test/create-handler.test.ts b/packages/agents-runtime/test/create-handler.test.ts index db711418c5..76e52ed83c 100644 --- a/packages/agents-runtime/test/create-handler.test.ts +++ b/packages/agents-runtime/test/create-handler.test.ts @@ -6,6 +6,7 @@ import { createRuntimeRouter, } from '../src/create-handler' import { clearRegistry, defineEntity } from '../src/define-entity' +import { SandboxError } from '../src/sandbox/types' import type { IncomingMessage, ServerResponse } from 'node:http' import type { KeyObject } from 'node:crypto' import type { @@ -302,6 +303,75 @@ describe(`createRuntimeHandler`, () => { }) }) + it(`isolates a SandboxError('unavailable') wake without taking the runner down`, async () => { + // A wake whose sandbox profile was dropped by a runner re-registration race + // rejects with SandboxError('unavailable'). 
It must fail only that one wake: + // the handler keeps accepting and completing other wakes (the runner stays + // up), and the error is recorded for drain rather than thrown synchronously. + defineEntity(`test-agent`, { handler: async () => {} }) + processWakeMock + .mockRejectedValueOnce( + new SandboxError( + `unavailable`, + `sandbox profile "docker" not registered` + ) + ) + .mockResolvedValueOnce(undefined) + + const handler = createRuntimeHandler({ + baseUrl: `http://localhost:3000`, + handlerUrl: `http://localhost:4000/electric-agents`, + webhookSignature: false, + }) + + const notification = (n: number) => ({ + consumerId: `consumer-${n}`, + epoch: 1, + wakeId: `wake-${n}`, + streamPath: `/streams/entity:test-${n}`, + streams: [{ path: `/streams/entity:test-${n}`, offset: `0_0` }], + callback: `http://localhost:3000/_electric/wakes/wake-${n}`, + claimToken: `tok-${n}`, + entity: { + type: `test-agent`, + status: `active`, + url: `http://localhost:3000/test-agent/test-${n}`, + streams: { + main: `/streams/entity:test-${n}`, + error: `/streams/entity-error:test-${n}`, + }, + }, + }) + + const post = (n: number) => + handler.handleWebhookRequest( + new Request(`http://localhost/electric-agents`, { + method: `POST`, + headers: { 'content-type': `application/json` }, + body: JSON.stringify(notification(n)), + }) + ) + + // The failing wake (1) is dispatched, then a healthy wake (2). Both are + // accepted (200) — the rejection doesn't propagate out of dispatch. + expect((await post(1)).status).toBe(200) + expect((await post(2)).status).toBe(200) + await flushAsyncWork() + + // The healthy wake completed; exactly one error was recorded and isolated. + expect(handler.debugState()).toMatchObject({ + pendingWakeCount: 0, + pendingWakeLabels: [], + wakeErrorCount: 1, + typeNames: [`test-agent`], + }) + expect(processWakeMock).toHaveBeenCalledTimes(2) + + // Drain surfaces the isolated error (so it's observable), then clears. 
+ await expect(handler.waitForSettled()).rejects.toThrow(`not registered`) + expect(handler.debugState()).toMatchObject({ wakeErrorCount: 0 }) + }) + it(`returns 400 for invalid JSON`, async () => { const handler = createRuntimeHandler({ baseUrl: `http://localhost:3000`, diff --git a/packages/agents-runtime/test/edit-tool-read-guard.test.ts b/packages/agents-runtime/test/edit-tool-read-guard.test.ts index d96ac13feb..e0bd43905a 100644 --- a/packages/agents-runtime/test/edit-tool-read-guard.test.ts +++ b/packages/agents-runtime/test/edit-tool-read-guard.test.ts @@ -4,6 +4,7 @@ import { join } from 'node:path' import { afterEach, beforeEach, describe, expect, it } from 'vitest' import { createEditTool } from '../src/tools/edit' import { createReadFileTool } from '../src/tools/read-file' +import { unrestrictedSandbox } from '../src/sandbox/unrestricted' describe(`edit tool read-first guard`, () => { let cwd: string @@ -18,8 +19,9 @@ describe(`edit tool read-first guard`, () => { it(`rejects edit if the file was not read in this session`, async () => { await writeFile(join(cwd, `f.txt`), `hello world`, `utf-8`) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) const readSet = new Set() - const edit = createEditTool(cwd, readSet) + const edit = createEditTool(sandbox, readSet) const result = await edit.execute(`call`, { path: `f.txt`, old_string: `world`, @@ -28,13 +30,15 @@ describe(`edit tool read-first guard`, () => { expect((result.content[0] as { text: string }).text).toMatch( /has not been read in this session/ ) + await sandbox.dispose() }) it(`allows edit after a read in the same session`, async () => { await writeFile(join(cwd, `f.txt`), `hello world`, `utf-8`) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) const readSet = new Set() - const read = createReadFileTool(cwd, readSet) - const edit = createEditTool(cwd, readSet) + const read = createReadFileTool(sandbox, readSet) + const edit = createEditTool(sandbox, 
readSet) await read.execute(`r`, { path: `f.txt` }) const result = await edit.execute(`e`, { @@ -45,14 +49,16 @@ describe(`edit tool read-first guard`, () => { expect((result.content[0] as { text: string }).text).toMatch( /Edited|Replaced/ ) + await sandbox.dispose() }) it(`rejects edit across a wake boundary (fresh readSet)`, async () => { await writeFile(join(cwd, `g.txt`), `aaa bbb`, `utf-8`) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) const wake1ReadSet = new Set() - const wake1Read = createReadFileTool(cwd, wake1ReadSet) - const wake1Edit = createEditTool(cwd, wake1ReadSet) + const wake1Read = createReadFileTool(sandbox, wake1ReadSet) + const wake1Edit = createEditTool(sandbox, wake1ReadSet) await wake1Read.execute(`r1`, { path: `g.txt` }) const editResult1 = await wake1Edit.execute(`e1`, { path: `g.txt`, @@ -64,7 +70,7 @@ describe(`edit tool read-first guard`, () => { ) const wake2ReadSet = new Set() - const wake2Edit = createEditTool(cwd, wake2ReadSet) + const wake2Edit = createEditTool(sandbox, wake2ReadSet) const editResult2 = await wake2Edit.execute(`e2`, { path: `g.txt`, old_string: `xxx`, @@ -73,13 +79,15 @@ describe(`edit tool read-first guard`, () => { expect((editResult2.content[0] as { text: string }).text).toMatch( /has not been read in this session/ ) + await sandbox.dispose() }) it(`requires unique old_string when replace_all is false`, async () => { await writeFile(join(cwd, `dup.txt`), `foo foo`, `utf-8`) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) const readSet = new Set() - const read = createReadFileTool(cwd, readSet) - const edit = createEditTool(cwd, readSet) + const read = createReadFileTool(sandbox, readSet) + const edit = createEditTool(sandbox, readSet) await read.execute(`r`, { path: `dup.txt` }) const result = await edit.execute(`e`, { path: `dup.txt`, @@ -89,13 +97,15 @@ describe(`edit tool read-first guard`, () => { expect((result.content[0] as { text: string }).text).toMatch( 
/found 2 matches/ ) + await sandbox.dispose() }) it(`replaces all occurrences when replace_all is true`, async () => { await writeFile(join(cwd, `multi.txt`), `aa bb aa cc aa`, `utf-8`) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) const readSet = new Set() - const read = createReadFileTool(cwd, readSet) - const edit = createEditTool(cwd, readSet) + const read = createReadFileTool(sandbox, readSet) + const edit = createEditTool(sandbox, readSet) await read.execute(`r`, { path: `multi.txt` }) const result = await edit.execute(`e`, { path: `multi.txt`, @@ -106,5 +116,6 @@ describe(`edit tool read-first guard`, () => { expect((result.content[0] as { text: string }).text).toMatch( /3 occurrences|3 replacements/ ) + await sandbox.dispose() }) }) diff --git a/packages/agents-runtime/test/fetch-url-ssrf.test.ts b/packages/agents-runtime/test/fetch-url-ssrf.test.ts index c867e49578..334faa36be 100644 --- a/packages/agents-runtime/test/fetch-url-ssrf.test.ts +++ b/packages/agents-runtime/test/fetch-url-ssrf.test.ts @@ -1,15 +1,27 @@ +import { mkdtemp, rm } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { createFetchUrlTool } from '../src/tools/fetch-url' +import { unrestrictedSandbox } from '../src/sandbox/unrestricted' +import { SandboxError } from '../src/sandbox/types' +import type { Sandbox } from '../src/sandbox/types' -// Characterization: createFetchUrlTool today has no host policy — no -// allowlist, no private-IP denylist, no cloud-metadata IP filter. The tests -// below capture that surface so a follow-up SSRF-hardening PR has an explicit +// Characterization: createFetchUrlTool routed through unrestrictedSandbox +// has no host policy — no allowlist, no private-IP denylist, no +// cloud-metadata IP filter. 
The tests below capture that surface so a +// follow-up SSRF-hardening PR (NetPolicy on sandbox.fetch) has an explicit // regression target. -describe(`fetch_url — current SSRF surface`, () => { +// +// Under remoteSandbox or dockerSandbox the hostname allowlist already +// rejects these — see sandbox-remote.test.ts and sandbox-docker.test.ts. +describe(`fetch_url — current SSRF surface (unrestricted sandbox)`, () => { const originalFetch = globalThis.fetch let fetchMock: ReturnType + let cwd: string - beforeEach(() => { + beforeEach(async () => { + cwd = await mkdtemp(join(tmpdir(), `fetch-ssrf-`)) fetchMock = vi.fn( async () => new Response(`ok`, { @@ -20,8 +32,9 @@ describe(`fetch_url — current SSRF surface`, () => { globalThis.fetch = fetchMock as unknown as typeof globalThis.fetch }) - afterEach(() => { + afterEach(async () => { globalThis.fetch = originalFetch + await rm(cwd, { recursive: true, force: true }) }) it.each([ @@ -30,24 +43,60 @@ describe(`fetch_url — current SSRF surface`, () => { `http://10.0.0.1/`, // RFC1918 `http://192.168.1.1/`, // RFC1918 ])(`fetches %s without rejecting it`, async (url) => { - const tool = createFetchUrlTool({ extractWithLLM: async (t) => t }) - const result = await tool.execute(`call`, { - url, - prompt: `extract content`, - }) - expect(fetchMock).toHaveBeenCalledTimes(1) - expect(fetchMock.mock.calls[0]?.[0]).toBe(url) - // The tool returns the extracted content, not an SSRF guard error. - expect((result.content[0] as { text: string }).text).toBe(`ok`) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + try { + const tool = createFetchUrlTool(sandbox, { + extractWithLLM: async (t: string) => t, + }) + const result = await tool.execute(`call`, { + url, + prompt: `extract content`, + }) + expect(fetchMock).toHaveBeenCalledTimes(1) + expect(fetchMock.mock.calls[0]?.[0]).toBe(url) + // The tool returns the extracted content, not an SSRF guard error. 
+ expect((result.content[0] as { text: string }).text).toBe(`ok`) + } finally { + await sandbox.dispose() + } }) it(`follows redirects (redirect: 'follow') — DNS-rebinding / redirect-to-private not blocked`, async () => { - const tool = createFetchUrlTool({ extractWithLLM: async (t) => t }) - await tool.execute(`call`, { - url: `http://example.com/`, + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + try { + const tool = createFetchUrlTool(sandbox, { + extractWithLLM: async (t: string) => t, + }) + await tool.execute(`call`, { + url: `http://example.com/`, + prompt: `extract`, + }) + const init = fetchMock.mock.calls[0]?.[1] as RequestInit | undefined + expect(init?.redirect).toBe(`follow`) + } finally { + await sandbox.dispose() + } + }) + + it(`surfaces a network-policy denial distinctly from a generic fetch error`, async () => { + // Providers with an egress policy (docker/remote) reject blocked hosts with + // SandboxError('policy'). The tool must turn that into an actionable + // "blocked by policy" message rather than collapsing it into the generic + // "Error fetching URL" used for transient failures. 
+ const policySandbox = { + fetch: async () => { + throw new SandboxError(`policy`, `host denied`) + }, + } as unknown as Sandbox + const tool = createFetchUrlTool(policySandbox, { + extractWithLLM: async (t: string) => t, + }) + const result = await tool.execute(`call`, { + url: `http://169.254.169.254/`, prompt: `extract`, }) - const init = fetchMock.mock.calls[0]?.[1] as RequestInit | undefined - expect(init?.redirect).toBe(`follow`) + const text = (result.content[0] as { text: string }).text + expect(text).toContain(`network policy`) + expect(text).not.toContain(`Error fetching URL`) }) }) diff --git a/packages/agents-runtime/test/helpers/context-test-helpers.ts b/packages/agents-runtime/test/helpers/context-test-helpers.ts index 11b50ea9fe..9d0def322f 100644 --- a/packages/agents-runtime/test/helpers/context-test-helpers.ts +++ b/packages/agents-runtime/test/helpers/context-test-helpers.ts @@ -1,4 +1,5 @@ import { vi } from 'vitest' +import { tmpdir } from 'node:os' import { createHandlerContext } from '../../src/context-factory' import { assembleContext } from '../../src/context-assembly' import { ENTITY_COLLECTIONS, builtInCollections } from '../../src/entity-schema' @@ -15,6 +16,27 @@ import type { WakeSession, } from '../../src/types' import type { HydratedEventSourceWake } from '../../src/event-sources' +import type { Sandbox } from '../../src/sandbox/types' + +// Minimal sandbox stub for tests that exercise HandlerContext shape but +// don't actually call sandbox methods. Production wakes get a real +// sandbox from the runner's sandbox profile registry, selected by name. 
+export const testSandboxStub: Sandbox = { + name: `test-stub`, + workingDirectory: tmpdir(), + exec: async () => { + throw new Error(`test sandbox stub: exec not implemented`) + }, + readFile: async () => Buffer.alloc(0), + writeFile: async () => {}, + mkdir: async () => {}, + readdir: async () => [], + exists: async () => false, + remove: async () => {}, + stat: async () => ({ type: `file`, size: 0, mtimeMs: 0 }), + fetch: async () => new Response(``), + dispose: async () => {}, +} type DebugContext = { __debug: { @@ -295,6 +317,7 @@ export function createTestHandlerContext( actions: {}, electricTools: [], events: [], + sandbox: testSandboxStub, writeEvent, wakeSession: createFakeWakeSession(db), wakeEvent: opts.wakeEvent ?? { diff --git a/packages/agents-runtime/test/helpers/docker-probe.ts b/packages/agents-runtime/test/helpers/docker-probe.ts new file mode 100644 index 0000000000..785b3ca864 --- /dev/null +++ b/packages/agents-runtime/test/helpers/docker-probe.ts @@ -0,0 +1,17 @@ +import { isDockerAvailable } from '../../src/sandbox/docker/loader' + +/** + * Module-level Docker availability flag for vitest gating. Resolved + * eagerly via top-level await so `describe.skipIf(!dockerAvailable)` + * works at import time. Tests run as no-op skips when Docker is absent. + */ +export const dockerAvailable: boolean = await isDockerAvailable() + +/** + * A small public image with `sh`, `find`, `stat`, `rm`, `kill`, and + * `node` (so we can also smoke-test program execution). Pinned by digest + * to keep tests reproducible. 
+ */ +export const TEST_IMAGE = `node:20-alpine@sha256:fb4cd12c85ee03686f6af5362a0b0d56d50c58a04632e6c0fb8363f609372293` + +export const TEST_LABEL = `electric-test-sandbox` diff --git a/packages/agents-runtime/test/readset-isolation.test.ts b/packages/agents-runtime/test/readset-isolation.test.ts index fc31f12246..fadeac8bb1 100644 --- a/packages/agents-runtime/test/readset-isolation.test.ts +++ b/packages/agents-runtime/test/readset-isolation.test.ts @@ -4,6 +4,7 @@ import { join } from 'node:path' import { afterEach, beforeEach, describe, expect, it } from 'vitest' import { createEditTool } from '../src/tools/edit' import { createReadFileTool } from '../src/tools/read-file' +import { unrestrictedSandbox } from '../src/sandbox/unrestricted' describe(`readSet isolation across handler invocations`, () => { let cwd: string @@ -18,13 +19,14 @@ describe(`readSet isolation across handler invocations`, () => { it(`entity A's read does not satisfy entity B's edit guard`, async () => { await writeFile(join(cwd, `shared.txt`), `aaa bbb`, `utf-8`) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) const readSetA = new Set() - const readA = createReadFileTool(cwd, readSetA) + const readA = createReadFileTool(sandbox, readSetA) await readA.execute(`a`, { path: `shared.txt` }) const readSetB = new Set() - const editB = createEditTool(cwd, readSetB) + const editB = createEditTool(sandbox, readSetB) const result = await editB.execute(`b`, { path: `shared.txt`, old_string: `aaa`, @@ -34,5 +36,6 @@ describe(`readSet isolation across handler invocations`, () => { expect((result.content[0] as { text: string }).text).toMatch( /has not been read in this session/ ) + await sandbox.dispose() }) }) diff --git a/packages/agents-runtime/test/record-run.test.ts b/packages/agents-runtime/test/record-run.test.ts index 4c492787da..40424522ad 100644 --- a/packages/agents-runtime/test/record-run.test.ts +++ b/packages/agents-runtime/test/record-run.test.ts @@ -2,6 +2,7 @@ 
import { describe, expect, it, vi } from 'vitest' import { createHandlerContext } from '../src/context-factory' import { ENTITY_COLLECTIONS } from '../src/entity-schema' import { createLocalOnlyTestCollection } from './helpers/local-only' +import { testSandboxStub } from './helpers/context-test-helpers' import type { EntityStreamDBWithActions } from '../src/types' import type { ChangeEvent } from '@durable-streams/state' @@ -52,6 +53,7 @@ function buildHarness(opts?: { existingRunKeys?: Array }): { state: {}, actions: {}, electricTools: [], + sandbox: testSandboxStub, events: [] as Array, writeEvent, wakeSession: { diff --git a/packages/agents-runtime/test/runtime-server-client-update-metadata.test.ts b/packages/agents-runtime/test/runtime-server-client-update-metadata.test.ts index a681969900..3277c01c9a 100644 --- a/packages/agents-runtime/test/runtime-server-client-update-metadata.test.ts +++ b/packages/agents-runtime/test/runtime-server-client-update-metadata.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it, vi } from 'vitest' import { createRuntimeServerClient } from '../src/runtime-server-client' import { createHandlerContext } from '../src/context-factory' +import { testSandboxStub } from './helpers/context-test-helpers' describe(`runtime-server-client.setTag`, () => { it(`ensureStream creates an exact stream path with the requested content type`, async () => { @@ -242,6 +243,7 @@ describe(`createHandlerContext: tags + tag mutations`, () => { state: {}, actions: {}, electricTools: [], + sandbox: testSandboxStub, events: [], writeEvent: () => {}, wakeSession: {} as any, diff --git a/packages/agents-runtime/test/sandbox-conformance.test.ts b/packages/agents-runtime/test/sandbox-conformance.test.ts new file mode 100644 index 0000000000..407134b1d8 --- /dev/null +++ b/packages/agents-runtime/test/sandbox-conformance.test.ts @@ -0,0 +1,538 @@ +import { mkdtemp, rm, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 
'node:path' +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { remoteSandbox } from '../src/sandbox/remote' +import { unrestrictedSandbox } from '../src/sandbox/unrestricted' +import { dockerSandbox } from '../src/sandbox/docker' +import { SandboxError } from '../src/sandbox/types' +import { KNOWN_ADAPTERS } from '../src/sandbox' +import type { Sandbox } from '../src/sandbox/types' +import type { RemoteSandboxClient } from '../src/sandbox/remote/types' +import { dockerAvailable, TEST_IMAGE, TEST_LABEL } from './helpers/docker-probe' + +/** + * Cross-provider conformance: a single set of scenarios exercised against + * unrestricted, remote (driven by an in-memory fake of an SDK matching + * our RemoteSandboxClient contract), and docker (gated by daemon + * availability). For scenarios where a provider has fundamentally + * different semantics, the case is marked accordingly and the test + * asserts the documented outcome for that provider. + * + * The contract this enforces: + * - exec is a real subprocess on unrestricted; a delegated call on + * remote; a container exec on docker. + * - writeFile + readFile roundtrip works. + * - writeFile outside the working directory is rejected with a + * SandboxError of kind 'policy'. + * - dispose is safe to call. + */ + +interface ProviderCapabilities { + /** AbortSignal on exec terminates the subprocess; false ⇒ best-effort/no-op. */ + supportsAbort: boolean +} + +interface ProviderFactory { + name: string + /** The KNOWN_ADAPTERS slug this provider exercises. */ + adapter: (typeof KNOWN_ADAPTERS)[number] + enabled: boolean + capabilities: ProviderCapabilities + /** + * "Outside the working directory" probe path. For host-filesystem + * providers (unrestricted) we use a host tempdir; for containerized + * providers we use /etc/passwd which is outside the sandbox cwd but + * always present in the container. 
+ */ + outsideKind: `host-tempdir` | `etc-passwd` + /** + * Whether READS are contained to the workspace. Writes are contained on + * every provider; reads are contained on unrestricted/docker but `remote` + * allows reads anywhere in the VM (see Sandbox FS-method contract). + */ + containsReads: boolean +} + +function makeFakeRemoteClient(): RemoteSandboxClient { + const files = new Map<string, Buffer>() + const dirs = new Set<string>() + return { + async exec(opts) { + // Minimal fake exec that handles a few shell patterns we use in the + // conformance scenarios. Real provider execs run inside a VM; this + // fake satisfies the interface contract without simulating shell. + const cmd = opts.command + if (cmd.startsWith(`echo `)) { + const out = cmd.slice(5).replace(/^['"]|['"]$/g, ``) + return { + stdout: Buffer.from(out + `\n`), + stderr: Buffer.from(``), + exitCode: 0, + } + } + return { + stdout: Buffer.from(``), + stderr: Buffer.from(``), + exitCode: 0, + } + }, + async readFile(path) { + const buf = files.get(path) + if (!buf) { + const e: NodeJS.ErrnoException = new Error(`ENOENT: ${path}`) + e.code = `ENOENT` + throw e + } + return buf + }, + async writeFile(path, content) { + files.set(path, Buffer.isBuffer(content) ? content : Buffer.from(content)) + }, + async mkdir(path) { + dirs.add(path) + }, + async readdir(path) { + const prefix = path.endsWith(`/`) ? path : path + `/` + const seen = new Set<string>() + const out: Array<{ name: string; type: `file` | `directory` }> = [] + for (const f of files.keys()) { + if (!f.startsWith(prefix)) continue + const rest = f.slice(prefix.length) + const [first] = rest.split(`/`) + if (!first || seen.has(first)) continue + seen.add(first) + out.push({ + name: first, + type: rest.includes(`/`) ? 
`directory` : `file`, + }) + } + for (const d of dirs) { + if (!d.startsWith(prefix)) continue + const rest = d.slice(prefix.length) + const [first] = rest.split(`/`) + if (!first || seen.has(first)) continue + seen.add(first) + out.push({ name: first, type: `directory` }) + } + return out + }, + async exists(path) { + if (files.has(path)) return true + if (dirs.has(path)) return true + return false + }, + async remove(path, opts) { + if (files.delete(path)) return + if (opts?.recursive) { + const prefix = path.endsWith(`/`) ? path : path + `/` + for (const f of [...files.keys()]) + if (f.startsWith(prefix) || f === path) files.delete(f) + for (const d of [...dirs]) + if (d.startsWith(prefix) || d === path) dirs.delete(d) + return + } + const e: NodeJS.ErrnoException = new Error(`ENOENT: ${path}`) + e.code = `ENOENT` + throw e + }, + async stat(path) { + const buf = files.get(path) + if (buf) return { type: `file`, size: buf.length, mtimeMs: 0 } + if (dirs.has(path)) return { type: `directory`, size: 0, mtimeMs: 0 } + const e: NodeJS.ErrnoException = new Error(`ENOENT: ${path}`) + e.code = `ENOENT` + throw e + }, + async kill() {}, + } +} + +const providers: Array< + ProviderFactory & { + create(workingDirectory: string): Promise<Sandbox> + } +> = [ + { + name: `unrestricted`, + adapter: `unrestricted`, + enabled: true, + capabilities: { + supportsAbort: true, + }, + outsideKind: `host-tempdir`, + containsReads: true, + create: (cwd) => unrestrictedSandbox({ workingDirectory: cwd }), + }, + { + name: `remote (fake)`, + adapter: `remote`, + enabled: true, + capabilities: { + // The in-memory fake doesn't forward signals. + supportsAbort: false, + }, + outsideKind: `etc-passwd`, + // remote deliberately allows reads outside the workspace (the VM is + // already host-isolated); only writes are contained. 
+ containsReads: false, + create: (cwd) => + remoteSandbox({ + provider: `e2b`, + workingDirectory: cwd, + client: makeFakeRemoteClient(), + initialNetworkPolicy: { mode: `allowlist`, allow: [`example.com`] }, + }), + }, + { + name: `docker`, + adapter: `docker`, + enabled: dockerAvailable, + capabilities: { + supportsAbort: true, + }, + outsideKind: `etc-passwd`, + containsReads: true, + create: () => + dockerSandbox({ + image: TEST_IMAGE, + // Container workdir is the implicit /work; we ignore the host + // tempdir argument — for containerized adapters the cwd is an + // in-container path. + workingDirectory: `/work`, + initialNetworkPolicy: { mode: `allowlist`, allow: [`example.com`] }, + exposedPorts: [9999], + labels: { [TEST_LABEL]: `1` }, + }), + }, +] + +describe(`sandbox conformance`, () => { + it(`every KNOWN_ADAPTERS slug is exercised by exactly one provider`, () => { + const slugs = providers.map((p) => p.adapter).sort() + const expected = [...KNOWN_ADAPTERS].sort() + expect(slugs).toEqual(expected) + }) + + for (const provider of providers) { + const d = provider.enabled ? describe : describe.skip + d(provider.name, () => { + let cwd: string + + beforeEach(async () => { + cwd = await mkdtemp(join(tmpdir(), `conformance-${provider.name}-`)) + }) + + afterEach(async () => { + await rm(cwd, { recursive: true, force: true }) + }) + + it(`writeFile + readFile roundtrip inside the working directory`, async () => { + const sandbox = await provider.create(cwd) + try { + const path = join(sandbox.workingDirectory, `roundtrip.txt`) + await sandbox.writeFile(path, `payload`) + const buf = await sandbox.readFile(path) + expect(buf.toString(`utf-8`)).toBe(`payload`) + } finally { + await sandbox.dispose() + } + }) + + it(`writeFile outside the working directory is rejected with a policy SandboxError`, async () => { + // Workspace containment is a sandbox concern, enforced uniformly + // across every provider against the filesystem it owns. 
Unrestricted + // shares the host FS and contains via realpath; docker/remote contain + // against their container/VM root. + const sandbox = await provider.create(cwd) + const outside = + provider.outsideKind === `etc-passwd` + ? `/etc/passwd` + : join(tmpdir(), `conformance-outside-${Date.now()}.txt`) + try { + await expect( + sandbox.writeFile(outside, `nope`) + ).rejects.toBeInstanceOf(SandboxError) + await expect( + sandbox.writeFile(outside, `nope`) + ).rejects.toMatchObject({ kind: `policy` }) + } finally { + await sandbox.dispose() + } + }) + + it(`readFile outside the working directory follows the provider's read-containment policy`, async () => { + const sandbox = await provider.create(cwd) + const outside = + provider.outsideKind === `etc-passwd` + ? `/etc/passwd` + : join(tmpdir(), `conformance-read-outside-${Date.now()}.txt`) + if (provider.outsideKind === `host-tempdir`) { + await writeFile(outside, `host-secret`) + } + try { + if (provider.containsReads) { + await expect(sandbox.readFile(outside)).rejects.toMatchObject({ + kind: `policy`, + }) + } else { + // remote allows reads anywhere in the VM — the read is not + // policy-gated. (Against the in-memory fake the path is absent, + // so it surfaces as an ENOENT, not a policy rejection; the + // invariant under test is the absence of a policy gate.) 
+ await sandbox.readFile(outside).then( + () => {}, + (err: unknown) => { + const isPolicy = + err instanceof SandboxError && err.kind === `policy` + expect(isPolicy).toBe(false) + } + ) + } + } finally { + if (provider.outsideKind === `host-tempdir`) { + await rm(outside, { force: true }) + } + await sandbox.dispose() + } + }) + + it(`exec returns a result with exitCode`, async () => { + const sandbox = await provider.create(cwd) + try { + const r = await sandbox.exec({ command: `echo hello` }) + expect(r.exitCode).toBe(0) + expect(r.stdout.toString().trim()).toBe(`hello`) + } finally { + await sandbox.dispose() + } + }) + + it(`dispose is safe (does not throw)`, async () => { + const sandbox = await provider.create(cwd) + await expect(sandbox.dispose()).resolves.toBeUndefined() + }) + + it(`operations after dispose are rejected with a runtime SandboxError`, async () => { + const sandbox = await provider.create(cwd) + await sandbox.dispose() + await expect( + sandbox.exec({ command: `echo hi` }) + ).rejects.toMatchObject({ kind: `runtime` }) + await expect( + sandbox.readFile(join(sandbox.workingDirectory, `x.txt`)) + ).rejects.toMatchObject({ kind: `runtime` }) + await expect( + sandbox.writeFile(join(sandbox.workingDirectory, `x.txt`), `nope`) + ).rejects.toMatchObject({ kind: `runtime` }) + }) + + it(`exposes name and workingDirectory`, async () => { + const sandbox = await provider.create(cwd) + try { + expect(sandbox.name.length).toBeGreaterThan(0) + expect(sandbox.workingDirectory.length).toBeGreaterThan(0) + } finally { + await sandbox.dispose() + } + }) + + it(`readFile rejects ENOENT for missing files`, async () => { + const sandbox = await provider.create(cwd) + try { + const missing = join(sandbox.workingDirectory, `does-not-exist.txt`) + await expect(sandbox.readFile(missing)).rejects.toThrow() + } finally { + await sandbox.dispose() + } + }) + + it(`exists returns false for missing, true after writeFile`, async () => { + const sandbox = await 
provider.create(cwd) + try { + const path = join(sandbox.workingDirectory, `exists.txt`) + expect(await sandbox.exists(path)).toBe(false) + await sandbox.writeFile(path, `hi`) + expect(await sandbox.exists(path)).toBe(true) + } finally { + await sandbox.dispose() + } + }) + + it(`stat returns file metadata after writeFile`, async () => { + const sandbox = await provider.create(cwd) + try { + const path = join(sandbox.workingDirectory, `meta.txt`) + await sandbox.writeFile(path, `12345`) + const s = await sandbox.stat(path) + expect(s.type).toBe(`file`) + expect(s.size).toBe(5) + } finally { + await sandbox.dispose() + } + }) + + it(`readdir lists entries written into the working directory`, async () => { + const sandbox = await provider.create(cwd) + try { + const root = sandbox.workingDirectory + await sandbox.writeFile(join(root, `a.txt`), `a`) + await sandbox.writeFile(join(root, `b.txt`), `b`) + await sandbox.mkdir(join(root, `sub`)) + const entries = await sandbox.readdir(root) + const names = entries.map((e) => e.name).sort() + expect(names).toContain(`a.txt`) + expect(names).toContain(`b.txt`) + expect(names).toContain(`sub`) + const sub = entries.find((e) => e.name === `sub`) + expect(sub?.type).toBe(`directory`) + } finally { + await sandbox.dispose() + } + }) + + it(`remove deletes a file and updates exists`, async () => { + const sandbox = await provider.create(cwd) + try { + const path = join(sandbox.workingDirectory, `to-remove.txt`) + await sandbox.writeFile(path, `bye`) + expect(await sandbox.exists(path)).toBe(true) + await sandbox.remove(path) + expect(await sandbox.exists(path)).toBe(false) + } finally { + await sandbox.dispose() + } + }) + + it(`remove({recursive:true}) deletes a directory tree`, async () => { + const sandbox = await provider.create(cwd) + try { + const sub = join(sandbox.workingDirectory, `tree`) + await sandbox.mkdir(sub) + await sandbox.writeFile(join(sub, `leaf.txt`), `x`) + await sandbox.remove(sub, { recursive: true }) + 
expect(await sandbox.exists(sub)).toBe(false) + } finally { + await sandbox.dispose() + } + }) + + it(`stat rejects for missing paths`, async () => { + const sandbox = await provider.create(cwd) + try { + const missing = join(sandbox.workingDirectory, `nope.txt`) + await expect(sandbox.stat(missing)).rejects.toThrow() + } finally { + await sandbox.dispose() + } + }) + + it(`remove rejects nonexistent path (non-recursive)`, async () => { + const sandbox = await provider.create(cwd) + try { + const missing = join(sandbox.workingDirectory, `nope.txt`) + await expect(sandbox.remove(missing)).rejects.toThrow() + } finally { + await sandbox.dispose() + } + }) + + it(`remove rejects a directory without recursive flag`, async () => { + const sandbox = await provider.create(cwd) + try { + const sub = join(sandbox.workingDirectory, `nonempty`) + await sandbox.mkdir(sub) + await sandbox.writeFile(join(sub, `leaf.txt`), `x`) + await expect(sandbox.remove(sub)).rejects.toThrow() + } finally { + await sandbox.dispose() + } + }) + + it.skipIf(!provider.capabilities.supportsAbort)( + `exec honors AbortSignal mid-flight`, + async () => { + const sandbox = await provider.create(cwd) + try { + const ac = new AbortController() + const p = sandbox.exec({ + command: `sleep 30`, + timeoutMs: 5000, + signal: ac.signal, + }) + setTimeout(() => ac.abort(), 50) + const r = await p + expect(r.aborted).toBe(true) + expect(r.timedOut).toBe(false) + expect(r.exitCode === null || r.exitCode !== 0).toBe(true) + } finally { + await sandbox.dispose() + } + } + ) + + it.skipIf(!provider.capabilities.supportsAbort)( + `exec returns immediately when signal is already aborted`, + async () => { + const sandbox = await provider.create(cwd) + try { + const ac = new AbortController() + ac.abort() + const r = await sandbox.exec({ + command: `sleep 30`, + timeoutMs: 5000, + signal: ac.signal, + }) + expect(r.aborted).toBe(true) + expect(r.timedOut).toBe(false) + } finally { + await sandbox.dispose() + } + } + 
) + }) + } + + // Symlink escape — pertinent for unrestricted (real host filesystem), + // where the sandbox follows the link, sees the realpath leave the + // workspace, and rejects. Skip for remote (VM-rooted, fake doesn't model + // symlinks) and docker (container fs, host workdir isn't mounted in). + for (const provider of providers.filter( + (p) => p.outsideKind === `host-tempdir` + )) { + const d = provider.enabled ? describe : describe.skip + d(`${provider.name} — symlink escape`, () => { + let cwd: string + let outside: string + + beforeEach(async () => { + cwd = await mkdtemp(join(tmpdir(), `conformance-sym-${provider.name}-`)) + outside = await mkdtemp( + join(tmpdir(), `conformance-sym-out-${provider.name}-`) + ) + }) + + afterEach(async () => { + await rm(cwd, { recursive: true, force: true }) + await rm(outside, { recursive: true, force: true }) + }) + + it(`readFile rejects a symlink pointing outside the workspace`, async () => { + const { symlink } = await import(`node:fs/promises`) + await writeFile(join(outside, `secret`), `s3cret`, `utf-8`) + await symlink(join(outside, `secret`), join(cwd, `link`)) + + const sandbox = await provider.create(cwd) + try { + // Symlink defense is a sandbox concern: unrestricted resolves the + // realpath, sees it escape the workspace, and rejects with policy. 
+ await expect( + sandbox.readFile(join(cwd, `link`)) + ).rejects.toMatchObject({ kind: `policy` }) + } finally { + await sandbox.dispose() + } + }) + }) + } +}) diff --git a/packages/agents-runtime/test/sandbox-default.test.ts b/packages/agents-runtime/test/sandbox-default.test.ts new file mode 100644 index 0000000000..22766896ae --- /dev/null +++ b/packages/agents-runtime/test/sandbox-default.test.ts @@ -0,0 +1,33 @@ +import { mkdtemp, rm } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { chooseDefaultSandbox } from '../src/sandbox/default' + +/** + * chooseDefaultSandbox(workingDirectory): the runtime helper that picks + * the default Sandbox provider for built-in entities (Horton, Worker). + * Always returns `unrestrictedSandbox`; stronger isolation is opt-in via + * `dockerSandbox` / `remoteSandbox`. + */ +describe(`chooseDefaultSandbox`, () => { + let cwd: string + + beforeEach(async () => { + cwd = await mkdtemp(join(tmpdir(), `sandbox-default-`)) + }) + + afterEach(async () => { + await rm(cwd, { recursive: true, force: true }) + }) + + it(`returns unrestrictedSandbox`, async () => { + const sandbox = await chooseDefaultSandbox(cwd) + try { + expect(sandbox.name).toBe(`unrestricted`) + expect(sandbox.workingDirectory).toBe(cwd) + } finally { + await sandbox.dispose() + } + }) +}) diff --git a/packages/agents-runtime/test/sandbox-docker-net-policy.test.ts b/packages/agents-runtime/test/sandbox-docker-net-policy.test.ts new file mode 100644 index 0000000000..4b100732ba --- /dev/null +++ b/packages/agents-runtime/test/sandbox-docker-net-policy.test.ts @@ -0,0 +1,129 @@ +import { describe, expect, it } from 'vitest' +import { + hostAllowedByPolicy, + isPrivateOrLinkLocal, + matchesHost, +} from '../src/sandbox/docker/net-policy' + +// Host-side egress enforcement for the docker sandbox's fetch(). 
Pure logic, +// no container — this is where the allowlist + SSRF guard live now that the +// in-container proxy is gone. + +describe(`hostAllowedByPolicy`, () => { + it(`allow-all permits any host`, () => { + expect(hostAllowedByPolicy({ mode: `allow-all` }, `example.com`)).toBe(true) + }) + + it(`deny-all permits nothing`, () => { + expect(hostAllowedByPolicy({ mode: `deny-all` }, `example.com`)).toBe(false) + }) + + it(`allowlist permits only listed hosts (incl. wildcard)`, () => { + const policy = { + mode: `allowlist`, + allow: [`example.com`, `*.test.dev`], + } as const + expect(hostAllowedByPolicy(policy, `example.com`)).toBe(true) + expect(hostAllowedByPolicy(policy, `api.test.dev`)).toBe(true) + expect(hostAllowedByPolicy(policy, `test.dev`)).toBe(true) + expect(hostAllowedByPolicy(policy, `evil.com`)).toBe(false) + expect(hostAllowedByPolicy(policy, `notexample.com`)).toBe(false) + }) +}) + +describe(`matchesHost`, () => { + it(`matches exact, localhost loopback, and *.suffix`, () => { + expect(matchesHost(`example.com`, `example.com`)).toBe(true) + expect(matchesHost(`127.0.0.1`, `localhost`)).toBe(true) + expect(matchesHost(`::1`, `localhost`)).toBe(true) + expect(matchesHost(`a.b.example.com`, `*.example.com`)).toBe(true) + expect(matchesHost(`example.com`, `*.example.com`)).toBe(true) + expect(matchesHost(`evil.com`, `*.example.com`)).toBe(false) + }) +}) + +describe(`isPrivateOrLinkLocal (SSRF guard)`, () => { + it.each([ + `169.254.169.254`, // AWS/GCP metadata + link-local + `127.0.0.1`, // loopback + `10.0.0.1`, // RFC1918 + `172.16.5.4`, // RFC1918 + `192.168.1.1`, // RFC1918 + `100.64.0.1`, // CGNAT + `0.0.0.0`, // unspecified + `::1`, // IPv6 loopback + `fe80::1`, // IPv6 link-local + `fc00::1`, // IPv6 ULA (fc00::/7) + `fd00::1`, // IPv6 ULA + `fdab:1234::1`, // IPv6 ULA, longer first hextet + ])(`flags %s as private/link-local`, (host) => { + expect(isPrivateOrLinkLocal(host)).toBe(true) + }) + + it.each([`93.184.216.34`, `example.com`, 
`8.8.8.8`, `172.32.0.1`])( + `treats %s as public`, + (host) => { + expect(isPrivateOrLinkLocal(host)).toBe(false) + } + ) + + // The IPv6 ULA check (fc00::/7) must require the colon: a DNS hostname that + // merely starts with `fc`/`fd` is public and must not be denied. Without the + // colon guard these popular domains were wrongly blocked by the docker fetch + // tool regardless of the allow-all / allowlist policy. + it.each([ + `fc2.com`, // top-traffic public site + `fda.gov`, // US FDA + `fdrive.com`, + `fcdomain.com`, + `fc-bayern.com`, + ])(`treats fc/fd public hostname %s as public`, (host) => { + expect(isPrivateOrLinkLocal(host)).toBe(false) + }) + + it.each([ + `2130706433`, // 127.0.0.1 as a decimal integer + `0x7f000001`, // 127.0.0.1 in hex + `[::1]`, // bracketed IPv6 loopback (URL.hostname keeps the brackets) + `[fe80::1]`, // bracketed IPv6 link-local + `::ffff:169.254.169.254`, // IPv4-mapped IPv6 metadata IP + `[::ffff:a9fe:a9fe]`, // IPv4-mapped IPv6 metadata IP, hex + brackets + `2852039166`, // 169.254.169.254 as a decimal integer + ])(`flags encoded literal %s as private/link-local`, (host) => { + expect(isPrivateOrLinkLocal(host)).toBe(true) + }) + + // inet_aton shorthand: libc's resolver (used by getaddrinfo on Linux/macOS) + // accepts 1–4 dot-separated parts, each decimal/octal/hex, packing the final + // part into the low-order bytes. A dotted-quad-only guard misses these, yet + // they resolve to private space via the OS resolver — so they're a real SSRF + // bypass for fetch_url under the default (allow-all) docker profile. 
+ it.each([ + `127.1`, // 2-part: 127.0.0.1 + `127.0.1`, // 3-part: 127.0.0.1 + `0177.0.0.1`, // octal first octet: 127.0.0.1 + `0x7f.0.0.1`, // hex first octet: 127.0.0.1 + `0x7f.1`, // hex + 2-part: 127.0.0.1 + `017700000001`, // octal whole-integer: 127.0.0.1 + `0xa9fea9fe`, // 169.254.169.254 in hex (metadata) + `169.254.43518`, // 3-part metadata: 169.254.169.254 + `10.1`, // 2-part RFC1918: 10.0.0.1 + `0xa.0.0.1`, // hex first octet RFC1918: 10.0.0.1 + ])(`flags inet_aton shorthand %s as private/link-local`, (host) => { + expect(isPrivateOrLinkLocal(host)).toBe(true) + }) + + // The loose parser must not over-claim: numeric-looking public addresses and + // anything that isn't a valid IPv4 literal (real hostnames, >4 parts, + // out-of-range octets) stay public so legitimate fetches aren't blocked. + it.each([ + `8.8`, // 2-part: 8.0.0.8 (public) + `93.184.216.34`, // dotted-quad public + `172.32.0.1`, // just outside RFC1918 + `1.2.3.4.5`, // 5 parts — not an IPv4 literal, treat as hostname + `08.0.0.1`, // invalid octal (8 not an octal digit) — not an IP + `0x7g.0.0.1`, // invalid hex — not an IP + ])(`treats %s as public`, (host) => { + expect(isPrivateOrLinkLocal(host)).toBe(false) + }) +}) diff --git a/packages/agents-runtime/test/sandbox-docker-smoke.test.ts b/packages/agents-runtime/test/sandbox-docker-smoke.test.ts new file mode 100644 index 0000000000..945b5cca80 --- /dev/null +++ b/packages/agents-runtime/test/sandbox-docker-smoke.test.ts @@ -0,0 +1,61 @@ +import { describe, expect, it } from 'vitest' +import { dockerSandbox } from '../src/sandbox/docker' +import { dockerAvailable, TEST_IMAGE } from './helpers/docker-probe' + +const d = dockerAvailable ? 
describe : describe.skip + +d(`ad-hoc docker sandbox smoke`, () => { + it(`exec basic, inspect caps, inspect /etc/passwd vs host, attempt mount`, async () => { + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { 'electric-test-sandbox': `1` }, + }) + try { + const uname = await sandbox.exec({ command: `uname -a` }) + console.log(` [uname -a] ${uname.stdout.toString().trim()}`) + expect(uname.stdout.toString()).toContain(`Linux`) + + const caps = await sandbox.exec({ + command: `cat /proc/self/status | grep -E '^Cap(Eff|Bnd|Prm)'`, + }) + console.log( + ` [caps]\n${caps.stdout + .toString() + .trim() + .split(`\n`) + .map((l) => ` ${l}`) + .join(`\n`)}` + ) + // CapEff should be all zeros given CapDrop=ALL + expect(caps.stdout.toString()).toMatch(/CapEff:\s+0000000000000000/) + + const id = await sandbox.exec({ command: `id` }) + console.log(` [id] ${id.stdout.toString().trim()}`) + + const containerPasswd = await sandbox.exec({ + command: `wc -l < /etc/passwd`, + }) + const lines = parseInt(containerPasswd.stdout.toString().trim(), 10) + console.log(` [container /etc/passwd lines] ${lines}`) + expect(lines).toBeGreaterThan(0) + expect(lines).toBeLessThan(50) + + const lsUsers = await sandbox.exec({ + command: `ls /Users; echo "exit=$?"`, + }) + console.log( + ` [ls /Users] ${lsUsers.stdout.toString().trim().split(`\n`).join(` | `)}` + ) + // Inside the container, /Users does not exist — host fs is not mounted. 
+ expect(lsUsers.stdout.toString()).toMatch(/exit=[1-9]/) + + const mountTry = await sandbox.exec({ + command: `mount -t tmpfs none /mnt 2>&1; echo "exit=$?"`, + }) + console.log(` [mount attempt] ${mountTry.stdout.toString().trim()}`) + expect(mountTry.stdout.toString()).toMatch(/exit=[1-9]/) + } finally { + await sandbox.dispose() + } + }, 60_000) +}) diff --git a/packages/agents-runtime/test/sandbox-docker.test.ts b/packages/agents-runtime/test/sandbox-docker.test.ts new file mode 100644 index 0000000000..df69688a20 --- /dev/null +++ b/packages/agents-runtime/test/sandbox-docker.test.ts @@ -0,0 +1,770 @@ +import { afterAll, afterEach, beforeAll, describe, expect, it } from 'vitest' +import { mkdtemp, rm, symlink, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { + __resetPersistentRegistryForTests, + dockerSandbox, + sweepOrphanedDockerSandboxes, +} from '../src/sandbox/docker' +import { loadDockerode } from '../src/sandbox/docker/loader' +import { SandboxError } from '../src/sandbox/types' +import { dockerAvailable, TEST_IMAGE, TEST_LABEL } from './helpers/docker-probe' + +/** + * dockerSandbox integration tests. The whole describe block is gated on + * `dockerAvailable` — if the daemon is unreachable the suite skips + * silently (CI prints one warning at module load). + */ + +if (!dockerAvailable) { + console.warn( + `[sandbox-docker] Docker daemon unreachable — skipping docker sandbox tests` + ) +} + +const d = dockerAvailable ? 
describe : describe.skip + +async function sweepTestContainers(): Promise<void> { + const Docker = await loadDockerode() + const docker = new Docker() + const containers = await docker.listContainers({ + all: true, + filters: { label: [`${TEST_LABEL}=1`] }, + }) + await Promise.all( + containers.map((c) => + docker + .getContainer(c.Id) + .remove({ force: true, v: true }) + .catch(() => {}) // best-effort: removal failures are deliberately ignored + ) + ) +} + +d(`dockerSandbox`, () => { + beforeAll(async () => { + // Best-effort cleanup of leftover containers from previous runs. + await sweepTestContainers() + }, 30_000) + + afterAll(async () => { + await sweepTestContainers() + }, 30_000) + + afterEach(async () => { + // Every container now flows through the registry + debounced teardown; + // clear the in-process bookkeeping (and its timers) between tests. + __resetPersistentRegistryForTests() + await sweepTestContainers() + }, 30_000) + + it(`exec roundtrip with stdout / exitCode`, async () => { + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + }) + try { + const r = await sandbox.exec({ command: `echo hello-from-sandbox` }) + expect(r.exitCode).toBe(0) + expect(r.stdout.toString().trim()).toBe(`hello-from-sandbox`) + expect(r.aborted).toBe(false) + expect(r.timedOut).toBe(false) + } finally { + await sandbox.dispose() + } + }, 60_000) + + it(`exec env propagation`, async () => { + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + }) + try { + const r = await sandbox.exec({ + command: `echo $MY_VAR`, + env: { MY_VAR: `propagated` }, + }) + expect(r.stdout.toString().trim()).toBe(`propagated`) + } finally { + await sandbox.dispose() + } + }, 60_000) + + it(`writeFile + readFile roundtrip via tar archives`, async () => { + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + }) + try { + await sandbox.writeFile(`/work/hello.txt`, `hi from host`) + const buf = await 
sandbox.readFile(`/work/hello.txt`) + expect(buf.toString(`utf-8`)).toBe(`hi from host`) + } finally { + await sandbox.dispose() + } + }, 60_000) + + it(`exists/stat/readdir/remove via in-container shell`, async () => { + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + }) + try { + expect(await sandbox.exists(`/work/never-existed.txt`)).toBe(false) + await sandbox.writeFile(`/work/probe.txt`, `12345`) + expect(await sandbox.exists(`/work/probe.txt`)).toBe(true) + const s = await sandbox.stat(`/work/probe.txt`) + expect(s.type).toBe(`file`) + expect(s.size).toBe(5) + await sandbox.mkdir(`/work/sub`) + const entries = await sandbox.readdir(`/work`) + const names = entries.map((e) => e.name).sort() + expect(names).toContain(`probe.txt`) + expect(names).toContain(`sub`) + const sub = entries.find((e) => e.name === `sub`) + expect(sub?.type).toBe(`directory`) + await sandbox.remove(`/work/probe.txt`) + expect(await sandbox.exists(`/work/probe.txt`)).toBe(false) + } finally { + await sandbox.dispose() + } + }, 60_000) + + it(`writeFile rejects paths outside the working directory`, async () => { + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + }) + try { + await expect( + sandbox.writeFile(`/etc/passwd`, `nope`) + ).rejects.toBeInstanceOf(SandboxError) + } finally { + await sandbox.dispose() + } + }, 60_000) + + it(`read-side methods enforce the working directory boundary`, async () => { + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + }) + try { + // readFile, readdir, stat all throw for paths outside /work; exists + // returns false (safe-probe semantics, matching native + unrestricted). 
+ await expect(sandbox.readFile(`/etc/passwd`)).rejects.toMatchObject({ + kind: `policy`, + }) + await expect(sandbox.readdir(`/etc`)).rejects.toMatchObject({ + kind: `policy`, + }) + await expect(sandbox.stat(`/etc/passwd`)).rejects.toMatchObject({ + kind: `policy`, + }) + expect(await sandbox.exists(`/etc/passwd`)).toBe(false) + } finally { + await sandbox.dispose() + } + }, 60_000) + + it(`hardened defaults: cap-drop, no-new-privileges, no docker socket access`, async () => { + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + }) + try { + // /var/run/docker.sock isn't mounted — a sandboxed agent that gets + // the socket can trivially escape by launching a new container with + // host bind-mounts. + const sockAttempt = await sandbox.exec({ + command: `test -S /var/run/docker.sock && echo SOCK_PRESENT || echo SOCK_ABSENT`, + }) + expect(sockAttempt.stdout.toString().trim()).toBe(`SOCK_ABSENT`) + + // CapAdd is empty, CapDrop=ALL → privileged ops fail. `mount` + // requires CAP_SYS_ADMIN. (Note: Docker Desktop / OrbStack apply + // their own default seccomp/apparmor on top; the relevant signal + // here is exit != 0.) + const mountAttempt = await sandbox.exec({ + command: `mount -t tmpfs none /mnt 2>&1; echo "exit=$?"`, + }) + expect(mountAttempt.stdout.toString()).toMatch(/exit=([1-9]\d*)/) + + // chroot is another CAP_SYS_CHROOT canary. + const chrootAttempt = await sandbox.exec({ + command: `chroot /tmp /bin/echo nope 2>&1; echo "exit=$?"`, + }) + expect(chrootAttempt.stdout.toString()).toMatch(/exit=([1-9]\d*)/) + + // no-new-privileges blocks setuid escalations. `su` typically fails + // with "Authentication failure" or similar non-zero exit under this + // flag. 
+ const suAttempt = await sandbox.exec({ + command: `su root -c true 2>&1; echo "exit=$?"`, + }) + expect(suAttempt.stdout.toString()).toMatch(/exit=([1-9]\d*)/) + } finally { + await sandbox.dispose() + } + }, 60_000) + + it(`refuses to mount the host Docker socket via extraMounts`, async () => { + await expect( + dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + extraMounts: [ + { + hostPath: `/var/run/docker.sock`, + containerPath: `/var/run/docker.sock`, + readOnly: true, + }, + ], + }) + ).rejects.toBeInstanceOf(SandboxError) + }, 20_000) + + it(`refuses a symlink that resolves to the Docker socket`, async () => { + // The literal hostPath doesn't contain "docker.sock", but it symlinks to a + // file that does — the regex-only check would let it through. realpath must + // resolve it first. Uses a self-controlled target named docker.sock so the + // test doesn't depend on the host's real socket path. + const dir = await mkdtemp(join(tmpdir(), `dockersock-`)) + const target = join(dir, `docker.sock`) + const link = join(dir, `innocent`) + await writeFile(target, ``) + await symlink(target, link) + try { + await expect( + dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + extraMounts: [ + { + hostPath: link, + containerPath: `/var/run/docker.sock`, + readOnly: true, + }, + ], + }) + ).rejects.toMatchObject({ kind: `policy` }) + } finally { + await rm(dir, { recursive: true, force: true }) + } + }, 20_000) + + it(`an ephemeral container lingers for the idle grace, then is removed`, async () => { + // Dispose no longer vanishes the container synchronously: it lingers for + // the idle grace (so an in-window collaborator can reattach) and is then + // REMOVED by the debounced teardown (ephemeral ⇒ wiped). 
+ const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + sandboxKey: `ephemeral-${Date.now()}`, + sharedIdleGraceMs: 1_000, + }) + const Docker = await loadDockerode() + const docker = new Docker() + const list = () => + docker.listContainers({ + all: true, + filters: { label: [`${TEST_LABEL}=1`] }, + }) + expect((await list()).length).toBeGreaterThanOrEqual(1) + await sandbox.dispose() + // Still present immediately after dispose — it's only refcounted down. + expect((await list()).length).toBe(1) + // After the grace the ephemeral teardown removes it. + const start = Date.now() + while ((await list()).length > 0) { + if (Date.now() - start > 5_000) { + throw new Error(`ephemeral container not removed after idle grace`) + } + await new Promise((r) => setTimeout(r, 50)) + } + }, 60_000) + + it(`exec timeout kills the process and reports timedOut`, async () => { + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + }) + try { + const r = await sandbox.exec({ + command: `sleep 30`, + timeoutMs: 800, + }) + expect(r.timedOut).toBe(true) + expect(r.exitCode === null || r.exitCode !== 0).toBe(true) + // The container itself must still be alive — timeout kills the exec + // PID, not the whole container. 
+ const probe = await sandbox.exec({ command: `echo still-alive` }) + expect(probe.stdout.toString().trim()).toBe(`still-alive`) + } finally { + await sandbox.dispose() + } + }, 30_000) + + it(`exec honors AbortSignal`, async () => { + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + }) + try { + const ac = new AbortController() + const p = sandbox.exec({ + command: `sleep 30`, + timeoutMs: 5000, + signal: ac.signal, + }) + setTimeout(() => ac.abort(), 100) + const r = await p + expect(r.aborted).toBe(true) + expect(r.timedOut).toBe(false) + } finally { + await sandbox.dispose() + } + }, 30_000) + + it(`exec timeout kills only its own process tree, not co-tenant processes`, async () => { + // Models a shared container: one exec leaves a background process running + // (a stand-in for a sibling lease's work or a dev server), then a *second* + // exec times out. The scoped kill must not touch the first process. + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + }) + try { + // Background a long sleep that outlives the exec that launched it; it is + // reparented to PID 1 but keeps that first exec's marker in its environ. + const launch = await sandbox.exec({ command: `sleep 300 & echo $!` }) + const bgPid = launch.stdout.toString().trim() + expect(bgPid).toMatch(/^\d+$/) + + const timed = await sandbox.exec({ command: `sleep 30`, timeoutMs: 800 }) + expect(timed.timedOut).toBe(true) + + // The earlier background process — a different exec marker — survives. 
+ const check = await sandbox.exec({ + command: `kill -0 ${bgPid} && echo ALIVE || echo DEAD`, + }) + expect(check.stdout.toString().trim()).toBe(`ALIVE`) + } finally { + await sandbox.dispose() + } + }, 30_000) + + it(`names a container from its key and labels the spawner for observability`, async () => { + // The runtime resolves the key to (here) the entity URL, so the name is + // legible in `docker ps`; the entity identity also rides on labels. + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + sandboxKey: `/worker/job-42/main`, + entityType: `worker`, + entityUrl: `/worker/job-42/main`, + }) + try { + const Docker = await loadDockerode() + const docker = new Docker() + const list = await docker.listContainers({ + all: true, + filters: { label: [`${TEST_LABEL}=1`] }, + }) + const mine = list.find((c) => + (c.Names ?? []).some((n) => n.includes(`${`electric-sbx`}-`)) + ) + expect(mine).toBeDefined() + const name = (mine!.Names ?? [])[0] ?? `` + // The entity type + id are legible in `docker ps`. + expect(name).toContain(`worker`) + expect(name).toContain(`job-42`) + const labels = (mine as unknown as { Labels: Record }) + .Labels + expect(labels[`com.electric.sandbox.entity-type`]).toBe(`worker`) + expect(labels[`com.electric.sandbox.entity`]).toBe(`/worker/job-42/main`) + } finally { + await sandbox.dispose() + } + }, 60_000) + + it(`fetch runs inside the container and returns the page (allow-all)`, async () => { + // Proves the request executes *in the container* (via the in-sandbox + // HTTP client over exec) and a Response is synthesized from its output. 
+ const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + initialNetworkPolicy: { mode: `allow-all` }, + }) + try { + const res = await sandbox.fetch(`https://example.com/`) + expect(res.status).toBe(200) + const body = await res.text() + expect(body).toContain(`Example Domain`) + } finally { + await sandbox.dispose() + } + }, 60_000) + + it(`deny-all container refuses fetch (NetworkMode=none, no proxy)`, async () => { + // deny-all means the container is created with no network interface, so + // there is no proxy and fetch is rejected up front. + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + initialNetworkPolicy: { mode: `deny-all` }, + }) + try { + await expect(sandbox.fetch(`https://example.com/`)).rejects.toMatchObject( + { kind: `policy` } + ) + } finally { + await sandbox.dispose() + } + }, 30_000) +}) + +d(`dockerSandbox keyed lifecycle`, () => { + // Unique keys per run so reattach targets a clean deterministic name. + const KEY = `electric-test-${Date.now()}` + + beforeAll(async () => { + await sweepTestContainers() + }, 30_000) + + afterEach(async () => { + __resetPersistentRegistryForTests() + await sweepTestContainers() + }, 30_000) + + afterAll(async () => { + __resetPersistentRegistryForTests() + await sweepTestContainers() + }, 30_000) + + const make = ( + sandboxKey: string, + sharedIdleGraceMs?: number, + persistent = true + ) => + dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + persistent, + sandboxKey, + sharedIdleGraceMs, + }) + + // Inspect a shared container by its key label (set on every persistent + // container as `com.electric.sandbox.key=`), avoiding any coupling + // to the deterministic-name hash. 
+ const keyState = async ( + sandboxKey: string + ): Promise<`absent` | `running` | `stopped`> => { + const Docker = await loadDockerode() + const docker = new Docker() + const list = await docker.listContainers({ + all: true, + filters: { label: [`com.electric.sandbox.key=${sandboxKey}`] }, + }) + if (list.length === 0) return `absent` + const info = await docker.getContainer(list[0].Id).inspect() + return info.State.Running ? `running` : `stopped` + } + + const waitForKeyState = async ( + sandboxKey: string, + want: `absent` | `running` | `stopped`, + timeoutMs: number + ): Promise => { + const start = Date.now() + for (;;) { + if ((await keyState(sandboxKey)) === want) return + if (Date.now() - start > timeoutMs) { + throw new Error(`timed out waiting for ${sandboxKey} to be ${want}`) + } + await new Promise((r) => setTimeout(r, 50)) + } + } + + it(`reattaches to the same container and preserves the filesystem`, async () => { + const first = await make(`${KEY}-persist`) + await first.writeFile(`/work/shared.txt`, `hello-from-first`) + // dispose drops the last ref but leaves the container running. + await first.dispose() + + const second = await make(`${KEY}-persist`) + try { + const back = await second.readFile(`/work/shared.txt`) + expect(back.toString()).toBe(`hello-from-first`) + } finally { + await second.dispose() + } + }, 90_000) + + it(`names a shared container from the key (collaborator-stable) and labels the spawner`, async () => { + const key = `${KEY}-named` + const sandbox = await dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + persistent: true, + sandboxKey: key, + entityType: `horton`, + entityUrl: `/horton/abc123/main`, + }) + try { + const Docker = await loadDockerode() + const docker = new Docker() + const list = await docker.listContainers({ + all: true, + filters: { label: [`com.electric.sandbox.key=${key}`] }, + }) + expect(list).toHaveLength(1) + const name = (list[0].Names ?? [])[0] ?? 
`` + // Derived from the shared key (not the entity) so every collaborator + // converges on it; readable slug + a 12-hex disambiguator. + expect(name).toMatch(/electric-sbx-.+-[0-9a-f]{12}$/) + const labels = (list[0] as unknown as { Labels: Record }) + .Labels + // Per-entity identity lives in labels, since the shared name can't. + expect(labels[`com.electric.sandbox.entity-type`]).toBe(`horton`) + expect(labels[`com.electric.sandbox.entity`]).toBe(`/horton/abc123/main`) + } finally { + await sandbox.dispose() + } + }, 90_000) + + it(`shares one live container across concurrent leases (refcount)`, async () => { + // Two concurrent factory calls for the same key resolve to one container. + const [a, b] = await Promise.all([ + make(`${KEY}-concurrent`), + make(`${KEY}-concurrent`), + ]) + try { + await a.exec({ command: `echo from-a > /work/a.txt` }) + // b sees a's write — same filesystem. + const seen = await b.readFile(`/work/a.txt`) + expect(seen.toString().trim()).toBe(`from-a`) + + // Disposing one lease must NOT tear the container down — the sibling + // is still using it. + await a.dispose() + const stillThere = await b.exec({ command: `cat /work/a.txt` }) + expect(stillThere.exitCode).toBe(0) + expect(stillThere.stdout.toString().trim()).toBe(`from-a`) + } finally { + await b.dispose() + } + }, 90_000) + + it(`stops a shared container after the idle grace; reattach restarts it with the fs intact`, async () => { + const grace = 1_000 + const key = `${KEY}-idle` + const sandbox = await make(key, grace) + await sandbox.writeFile(`/work/keep.txt`, `survives-stop`) + await sandbox.dispose() // refs → 0, schedules a debounced stop + + // Within the grace it's still running; after it, stopped (not removed). + expect(await keyState(key)).toBe(`running`) + await waitForKeyState(key, `stopped`, 5_000) + + // Reattach restarts the stopped container; the filesystem survived. 
+ const again = await make(key, grace) + try { + expect(await keyState(key)).toBe(`running`) + expect((await again.readFile(`/work/keep.txt`)).toString()).toBe( + `survives-stop` + ) + } finally { + await again.dispose() + } + }, 90_000) + + it(`REMOVES an ephemeral container after the idle grace (wiped, not stopped)`, async () => { + // Same keyed path as persistent, but persistent:false ⇒ the debounced + // teardown REMOVES the container instead of stopping it. + const grace = 1_000 + const key = `${KEY}-ephemeral` + const sandbox = await make(key, grace, /* persistent */ false) + await sandbox.writeFile(`/work/gone.txt`, `will-be-wiped`) + await sandbox.dispose() // refs → 0, schedules a debounced remove + + // Within the grace it's still running (reattachable for in-window sharing). + expect(await keyState(key)).toBe(`running`) + // After the grace it's gone entirely — not merely stopped. + await waitForKeyState(key, `absent`, 5_000) + }, 90_000) + + it(`per-wake keys get distinct containers (full isolation via the key alone)`, async () => { + // Two wakes of one entity under scope:'wake' resolve to entityUrl#wakeId — + // distinct keys ⇒ distinct containers, no shared filesystem. No separate + // "ephemeral path": isolation is purely the per-wake key. + const entityUrl = `${KEY}-entity` + const w1 = await make(`${entityUrl}#wake-1`, undefined, false) + const w2 = await make(`${entityUrl}#wake-2`, undefined, false) + try { + await w1.exec({ command: `echo only-in-w1 > /work/marker.txt` }) + // w2 is a different container, so it cannot see w1's write. 
+ expect(await w2.exists(`/work/marker.txt`)).toBe(false) + expect(await keyState(`${entityUrl}#wake-1`)).toBe(`running`) + expect(await keyState(`${entityUrl}#wake-2`)).toBe(`running`) + } finally { + await w1.dispose() + await w2.dispose() + } + }, 90_000) + + // --- ownership: owner creates + governs teardown; attacher only attaches --- + + const makeOwned = (sandboxKey: string, sharedIdleGraceMs?: number) => + dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + sandboxKey, + persistent: true, + sharedIdleGraceMs, + }) + + const makeAttacher = (sandboxKey: string) => + dockerSandbox({ + image: TEST_IMAGE, + labels: { [TEST_LABEL]: `1` }, + sandboxKey, + owner: false, + }) + + it(`an attacher shares the owner's container + filesystem (never creates)`, async () => { + const key = `${KEY}-attach` + const owner = await makeOwned(key) + try { + await owner.writeFile(`/work/shared.txt`, `from-owner`) + const attacher = await makeAttacher(key) + try { + expect((await attacher.readFile(`/work/shared.txt`)).toString()).toBe( + `from-owner` + ) + } finally { + await attacher.dispose() + } + } finally { + await owner.dispose() + } + }, 90_000) + + it(`an attacher rejects with 'unavailable' when no owner sandbox is live`, async () => { + // No owner ever created this key → the attacher must NOT conjure a fresh, + // empty container; it fails closed. + await expect(makeAttacher(`${KEY}-no-owner`)).rejects.toMatchObject({ + kind: `unavailable`, + }) + expect(await keyState(`${KEY}-no-owner`)).toBe(`absent`) + }, 60_000) + + it(`owner reclaim wipes a persistent container immediately (overrides stop)`, async () => { + const key = `${KEY}-reclaim` + // Persistent + long grace: a normal idle dispose would STOP and keep it + // warm. A terminal reclaim removes it now regardless. 
+ const owner = await makeOwned(key, 60_000) + await owner.writeFile(`/work/gone.txt`, `bye`) + await owner.dispose({ reclaim: true }) + await waitForKeyState(key, `absent`, 5_000) + }, 90_000) + + it(`an attacher dispose (even with reclaim) never tears down the owner's container`, async () => { + const key = `${KEY}-attach-safe` + const owner = await makeOwned(key, 60_000) + const attacher = await makeAttacher(key) + try { + // An attacher can't reclaim the owner's sandbox — the flag is ignored. + await attacher.dispose({ reclaim: true }) + expect(await keyState(key)).toBe(`running`) + const probe = await owner.exec({ command: `echo alive` }) + expect(probe.stdout.toString().trim()).toBe(`alive`) + } finally { + await owner.dispose() + } + }, 90_000) + + it(`a re-acquire within the grace cancels the idle-stop (stays warm)`, async () => { + const grace = 1_000 + const key = `${KEY}-warm` + const first = await make(key, grace) + await first.dispose() // schedules a stop `grace` ms out + + // Re-acquire well within the grace — this must cancel the pending stop. + await new Promise((r) => setTimeout(r, grace / 4)) + const second = await make(key, grace) + try { + // Wait past the *original* grace: the container must still be running. + await new Promise((r) => setTimeout(r, grace + 300)) + expect(await keyState(key)).toBe(`running`) + const r = await second.exec({ command: `echo warm` }) + expect(r.stdout.toString().trim()).toBe(`warm`) + } finally { + await second.dispose() + } + }, 90_000) + + it(`keeps the container alive until the LAST concurrent lease disposes`, async () => { + const grace = 800 + const key = `${KEY}-refcount-idle` + const [a, b] = await Promise.all([make(key, grace), make(key, grace)]) + // Dropping one of two leases must NOT schedule a stop. + await a.dispose() + await new Promise((r) => setTimeout(r, grace + 300)) + expect(await keyState(key)).toBe(`running`) + // Dropping the last lease stops it after the grace. 
+ await b.dispose() + await waitForKeyState(key, `stopped`, 5_000) + }, 90_000) + + // Stop a container out-of-band (by key label) to simulate a process that + // left a non-running leftover behind. + const stopByKey = async (sandboxKey: string): Promise => { + const Docker = await loadDockerode() + const docker = new Docker() + const list = await docker.listContainers({ + all: true, + filters: { label: [`com.electric.sandbox.key=${sandboxKey}`] }, + }) + await docker.getContainer(list[0].Id).stop({ t: 0 }) + } + + it(`sweepOrphanedDockerSandboxes removes an exited *ephemeral* orphan`, async () => { + const key = `${KEY}-orphan-exited` + await make(key, 60_000, /* persistent */ false) + await stopByKey(key) // a crashed process leaves an exited ephemeral leftover + __resetPersistentRegistryForTests() + expect(await keyState(key)).toBe(`stopped`) + + const removed = await sweepOrphanedDockerSandboxes() + expect(removed.length).toBeGreaterThanOrEqual(1) + expect(await keyState(key)).toBe(`absent`) + }, 90_000) + + it(`sweepOrphanedDockerSandboxes leaves a *running* container untouched`, async () => { + // A running container may belong to a live sibling process sharing this + // daemon — force-removing it would wipe a peer's in-use sandbox. + const key = `${KEY}-orphan-running` + await make(key, 60_000, /* persistent */ false) + __resetPersistentRegistryForTests() + expect(await keyState(key)).toBe(`running`) + + await sweepOrphanedDockerSandboxes() + expect(await keyState(key)).toBe(`running`) + }, 90_000) + + it(`sweepOrphanedDockerSandboxes preserves a *persistent* container for reattach`, async () => { + // persistent: true exists so a restarted process can reattach by key, so a + // boot sweep must not wipe it even when it's stopped. 
+ const key = `${KEY}-orphan-persist` + await make(key, 60_000, /* persistent */ true) + await stopByKey(key) + __resetPersistentRegistryForTests() + expect(await keyState(key)).toBe(`stopped`) + + await sweepOrphanedDockerSandboxes() + expect(await keyState(key)).toBe(`stopped`) + }, 90_000) +}) diff --git a/packages/agents-runtime/test/sandbox-exec-fetch.test.ts b/packages/agents-runtime/test/sandbox-exec-fetch.test.ts new file mode 100644 index 0000000000..538adc6f20 --- /dev/null +++ b/packages/agents-runtime/test/sandbox-exec-fetch.test.ts @@ -0,0 +1,131 @@ +import { describe, it, expect, vi } from 'vitest' +import { fetchInSandbox } from '../src/sandbox/exec-fetch' +import { SandboxError } from '../src/sandbox/types' +import type { SandboxExecOpts, SandboxExecResult } from '../src/sandbox/types' + +/** + * `fetchInSandbox` runs an HTTP request *inside* the sandbox via `exec` and + * synthesizes a `Response` from the framed stdout the in-sandbox client emits: + * line 1: `\t` + * rest: base64 of the response body (whitespace-insensitive) + * These tests drive the TS parsing/synthesis + the exec wiring with a fake + * exec; the actual in-sandbox shell is exercised by the live docker tests. + */ + +function fakeExec( + stdout: string, + over: Partial = {} +): ReturnType { + return vi.fn( + async (_opts: SandboxExecOpts): Promise => ({ + exitCode: 0, + signal: null, + stdout: Buffer.from(stdout), + stderr: Buffer.from(``), + timedOut: false, + aborted: false, + outputTruncated: false, + ...over, + }) + ) +} + +function framed(status: number, contentType: string, body: string): string { + return `${status}\t${contentType}\n` + Buffer.from(body).toString(`base64`) +} + +describe(`fetchInSandbox`, () => { + it(`synthesizes a Response from framed stdout`, async () => { + const exec = fakeExec( + framed(200, `text/html; charset=utf-8`, `

hi

`) + ) + const res = await fetchInSandbox(exec, `https://example.com/`) + expect(res.ok).toBe(true) + expect(res.status).toBe(200) + expect(res.headers.get(`content-type`)).toBe(`text/html; charset=utf-8`) + expect(await res.text()).toBe(`

hi

`) + }) + + it(`runs the request inside the sandbox (single exec, never globalThis.fetch)`, async () => { + const exec = fakeExec(framed(200, `text/plain`, `ok`)) + await fetchInSandbox(exec, `https://example.com/page`) + expect(exec).toHaveBeenCalledTimes(1) + const opts = exec.mock.calls[0]![0] as SandboxExecOpts + // The command auto-detects an in-sandbox HTTP client. + expect(opts.command).toContain(`curl`) + expect(opts.command).toContain(`node`) + expect(opts.command).toContain(`wget`) + // The URL is passed via env, never interpolated into the command string + // (injection-safe). + expect(opts.command).not.toContain(`https://example.com/page`) + expect(opts.env?.FETCH_URL).toBe(`https://example.com/page`) + }) + + it(`forwards User-Agent and Accept headers via env`, async () => { + const exec = fakeExec(framed(200, `text/html`, `x`)) + await fetchInSandbox(exec, `https://example.com/`, { + headers: { + 'User-Agent': `MyAgent/1.0`, + Accept: `text/html,*/*`, + }, + }) + const opts = exec.mock.calls[0]![0] as SandboxExecOpts + expect(opts.env?.FETCH_UA).toBe(`MyAgent/1.0`) + expect(opts.env?.FETCH_ACCEPT).toBe(`text/html,*/*`) + }) + + it(`forwards the abort signal and sets an exec timeout`, async () => { + const exec = fakeExec(framed(200, `text/plain`, `ok`)) + const ac = new AbortController() + await fetchInSandbox(exec, `https://example.com/`, { signal: ac.signal }) + const opts = exec.mock.calls[0]![0] as SandboxExecOpts + expect(opts.signal).toBe(ac.signal) + expect(opts.timeoutMs).toBeGreaterThan(0) + }) + + it(`decodes base64 bodies even when line-wrapped`, async () => { + const body = + `line1\nline2\nline3 with a long tail to force wrapping`.repeat(4) + const b64 = Buffer.from(body).toString(`base64`) + // Simulate coreutils `base64` wrapping output at 76 columns. 
+ const wrapped = b64.replace(/(.{76})/g, `$1\n`) + const exec = fakeExec(`200\ttext/plain\n` + wrapped) + const res = await fetchInSandbox(exec, `https://example.com/`) + expect(await res.text()).toBe(body) + }) + + it(`omits content-type header when the client reports none`, async () => { + const exec = fakeExec(framed(200, ``, `body`)) + const res = await fetchInSandbox(exec, `https://example.com/`) + expect(res.headers.get(`content-type`)).toBeNull() + expect(await res.text()).toBe(`body`) + }) + + it(`preserves non-ok HTTP statuses as a Response (tool reports them)`, async () => { + const exec = fakeExec(framed(404, `text/html`, `nope`)) + const res = await fetchInSandbox(exec, `https://example.com/missing`) + expect(res.ok).toBe(false) + expect(res.status).toBe(404) + }) + + it(`throws SandboxError(runtime) when no HTTP client is present`, async () => { + const exec = fakeExec(`NOCLIENT\n`) + await expect( + fetchInSandbox(exec, `https://example.com/`) + ).rejects.toMatchObject({ kind: `runtime` }) + }) + + it(`throws SandboxError when the request fails / egress is blocked (status 000)`, async () => { + const exec = fakeExec(`000\t\n`) + await expect( + fetchInSandbox(exec, `https://blocked.invalid/`) + ).rejects.toBeInstanceOf(SandboxError) + }) + + it(`throws SandboxError(runtime) on unparseable output`, async () => { + const exec = fakeExec(``, { exitCode: 1 }) + await expect( + fetchInSandbox(exec, `https://example.com/`) + ).rejects.toMatchObject({ kind: `runtime` }) + }) +}) diff --git a/packages/agents-runtime/test/sandbox-identity.test.ts b/packages/agents-runtime/test/sandbox-identity.test.ts new file mode 100644 index 0000000000..fa869188b2 --- /dev/null +++ b/packages/agents-runtime/test/sandbox-identity.test.ts @@ -0,0 +1,132 @@ +import { describe, expect, it } from 'vitest' +import { + resolveSandboxIdentity, + sandboxWipesOnDispose, +} from '../src/sandbox/identity' + +const WAKE = { entityUrl: `/horton/abc/main`, wakeId: `wake-123` } + 
+describe(`resolveSandboxIdentity`, () => { + describe(`key (identity)`, () => { + it(`defaults scope to 'entity' → key is the entity url`, () => { + expect(resolveSandboxIdentity({}, WAKE).sandboxKey).toBe(WAKE.entityUrl) + }) + + it(`scope 'entity' → key is the entity url (stable across wakes)`, () => { + expect(resolveSandboxIdentity({ scope: `entity` }, WAKE).sandboxKey).toBe( + WAKE.entityUrl + ) + }) + + it(`scope 'wake' → key is entityUrl#wakeId (per-wake isolation)`, () => { + expect(resolveSandboxIdentity({ scope: `wake` }, WAKE).sandboxKey).toBe( + `${WAKE.entityUrl}#${WAKE.wakeId}` + ) + }) + + it(`distinct wake ids under scope 'wake' yield distinct keys`, () => { + const a = resolveSandboxIdentity( + { scope: `wake` }, + { entityUrl: WAKE.entityUrl, wakeId: `w1` } + ) + const b = resolveSandboxIdentity( + { scope: `wake` }, + { entityUrl: WAKE.entityUrl, wakeId: `w2` } + ) + expect(a.sandboxKey).not.toBe(b.sandboxKey) + }) + + it(`an explicit key wins over scope`, () => { + expect( + resolveSandboxIdentity({ key: `team-room`, scope: `wake` }, WAKE) + .sandboxKey + ).toBe(`team-room`) + }) + }) + + describe(`persistent (durability) defaults`, () => { + it(`scope 'wake' defaults to ephemeral (persistent false)`, () => { + expect(resolveSandboxIdentity({ scope: `wake` }, WAKE).persistent).toBe( + false + ) + }) + + it(`scope 'entity' (default) defaults to persistent`, () => { + expect(resolveSandboxIdentity({}, WAKE).persistent).toBe(true) + expect(resolveSandboxIdentity({ scope: `entity` }, WAKE).persistent).toBe( + true + ) + }) + + it(`an explicit key defaults to persistent`, () => { + expect( + resolveSandboxIdentity({ key: `team-room` }, WAKE).persistent + ).toBe(true) + }) + + it(`an explicit persistent value overrides the scope default`, () => { + // Per-wake but pinned persistent. + expect( + resolveSandboxIdentity({ scope: `wake`, persistent: true }, WAKE) + .persistent + ).toBe(true) + // Per-entity but forced ephemeral. 
+ expect( + resolveSandboxIdentity({ scope: `entity`, persistent: false }, WAKE) + .persistent + ).toBe(false) + // Explicit key but forced ephemeral. + expect( + resolveSandboxIdentity({ key: `team-room`, persistent: false }, WAKE) + .persistent + ).toBe(false) + }) + }) + + describe(`owner (role)`, () => { + it(`defaults to owner`, () => { + expect(resolveSandboxIdentity({}, WAKE).owner).toBe(true) + expect(resolveSandboxIdentity({ scope: `wake` }, WAKE).owner).toBe(true) + expect(resolveSandboxIdentity({ key: `team-room` }, WAKE).owner).toBe( + true + ) + }) + + it(`an explicit owner:false makes the entity an attacher`, () => { + // The `inherit` shape: a key adopted from the parent, attach-only. + const r = resolveSandboxIdentity( + { key: `/horton/parent`, persistent: true, owner: false }, + WAKE + ) + expect(r.owner).toBe(false) + expect(r.sandboxKey).toBe(`/horton/parent`) + expect(r.persistent).toBe(true) + }) + + it(`owner is orthogonal to identity and durability`, () => { + // An attacher can still carry a persistent flag (it just never drives + // teardown); ownership doesn't change the resolved key. 
+ const r = resolveSandboxIdentity( + { scope: `wake`, owner: false }, + { entityUrl: `/w/x/main`, wakeId: `k9` } + ) + expect(r.owner).toBe(false) + expect(r.sandboxKey).toBe(`/w/x/main#k9`) + }) + }) +}) + +describe(`sandboxWipesOnDispose`, () => { + it(`wipes when reclaimed (entity went terminal), even if persistent`, () => { + expect(sandboxWipesOnDispose(true, true)).toBe(true) + expect(sandboxWipesOnDispose(true, false)).toBe(true) + }) + + it(`wipes an ephemeral sandbox that was not reclaimed`, () => { + expect(sandboxWipesOnDispose(false, false)).toBe(true) + }) + + it(`preserves a persistent sandbox that was not reclaimed`, () => { + expect(sandboxWipesOnDispose(false, true)).toBe(false) + }) +}) diff --git a/packages/agents-runtime/test/sandbox-profiles.test.ts b/packages/agents-runtime/test/sandbox-profiles.test.ts new file mode 100644 index 0000000000..928cbfa7d1 --- /dev/null +++ b/packages/agents-runtime/test/sandbox-profiles.test.ts @@ -0,0 +1,61 @@ +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { createRuntimeRouter } from '../src/create-handler' +import { clearRegistry } from '../src/define-entity' +import { unrestrictedSandbox } from '../src/sandbox/unrestricted' +import type { SandboxProfile } from '../src/sandbox/types' + +const localProfile: SandboxProfile = { + name: `local`, + label: `Local`, + description: `Runs on the host`, + factory: () => unrestrictedSandbox({ workingDirectory: process.cwd() }), +} + +const dockerProfile: SandboxProfile = { + name: `docker`, + label: `Docker`, + factory: () => unrestrictedSandbox({ workingDirectory: process.cwd() }), +} + +describe(`createRuntimeRouter sandboxProfiles`, () => { + beforeEach(() => clearRegistry()) + afterEach(() => clearRegistry()) + + it(`exposes wire-shape descriptors for the registered profiles`, () => { + const router = createRuntimeRouter({ + baseUrl: `http://localhost:4200`, + sandboxProfiles: [localProfile, dockerProfile], + }) + 
expect(router.sandboxProfileDescriptors).toEqual([ + { name: `local`, label: `Local`, description: `Runs on the host` }, + { name: `docker`, label: `Docker` }, + ]) + }) + + it(`exposes no descriptors when no profiles are registered`, () => { + const router = createRuntimeRouter({ baseUrl: `http://localhost:4200` }) + expect(router.sandboxProfileDescriptors).toEqual([]) + }) + + it(`rejects duplicate profile names`, () => { + expect(() => + createRuntimeRouter({ + baseUrl: `http://localhost:4200`, + sandboxProfiles: [ + localProfile, + { ...localProfile, label: `Other Local` }, + ], + }) + ).toThrowError(/duplicate sandbox profile name "local"/) + }) + + it(`omits factory closures from the exposed descriptors`, () => { + const router = createRuntimeRouter({ + baseUrl: `http://localhost:4200`, + sandboxProfiles: [localProfile], + }) + for (const desc of router.sandboxProfileDescriptors) { + expect(`factory` in desc).toBe(false) + } + }) +}) diff --git a/packages/agents-runtime/test/sandbox-remote.test.ts b/packages/agents-runtime/test/sandbox-remote.test.ts new file mode 100644 index 0000000000..85370b740b --- /dev/null +++ b/packages/agents-runtime/test/sandbox-remote.test.ts @@ -0,0 +1,704 @@ +import { describe, expect, it, vi } from 'vitest' +import { remoteSandbox } from '../src/sandbox/remote' +import { SandboxError } from '../src/sandbox/types' +import { + adaptE2B, + connectOrCreateE2BSandbox, + e2bNetworkCreateOpts, + type E2BSandboxClass, +} from '../src/sandbox/remote/e2b' +import type { RemoteSandboxClient } from '../src/sandbox/remote/types' + +/** + * Minimal e2b Sandbox instance recording the lifecycle calls the adapter + * makes. Only `setTimeout`/`kill` matter for the heartbeat tests; the rest are + * stubs so it satisfies the adapter's parameter type. 
+ */ +function makeFakeSbx() { + const calls = { setTimeout: [] as Array, killed: 0 } + const sbx = { + sandboxId: `sbx-test`, + commands: { run: async () => ({ stdout: ``, stderr: ``, exitCode: 0 }) }, + files: { + read: async () => new Uint8Array(), + write: async () => undefined, + makeDir: async () => undefined, + }, + async setTimeout(ms: number) { + calls.setTimeout.push(ms) + }, + async kill() { + calls.killed++ + }, + } + return { + sbx: sbx as unknown as Parameters[0], + calls, + } +} + +function makeFakeClient(): RemoteSandboxClient & { + __calls: { + exec: Array<{ cmd: string; cwd?: string }> + read: Array + write: Array<{ path: string; size: number }> + mkdir: Array + killed: boolean + suspended: boolean + } +} { + const calls = { + exec: [] as Array<{ cmd: string; cwd?: string }>, + read: [] as Array, + write: [] as Array<{ path: string; size: number }>, + mkdir: [] as Array, + killed: false, + suspended: false, + } + const files = new Map() + return { + __calls: calls, + async exec(opts) { + calls.exec.push({ cmd: opts.command, cwd: opts.cwd }) + return { + stdout: Buffer.from(`stdout for ${opts.command}`), + stderr: Buffer.from(``), + exitCode: 0, + } + }, + async readFile(path) { + calls.read.push(path) + const buf = files.get(path) + if (!buf) throw new Error(`ENOENT: ${path}`) + return buf + }, + async writeFile(path, content) { + const buf = Buffer.isBuffer(content) ? 
content : Buffer.from(content) + calls.write.push({ path, size: buf.length }) + files.set(path, buf) + }, + async mkdir(path) { + calls.mkdir.push(path) + }, + async readdir() { + return [] + }, + async exists(path) { + return files.has(path) + }, + async remove(path) { + files.delete(path) + }, + async stat(path) { + const buf = files.get(path) + if (!buf) { + const e: NodeJS.ErrnoException = new Error(`ENOENT: ${path}`) + e.code = `ENOENT` + throw e + } + return { type: `file` as const, size: buf.length, mtimeMs: 0 } + }, + async kill() { + calls.killed = true + }, + async suspend() { + calls.suspended = true + }, + } +} + +/** + * In-memory stand-in for the e2b `Sandbox` class statics. Persists created + * sandboxes (keyed by id) across calls so we can assert reattach-by-key: a + * later call for the same key connect()s to the existing one instead of + * creating a second. + */ +function makeFakeE2B() { + interface Stored { + sandboxId: string + metadata?: Record + state: string + startedAt: Date + } + const store = new Map() + let idSeq = 0 + const calls = { + create: [] as Array<{ template?: string; opts?: Record }>, + connect: [] as Array, + list: 0, + } + const Sandbox = { + async create(arg1?: unknown, arg2?: unknown) { + const template = typeof arg1 === `string` ? arg1 : undefined + const opts = (typeof arg1 === `string` ? arg2 : arg1) as + | Record + | undefined + calls.create.push({ template, opts }) + const sandboxId = `sbx-${++idSeq}` + store.set(sandboxId, { + sandboxId, + metadata: opts?.metadata as Record | undefined, + state: `running`, + // Monotonic so the "oldest wins" tiebreak is deterministic. 
+ startedAt: new Date(1_000 + idSeq), + }) + return { sandboxId } + }, + async connect(sandboxId: string) { + calls.connect.push(sandboxId) + const info = store.get(sandboxId) + if (info) info.state = `running` // connect() auto-resumes a paused VM + return { sandboxId } + }, + list(opts?: { + query?: { metadata?: Record; state?: Array } + }) { + calls.list++ + const wantMeta = opts?.query?.metadata ?? {} + const wantStates = opts?.query?.state + return { + async nextItems() { + return [...store.values()].filter( + (s) => + Object.entries(wantMeta).every( + ([k, v]) => s.metadata?.[k] === v + ) && + (wantStates === undefined || wantStates.includes(s.state)) + ) + }, + } + }, + } + return { Sandbox: Sandbox as unknown as E2BSandboxClass, store, calls } +} + +describe(`remoteSandbox`, () => { + describe(`identity`, () => { + it(`reports name 'remote:e2b' when constructed with an e2b client`, async () => { + const client = makeFakeClient() + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + }) + try { + expect(sandbox.name).toBe(`remote:e2b`) + expect(sandbox.workingDirectory).toBe(`/work`) + } finally { + await sandbox.dispose() + } + }) + }) + + describe(`exec`, () => { + it(`delegates to the client with the configured cwd`, async () => { + const client = makeFakeClient() + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + }) + try { + const result = await sandbox.exec({ command: `ls -la` }) + expect(result.exitCode).toBe(0) + expect(result.stdout.toString()).toBe(`stdout for ls -la`) + expect(client.__calls.exec).toEqual([{ cmd: `ls -la`, cwd: `/work` }]) + } finally { + await sandbox.dispose() + } + }) + + it(`overrides cwd from opts`, async () => { + const client = makeFakeClient() + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + }) + try { + await sandbox.exec({ command: `pwd`, cwd: `/tmp` }) + 
expect(client.__calls.exec[0].cwd).toBe(`/tmp`) + } finally { + await sandbox.dispose() + } + }) + }) + + describe(`filesystem`, () => { + it(`writeFile + readFile roundtrip via the client`, async () => { + const client = makeFakeClient() + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + }) + try { + await sandbox.writeFile(`/work/x.txt`, `hello`) + const buf = await sandbox.readFile(`/work/x.txt`) + expect(buf.toString(`utf-8`)).toBe(`hello`) + } finally { + await sandbox.dispose() + } + }) + + it(`writeFile rejects paths outside the working directory`, async () => { + const client = makeFakeClient() + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + }) + try { + await expect( + sandbox.writeFile(`/etc/passwd`, `nope`) + ).rejects.toBeInstanceOf(SandboxError) + } finally { + await sandbox.dispose() + } + }) + + it(`mkdir delegates to the client`, async () => { + const client = makeFakeClient() + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + }) + try { + await sandbox.mkdir(`/work/nested/deep`, { recursive: true }) + expect(client.__calls.mkdir).toContain(`/work/nested/deep`) + } finally { + await sandbox.dispose() + } + }) + }) + + describe(`fetch`, () => { + it(`runs the request inside the VM via exec and returns a Response`, async () => { + // A client whose exec emulates the in-sandbox HTTP client: it frames + // status + content-type + base64 body the way fetchInSandbox expects. + const execCalls: Array<{ + command: string + env?: Record + }> = [] + const client: RemoteSandboxClient = { + ...makeFakeClient(), + async exec(opts) { + execCalls.push({ command: opts.command, env: opts.env }) + const url = opts.env?.FETCH_URL ?? `` + const body = `

served from inside the VM for ${url}

` + return { + stdout: Buffer.from( + `200\ttext/html; charset=utf-8\n` + + Buffer.from(body).toString(`base64`) + ), + stderr: Buffer.from(``), + exitCode: 0, + } + }, + } + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + }) + try { + const res = await sandbox.fetch(`https://example.com/`) + expect(res.status).toBe(200) + expect(res.headers.get(`content-type`)).toContain(`text/html`) + expect(await res.text()).toContain(`served from inside the VM`) + // It egressed via exec inside the VM — not from the host process. + expect(execCalls).toHaveLength(1) + expect(execCalls[0]!.env?.FETCH_URL).toBe(`https://example.com/`) + } finally { + await sandbox.dispose() + } + }) + + it(`surfaces a failed in-sandbox request as a SandboxError`, async () => { + // status 000 ⇒ no response (egress blocked by the VM policy, or the + // host is unreachable) ⇒ the fetch rejects. + const client: RemoteSandboxClient = { + ...makeFakeClient(), + async exec() { + return { + stdout: Buffer.from(`000\t\n`), + stderr: Buffer.from(``), + exitCode: 0, + } + }, + } + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + }) + try { + await expect( + sandbox.fetch(`https://blocked.invalid/`) + ).rejects.toBeInstanceOf(SandboxError) + } finally { + await sandbox.dispose() + } + }) + }) + + describe(`e2b network policy`, () => { + it(`maps a NetworkPolicy to e2b create-time egress opts`, () => { + expect(e2bNetworkCreateOpts({ mode: `allow-all` })).toEqual({ + allowInternetAccess: true, + }) + expect(e2bNetworkCreateOpts({ mode: `deny-all` })).toEqual({ + allowInternetAccess: false, + }) + expect( + e2bNetworkCreateOpts({ + mode: `allowlist`, + allow: [`example.com`, `api.test`], + }) + ).toEqual({ network: { allowOut: [`example.com`, `api.test`] } }) + }) + + it(`applies the egress policy to a freshly created (keyless) VM`, async () => { + const { Sandbox, calls } = makeFakeE2B() + await 
connectOrCreateE2BSandbox(Sandbox, { + persistent: false, + keepAliveMs: 1000, + network: e2bNetworkCreateOpts({ + mode: `allowlist`, + allow: [`example.com`], + }), + }) + expect(calls.create).toHaveLength(1) + expect(calls.create[0].opts?.network).toEqual({ + allowOut: [`example.com`], + }) + }) + + it(`applies the egress policy to a persistent created VM (alongside lifecycle)`, async () => { + const { Sandbox, calls } = makeFakeE2B() + await connectOrCreateE2BSandbox(Sandbox, { + persistent: true, + sandboxKey: `s1`, + keepAliveMs: 1000, + network: e2bNetworkCreateOpts({ mode: `deny-all` }), + }) + expect(calls.create).toHaveLength(1) + expect(calls.create[0].opts?.allowInternetAccess).toBe(false) + // Egress opts don't clobber the reattach lifecycle config. + expect(calls.create[0].opts?.lifecycle).toEqual({ + onTimeout: `pause`, + autoResume: true, + }) + }) + }) + + describe(`lifecycle`, () => { + it(`dispose kills the underlying remote workspace exactly once`, async () => { + const client = makeFakeClient() + const killSpy = vi.spyOn(client, `kill`) + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + }) + await sandbox.dispose() + expect(killSpy).toHaveBeenCalledTimes(1) + // Second dispose is a no-op — kill is not called again. 
+ await sandbox.dispose() + expect(killSpy).toHaveBeenCalledTimes(1) + }) + + it(`ephemeral (non-persistent) sandbox dispose kills, never suspends`, async () => { + const client = makeFakeClient() + const killSpy = vi.spyOn(client, `kill`) + const suspendSpy = vi.spyOn(client, `suspend`) + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + // persistent defaults false + }) + await sandbox.dispose() + expect(killSpy).toHaveBeenCalledTimes(1) + expect(suspendSpy).not.toHaveBeenCalled() + }) + + it(`persistent sandbox dispose suspends (preserves state), never kills`, async () => { + const client = makeFakeClient() + const killSpy = vi.spyOn(client, `kill`) + const suspendSpy = vi.spyOn(client, `suspend`) + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + persistent: true, + }) + await sandbox.dispose() + expect(suspendSpy).toHaveBeenCalledTimes(1) + expect(killSpy).not.toHaveBeenCalled() + // Idempotent — neither runs again. 
+ await sandbox.dispose() + expect(suspendSpy).toHaveBeenCalledTimes(1) + expect(killSpy).not.toHaveBeenCalled() + }) + + it(`persistent sandbox falls back to kill when the client has no suspend()`, async () => { + const client = makeFakeClient() + delete (client as { suspend?: unknown }).suspend + const killSpy = vi.spyOn(client, `kill`) + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + persistent: true, + }) + await sandbox.dispose() + expect(killSpy).toHaveBeenCalledTimes(1) + }) + + it(`an attacher dispose detaches (suspend), never kills — even when ephemeral`, async () => { + const client = makeFakeClient() + const killSpy = vi.spyOn(client, `kill`) + const suspendSpy = vi.spyOn(client, `suspend`) + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + persistent: false, + owner: false, // attacher: must never kill the owner's VM + }) + await sandbox.dispose() + expect(suspendSpy).toHaveBeenCalledTimes(1) + expect(killSpy).not.toHaveBeenCalled() + }) + + it(`owner reclaim kills even a persistent VM (terminal ⇒ wipe)`, async () => { + const client = makeFakeClient() + const killSpy = vi.spyOn(client, `kill`) + const suspendSpy = vi.spyOn(client, `suspend`) + const sandbox = await remoteSandbox({ + provider: `e2b`, + client, + workingDirectory: `/work`, + persistent: true, + }) + await sandbox.dispose({ reclaim: true }) + expect(killSpy).toHaveBeenCalledTimes(1) + expect(suspendSpy).not.toHaveBeenCalled() + }) + }) + + describe(`e2b reattach by key`, () => { + it(`keyless sandbox is created fresh with no metadata or reattach`, async () => { + const { Sandbox, calls } = makeFakeE2B() + await connectOrCreateE2BSandbox(Sandbox, { + persistent: false, + keepAliveMs: 1000, + }) + expect(calls.create).toHaveLength(1) + expect(calls.list).toBe(0) + expect(calls.connect).toHaveLength(0) + // No reuse metadata without an identity to reattach by. 
+ expect(calls.create[0].opts?.metadata).toBeUndefined() + }) + + it(`an ephemeral keyed sandbox is tagged + reattachable but killed on idle`, async () => { + const { Sandbox, calls } = makeFakeE2B() + await connectOrCreateE2BSandbox(Sandbox, { + persistent: false, + sandboxKey: `/horton/solo#wake-1`, + keepAliveMs: 1000, + }) + // Reattach is by key regardless of durability — it looked up first. + expect(calls.list).toBe(1) + expect(calls.connect).toHaveLength(0) + expect(calls.create).toHaveLength(1) + const opts = calls.create[0].opts! + // Tagged so an in-window collaborator can reattach by key… + expect(Object.keys(opts.metadata as Record)).toHaveLength( + 1 + ) + // …but idle-collected by killing (wiped), not pausing. + expect(opts.lifecycle).toEqual({ onTimeout: `kill` }) + }) + + it(`a persistent sandbox with no live VM creates one tagged + pause-on-idle`, async () => { + const { Sandbox, calls } = makeFakeE2B() + await connectOrCreateE2BSandbox(Sandbox, { + persistent: true, + sandboxKey: `session-1`, + keepAliveMs: 1000, + }) + expect(calls.connect).toHaveLength(0) + expect(calls.create).toHaveLength(1) + const opts = calls.create[0].opts! + const metadata = opts.metadata as Record + // Tagged with exactly one reuse-key entry (the hashed key). + expect(Object.keys(metadata)).toHaveLength(1) + expect(opts.timeoutMs).toBe(1000) + // Idle-collected by suspending, not killing, so it stays reattachable. + expect(opts.lifecycle).toEqual({ onTimeout: `pause`, autoResume: true }) + }) + + it(`reconnects to the same identity for the same key`, async () => { + const { Sandbox, calls } = makeFakeE2B() + const first = await connectOrCreateE2BSandbox(Sandbox, { + persistent: true, + sandboxKey: `session-1`, + keepAliveMs: 1000, + }) + const second = await connectOrCreateE2BSandbox(Sandbox, { + persistent: true, + sandboxKey: `session-1`, + keepAliveMs: 1000, + }) + // Second resolution reattaches rather than creating a new VM. 
+ expect(calls.create).toHaveLength(1) + expect(calls.connect).toEqual([first.sandboxId]) + expect(second.sandboxId).toBe(first.sandboxId) + }) + + it(`reconnects to a paused VM (connect auto-resumes it)`, async () => { + const { Sandbox, store } = makeFakeE2B() + const first = await connectOrCreateE2BSandbox(Sandbox, { + persistent: true, + sandboxKey: `session-1`, + keepAliveMs: 1000, + }) + store.get(first.sandboxId)!.state = `paused` + const second = await connectOrCreateE2BSandbox(Sandbox, { + persistent: true, + sandboxKey: `session-1`, + keepAliveMs: 1000, + }) + expect(second.sandboxId).toBe(first.sandboxId) + expect(store.get(first.sandboxId)!.state).toBe(`running`) + }) + + it(`distinct keys get distinct workspaces`, async () => { + const { Sandbox, calls } = makeFakeE2B() + const a = await connectOrCreateE2BSandbox(Sandbox, { + persistent: true, + sandboxKey: `session-1`, + keepAliveMs: 1000, + }) + const b = await connectOrCreateE2BSandbox(Sandbox, { + persistent: true, + sandboxKey: `session-2`, + keepAliveMs: 1000, + }) + expect(calls.create).toHaveLength(2) + expect(calls.connect).toHaveLength(0) + expect(a.sandboxId).not.toBe(b.sandboxId) + }) + + it(`an attacher reconnects to the owner's live VM (never creates)`, async () => { + const { Sandbox, calls } = makeFakeE2B() + const owner = await connectOrCreateE2BSandbox(Sandbox, { + persistent: true, + sandboxKey: `session-1`, + keepAliveMs: 1000, + }) + const attacher = await connectOrCreateE2BSandbox(Sandbox, { + persistent: true, + owner: false, + sandboxKey: `session-1`, + keepAliveMs: 1000, + }) + expect(calls.create).toHaveLength(1) // only the owner created + expect(calls.connect).toEqual([owner.sandboxId]) + expect(attacher.sandboxId).toBe(owner.sandboxId) + }) + + it(`an attacher rejects with 'unavailable' when no VM exists for the key`, async () => { + const { Sandbox, calls } = makeFakeE2B() + await expect( + connectOrCreateE2BSandbox(Sandbox, { + persistent: true, + owner: false, + 
sandboxKey: `nobody-home`, + keepAliveMs: 1000, + }) + ).rejects.toMatchObject({ kind: `unavailable` }) + // It must NOT have conjured a fresh, empty VM. + expect(calls.create).toHaveLength(0) + }) + }) + + describe(`e2b keep-alive (Model C)`, () => { + it(`heartbeats setTimeout while held and stops on suspend (no kill)`, async () => { + vi.useFakeTimers() + try { + const { sbx, calls } = makeFakeSbx() + const client = adaptE2B(sbx, `/work`, { + keepAliveMs: 1000, + heartbeatIntervalMs: 100, + }) + await vi.advanceTimersByTimeAsync(350) + // Several refreshes, each to the full keep-alive window. + expect(calls.setTimeout.length).toBeGreaterThanOrEqual(3) + expect(calls.setTimeout.every((ms) => ms === 1000)).toBe(true) + + const ticksBeforeSuspend = calls.setTimeout.length + await client.suspend!() + await vi.advanceTimersByTimeAsync(500) + // Heartbeat stopped; the platform is left to auto-suspend. No kill. + expect(calls.setTimeout.length).toBe(ticksBeforeSuspend) + expect(calls.killed).toBe(0) + } finally { + vi.useRealTimers() + } + }) + + it(`heartbeats regardless of durability (keeps the VM alive during a wake)`, async () => { + // The heartbeat keeps any held VM alive for the duration of the wake; an + // ephemeral VM is only reaped (killed) once every holder stops. 
+ vi.useFakeTimers() + try { + const { sbx, calls } = makeFakeSbx() + const client = adaptE2B(sbx, `/work`, { + keepAliveMs: 1000, + heartbeatIntervalMs: 100, + }) + await vi.advanceTimersByTimeAsync(350) + expect(calls.setTimeout.length).toBeGreaterThanOrEqual(3) + await client.kill() + expect(calls.killed).toBe(1) + } finally { + vi.useRealTimers() + } + }) + + it(`kill() also stops the heartbeat`, async () => { + vi.useFakeTimers() + try { + const { sbx, calls } = makeFakeSbx() + const client = adaptE2B(sbx, `/work`, { + keepAliveMs: 1000, + heartbeatIntervalMs: 100, + }) + await vi.advanceTimersByTimeAsync(150) + const ticksBeforeKill = calls.setTimeout.length + await client.kill() + await vi.advanceTimersByTimeAsync(500) + expect(calls.setTimeout.length).toBe(ticksBeforeKill) + expect(calls.killed).toBe(1) + } finally { + vi.useRealTimers() + } + }) + }) + + describe(`provider loading`, () => { + it(`throws unavailable when no client and e2b is not installed`, async () => { + // Force the dynamic loader to fail by passing an unknown provider. 
+ await expect( + remoteSandbox({ + provider: `unknown` as never, + workingDirectory: `/work`, + }) + ).rejects.toBeInstanceOf(SandboxError) + }) + }) +}) diff --git a/packages/agents-runtime/test/sandbox-tool-refactor.test.ts b/packages/agents-runtime/test/sandbox-tool-refactor.test.ts new file mode 100644 index 0000000000..5f1b534780 --- /dev/null +++ b/packages/agents-runtime/test/sandbox-tool-refactor.test.ts @@ -0,0 +1,130 @@ +import { mkdtemp, rm } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { createBashTool } from '../src/tools/bash' +import { createReadFileTool } from '../src/tools/read-file' +import { createWriteTool } from '../src/tools/write' +import { createEditTool } from '../src/tools/edit' +import { unrestrictedSandbox } from '../src/sandbox/unrestricted' + +/** + * Asserts the tool factories take a Sandbox (not a workingDirectory string) + * and delegate filesystem/exec calls to it. Behavior is preserved relative + * to the previous signatures — the refactor is plumbing. 
+ */ +describe(`tool refactor to Sandbox`, () => { + let cwd: string + + beforeEach(async () => { + cwd = await mkdtemp(join(tmpdir(), `tool-refactor-`)) + }) + + afterEach(async () => { + await rm(cwd, { recursive: true, force: true }) + }) + + describe(`bash`, () => { + it(`runs commands through sandbox.exec, not raw child_process`, async () => { + const calls: Array = [] + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const wrapped = { + ...sandbox, + exec: async (opts: unknown) => { + calls.push(opts) + return sandbox.exec(opts as Parameters[0]) + }, + } + const tool = createBashTool(wrapped as typeof sandbox) + const result = await tool.execute(`call-1`, { command: `echo hi` }) + expect(calls).toHaveLength(1) + expect((result.content[0] as { text: string }).text.trim()).toBe(`hi`) + }) + + it(`does not forward arbitrary process.env to children`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const tool = createBashTool(sandbox) + process.env.__SANDBOX_TEST_SECRET__ = `should-not-leak` + try { + const result = await tool.execute(`call`, { + command: `node -e "console.log(process.env.__SANDBOX_TEST_SECRET__ ?? 
'')"`, + }) + expect((result.content[0] as { text: string }).text.trim()).toBe( + `` + ) + } finally { + delete process.env.__SANDBOX_TEST_SECRET__ + } + }) + + it(`description string no longer claims sandboxing`, () => { + const sandbox = { + name: `unrestricted`, + workingDirectory: cwd, + } as never + const tool = createBashTool(sandbox) + expect(tool.description.toLowerCase()).not.toMatch(/sandbox/) + }) + }) + + describe(`read`, () => { + it(`reads via sandbox.readFile`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + await sandbox.writeFile(join(cwd, `f.txt`), `payload`) + const tool = createReadFileTool(sandbox) + const result = await tool.execute(`r`, { path: `f.txt` }) + expect((result.content[0] as { text: string }).text).toBe(`payload`) + }) + + it(`rejects paths that escape the working directory`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const tool = createReadFileTool(sandbox) + const result = await tool.execute(`r`, { path: `../escape.txt` }) + expect((result.content[0] as { text: string }).text).toMatch( + /outside the working directory/ + ) + }) + }) + + describe(`write`, () => { + it(`writes via sandbox.writeFile`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const tool = createWriteTool(sandbox) + await tool.execute(`w`, { path: `out.txt`, content: `hello` }) + const buf = await sandbox.readFile(join(cwd, `out.txt`)) + expect(buf.toString(`utf-8`)).toBe(`hello`) + }) + + it(`creates parent directories via sandbox.mkdir`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const tool = createWriteTool(sandbox) + await tool.execute(`w`, { + path: `nested/dir/leaf.txt`, + content: `deep`, + }) + const buf = await sandbox.readFile(join(cwd, `nested/dir/leaf.txt`)) + expect(buf.toString(`utf-8`)).toBe(`deep`) + }) + }) + + describe(`edit`, () => { + it(`edits via sandbox.readFile + writeFile`, 
async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + await sandbox.writeFile(join(cwd, `f.txt`), `hello world`) + const readSet = new Set() + const readTool = createReadFileTool(sandbox, readSet) + await readTool.execute(`r`, { path: `f.txt` }) + const editTool = createEditTool(sandbox, readSet) + const result = await editTool.execute(`e`, { + path: `f.txt`, + old_string: `world`, + new_string: `there`, + }) + expect((result.content[0] as { text: string }).text).toMatch( + /Edited|replacement/ + ) + const after = await sandbox.readFile(join(cwd, `f.txt`)) + expect(after.toString(`utf-8`)).toBe(`hello there`) + }) + }) +}) diff --git a/packages/agents-runtime/test/sandbox-tool-symlink-safety.test.ts b/packages/agents-runtime/test/sandbox-tool-symlink-safety.test.ts new file mode 100644 index 0000000000..e839c317e6 --- /dev/null +++ b/packages/agents-runtime/test/sandbox-tool-symlink-safety.test.ts @@ -0,0 +1,73 @@ +import { mkdir, mkdtemp, rm, symlink, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { createEditTool } from '../src/tools/edit' +import { createReadFileTool } from '../src/tools/read-file' +import { createWriteTool } from '../src/tools/write' +import { unrestrictedSandbox } from '../src/sandbox/unrestricted' + +describe(`tools refuse symlink-based escape from the working directory`, () => { + let cwd: string + let outside: string + + beforeEach(async () => { + cwd = await mkdtemp(join(tmpdir(), `sandbox-symlink-cwd-`)) + outside = await mkdtemp(join(tmpdir(), `sandbox-symlink-outside-`)) + }) + + afterEach(async () => { + await rm(cwd, { recursive: true, force: true }) + await rm(outside, { recursive: true, force: true }) + }) + + it(`read rejects a symlink pointing outside the working directory`, async () => { + await writeFile(join(outside, `secret.txt`), `s3cret`, `utf-8`) + await 
symlink(join(outside, `secret.txt`), join(cwd, `link.txt`)) + + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const tool = createReadFileTool(sandbox) + const result = await tool.execute(`r`, { path: `link.txt` }) + + expect((result.content[0] as { text: string }).text).toMatch( + /outside the working directory/ + ) + await sandbox.dispose() + }) + + it(`edit rejects a symlink pointing outside the working directory`, async () => { + await writeFile(join(outside, `victim.txt`), `untouched`, `utf-8`) + await symlink(join(outside, `victim.txt`), join(cwd, `link.txt`)) + + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const readSet = new Set([join(cwd, `link.txt`)]) + const tool = createEditTool(sandbox, readSet) + const result = await tool.execute(`e`, { + path: `link.txt`, + old_string: `untouched`, + new_string: `hijacked`, + }) + + expect((result.content[0] as { text: string }).text).toMatch( + /outside the working directory/ + ) + await sandbox.dispose() + }) + + it(`write rejects a path whose parent is a symlink to outside the working directory`, async () => { + await mkdir(join(outside, `target-dir`)) + await symlink(join(outside, `target-dir`), join(cwd, `linked-dir`)) + + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const tool = createWriteTool(sandbox) + const result = await tool.execute(`w`, { + path: `linked-dir/leaked.txt`, + content: `should not land outside`, + }) + + expect((result.content[0] as { text: string }).text).toMatch( + /outside the working directory/ + ) + await sandbox.dispose() + }) +}) diff --git a/packages/agents-runtime/test/sandbox-unrestricted-containment.test.ts b/packages/agents-runtime/test/sandbox-unrestricted-containment.test.ts new file mode 100644 index 0000000000..b7bf96701b --- /dev/null +++ b/packages/agents-runtime/test/sandbox-unrestricted-containment.test.ts @@ -0,0 +1,120 @@ +import { mkdir, mkdtemp, rm, symlink, writeFile } from 
'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { unrestrictedSandbox } from '../src/sandbox/unrestricted' +import { SandboxError } from '../src/sandbox/types' + +/** + * Containment is a sandbox concern. The unrestricted provider shares the + * host filesystem, so it is the one that must resolve paths (following + * symlinks) and reject anything that escapes the working directory with a + * `policy` SandboxError — the defense the tool layer used to perform via + * resolveSafePath now lives here, where the filesystem actually is. + */ +describe(`unrestrictedSandbox workspace containment`, () => { + let cwd: string + let outside: string + + beforeEach(async () => { + cwd = await mkdtemp(join(tmpdir(), `unrestricted-contain-cwd-`)) + outside = await mkdtemp(join(tmpdir(), `unrestricted-contain-out-`)) + }) + + afterEach(async () => { + await rm(cwd, { recursive: true, force: true }) + await rm(outside, { recursive: true, force: true }) + }) + + it(`readFile rejects a relative ../ escape with a policy error`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + await writeFile(join(outside, `secret.txt`), `s3cret`, `utf-8`) + try { + await expect(sandbox.readFile(`../secret.txt`)).rejects.toMatchObject({ + kind: `policy`, + }) + } finally { + await sandbox.dispose() + } + }) + + it(`writeFile rejects an absolute path outside the workspace`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + try { + await expect( + sandbox.writeFile(join(outside, `leaked.txt`), `nope`) + ).rejects.toBeInstanceOf(SandboxError) + await expect( + sandbox.writeFile(join(outside, `leaked.txt`), `nope`) + ).rejects.toMatchObject({ kind: `policy` }) + } finally { + await sandbox.dispose() + } + }) + + it(`readFile follows a symlink and rejects when the target escapes`, async () => { + await 
writeFile(join(outside, `secret.txt`), `s3cret`, `utf-8`) + await symlink(join(outside, `secret.txt`), join(cwd, `link.txt`)) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + try { + await expect(sandbox.readFile(`link.txt`)).rejects.toMatchObject({ + kind: `policy`, + }) + } finally { + await sandbox.dispose() + } + }) + + it(`writeFile rejects when a parent component is a symlink out of the workspace`, async () => { + await mkdir(join(outside, `target-dir`)) + await symlink(join(outside, `target-dir`), join(cwd, `linked-dir`)) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + try { + await expect( + sandbox.writeFile(`linked-dir/leaked.txt`, `nope`) + ).rejects.toMatchObject({ kind: `policy` }) + } finally { + await sandbox.dispose() + } + }) + + it(`mkdir and stat reject escapes with a policy error`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + try { + await expect(sandbox.mkdir(`../new-dir`)).rejects.toMatchObject({ + kind: `policy`, + }) + await expect( + sandbox.stat(join(outside, `secret.txt`)) + ).rejects.toMatchObject({ kind: `policy` }) + } finally { + await sandbox.dispose() + } + }) + + it(`exists returns false (not throw) for a denied path — safe-probe semantics`, async () => { + await writeFile(join(outside, `secret.txt`), `s3cret`, `utf-8`) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + try { + expect(await sandbox.exists(join(outside, `secret.txt`))).toBe(false) + expect(await sandbox.exists(`../secret.txt`)).toBe(false) + } finally { + await sandbox.dispose() + } + }) + + it(`still serves paths inside the workspace`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + try { + await sandbox.mkdir(`nested`, { recursive: true }) + await sandbox.writeFile(`nested/ok.txt`, `inside`) + expect((await sandbox.readFile(`nested/ok.txt`)).toString()).toBe( + `inside` + ) + expect(await 
sandbox.exists(`nested/ok.txt`)).toBe(true) + expect((await sandbox.stat(`nested/ok.txt`)).type).toBe(`file`) + } finally { + await sandbox.dispose() + } + }) +}) diff --git a/packages/agents-runtime/test/sandbox-unrestricted.test.ts b/packages/agents-runtime/test/sandbox-unrestricted.test.ts new file mode 100644 index 0000000000..b2f3901ec1 --- /dev/null +++ b/packages/agents-runtime/test/sandbox-unrestricted.test.ts @@ -0,0 +1,151 @@ +import { mkdtemp, realpath, rm, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { unrestrictedSandbox } from '../src/sandbox/unrestricted' + +describe(`unrestrictedSandbox`, () => { + let cwd: string + + beforeEach(async () => { + cwd = await mkdtemp(join(tmpdir(), `unrestricted-sandbox-`)) + }) + + afterEach(async () => { + await rm(cwd, { recursive: true, force: true }) + }) + + describe(`identity`, () => { + it(`reports name 'unrestricted' and exposes workingDirectory`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + expect(sandbox.name).toBe(`unrestricted`) + expect(sandbox.workingDirectory).toBe(cwd) + await sandbox.dispose() + }) + }) + + describe(`exec`, () => { + it(`runs a shell command in the working directory`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const result = await sandbox.exec({ command: `pwd` }) + expect(result.exitCode).toBe(0) + expect(result.timedOut).toBe(false) + expect(result.stdout.toString().trim()).toBe(await realpath(cwd)) + await sandbox.dispose() + }) + + it(`captures stderr separately from stdout`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const result = await sandbox.exec({ + command: `echo out && echo err >&2`, + }) + expect(result.stdout.toString().trim()).toBe(`out`) + expect(result.stderr.toString().trim()).toBe(`err`) + await 
sandbox.dispose() + }) + + it(`reports non-zero exit codes`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const result = await sandbox.exec({ command: `exit 42` }) + expect(result.exitCode).toBe(42) + await sandbox.dispose() + }) + + it(`enforces timeoutMs and sets timedOut`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const result = await sandbox.exec({ + command: `sleep 5`, + timeoutMs: 100, + }) + expect(result.timedOut).toBe(true) + await sandbox.dispose() + }) + + it(`truncates output to maxOutputBytes and reports it`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const result = await sandbox.exec({ + command: `node -e "process.stdout.write('x'.repeat(1000))"`, + maxOutputBytes: 100, + }) + expect(result.stdout.length).toBeLessThanOrEqual(100) + expect(result.outputTruncated).toBe(true) + await sandbox.dispose() + }) + + it(`passes env from opts merged onto the sandbox base`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const result = await sandbox.exec({ + command: `node -e "console.log(process.env.MY_VAR)"`, + env: { MY_VAR: `hello` }, + }) + expect(result.stdout.toString().trim()).toBe(`hello`) + await sandbox.dispose() + }) + }) + + describe(`readFile`, () => { + it(`reads file contents as a Buffer`, async () => { + await writeFile(join(cwd, `f.txt`), `hello`, `utf-8`) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const buf = await sandbox.readFile(join(cwd, `f.txt`)) + expect(buf).toBeInstanceOf(Buffer) + expect(buf.toString(`utf-8`)).toBe(`hello`) + await sandbox.dispose() + }) + + it(`propagates ENOENT for missing files`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + await expect(sandbox.readFile(join(cwd, `missing.txt`))).rejects.toThrow() + await sandbox.dispose() + }) + }) + + describe(`writeFile`, () => { + it(`writes 
string content as utf-8`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + await sandbox.writeFile(join(cwd, `out.txt`), `world`) + const buf = await sandbox.readFile(join(cwd, `out.txt`)) + expect(buf.toString(`utf-8`)).toBe(`world`) + await sandbox.dispose() + }) + + it(`writes Buffer content verbatim`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const payload = Buffer.from([0x00, 0x01, 0x02, 0xff]) + await sandbox.writeFile(join(cwd, `bin`), payload) + const buf = await sandbox.readFile(join(cwd, `bin`)) + expect(buf.equals(payload)).toBe(true) + await sandbox.dispose() + }) + }) + + describe(`mkdir`, () => { + it(`creates nested directories with recursive: true`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + await sandbox.mkdir(join(cwd, `a/b/c`), { recursive: true }) + await sandbox.writeFile(join(cwd, `a/b/c/leaf.txt`), `here`) + const buf = await sandbox.readFile(join(cwd, `a/b/c/leaf.txt`)) + expect(buf.toString(`utf-8`)).toBe(`here`) + await sandbox.dispose() + }) + }) + + describe(`fetch`, () => { + it(`returns a Response from a successful HTTP call`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + // Use a data: URL so the test does not depend on network. 
+ const dataUrl = `data:text/plain;base64,aGVsbG8=` + const res = await sandbox.fetch(dataUrl) + expect(res.ok).toBe(true) + expect(await res.text()).toBe(`hello`) + await sandbox.dispose() + }) + }) + + describe(`dispose`, () => { + it(`returns a resolved promise`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + await expect(sandbox.dispose()).resolves.toBeUndefined() + }) + }) +}) diff --git a/packages/agents-runtime/test/tool-path-symlink.test.ts b/packages/agents-runtime/test/tool-path-symlink.test.ts deleted file mode 100644 index cbce1ca01a..0000000000 --- a/packages/agents-runtime/test/tool-path-symlink.test.ts +++ /dev/null @@ -1,98 +0,0 @@ -import { mkdtemp, readFile, rm, symlink, writeFile } from 'node:fs/promises' -import { tmpdir } from 'node:os' -import { join } from 'node:path' -import { afterEach, beforeEach, describe, expect, it } from 'vitest' -import { createEditTool } from '../src/tools/edit' -import { createReadFileTool } from '../src/tools/read-file' -import { createWriteTool } from '../src/tools/write' - -// Characterization: read/write/edit guard the working directory using a -// path-prefix check (resolve + relative + startsWith('..')) but do NOT call -// `realpath`, so a symlink inside the working directory that points outside -// is followed transparently — CVE-2025-53109/53110 class bypass. A follow-up -// PR will add realpath resolution; update these expectations when it lands. -describe(`tool path traversal — current symlink behavior`, () => { - let cwd: string - let outside: string - - beforeEach(async () => { - cwd = await mkdtemp(join(tmpdir(), `path-symlink-`)) - outside = await mkdtemp(join(tmpdir(), `path-outside-`)) - }) - - afterEach(async () => { - await rm(cwd, { recursive: true, force: true }) - await rm(outside, { recursive: true, force: true }) - }) - - it(`read: ".." 
escape is rejected by the prefix check`, async () => { - const tool = createReadFileTool(cwd) - const result = await tool.execute(`r-dotdot`, { path: `../escape.txt` }) - expect((result.content[0] as { text: string }).text).toMatch( - /outside the working directory/ - ) - }) - - it(`read: symlink inside cwd pointing outside currently succeeds`, async () => { - const secret = join(outside, `secret.txt`) - await writeFile(secret, `secret data`, `utf-8`) - await symlink(secret, join(cwd, `link.txt`)) - const tool = createReadFileTool(cwd) - const result = await tool.execute(`r-link`, { path: `link.txt` }) - expect((result.content[0] as { text: string }).text).toBe(`secret data`) - }) - - it(`write: ".." escape is rejected by the prefix check`, async () => { - const tool = createWriteTool(cwd) - const result = await tool.execute(`w-dotdot`, { - path: `../escape.txt`, - content: `nope`, - }) - expect((result.content[0] as { text: string }).text).toMatch( - /outside the working directory/ - ) - }) - - it(`write: symlink inside cwd pointing outside currently clobbers the target`, async () => { - const target = join(outside, `target.txt`) - await writeFile(target, `original`, `utf-8`) - await symlink(target, join(cwd, `link.txt`)) - const tool = createWriteTool(cwd) - const result = await tool.execute(`w-link`, { - path: `link.txt`, - content: `clobbered`, - }) - expect(result.details).toMatchObject({ bytesWritten: 9 }) - expect(await readFile(target, `utf-8`)).toBe(`clobbered`) - }) - - it(`edit: ".." 
escape is rejected by the prefix check`, async () => { - const tool = createEditTool(cwd, new Set()) - const result = await tool.execute(`e-dotdot`, { - path: `../escape.txt`, - old_string: `a`, - new_string: `b`, - }) - expect((result.content[0] as { text: string }).text).toMatch( - /outside the working directory/ - ) - }) - - it(`edit: symlink inside cwd pointing outside currently edits through the link`, async () => { - const target = join(outside, `t.txt`) - await writeFile(target, `hello world`, `utf-8`) - const linkPath = join(cwd, `link.txt`) - await symlink(target, linkPath) - // The edit tool requires the file to be in readSet; populate it with the - // resolved path the tool would compute. This mirrors what read would have - // done in the same session. - const tool = createEditTool(cwd, new Set([linkPath])) - const result = await tool.execute(`e-link`, { - path: `link.txt`, - old_string: `world`, - new_string: `there`, - }) - expect(result.details).toMatchObject({ replacements: 1 }) - expect(await readFile(target, `utf-8`)).toBe(`hello there`) - }) -}) diff --git a/packages/agents-runtime/test/write-edit-roundtrip.test.ts b/packages/agents-runtime/test/write-edit-roundtrip.test.ts index a056bc929d..13fb39b80c 100644 --- a/packages/agents-runtime/test/write-edit-roundtrip.test.ts +++ b/packages/agents-runtime/test/write-edit-roundtrip.test.ts @@ -4,6 +4,7 @@ import { join } from 'node:path' import { afterEach, beforeEach, describe, expect, it } from 'vitest' import { createEditTool } from '../src/tools/edit' import { createWriteTool } from '../src/tools/write' +import { unrestrictedSandbox } from '../src/sandbox/unrestricted' describe(`write→edit roundtrip in same wake`, () => { let cwd: string @@ -17,9 +18,10 @@ describe(`write→edit roundtrip in same wake`, () => { }) it(`edit succeeds on a freshly-written file (write populates readSet)`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) const readSet = new Set() - 
const write = createWriteTool(cwd, readSet) - const edit = createEditTool(cwd, readSet) + const write = createWriteTool(sandbox, readSet) + const edit = createEditTool(sandbox, readSet) await write.execute(`w`, { path: `r.txt`, @@ -34,5 +36,6 @@ describe(`write→edit roundtrip in same wake`, () => { /Edited|Replaced/ ) expect(await readFile(join(cwd, `r.txt`), `utf-8`)).toBe(`modified content`) + await sandbox.dispose() }) }) diff --git a/packages/agents-runtime/test/write-tool.test.ts b/packages/agents-runtime/test/write-tool.test.ts index 23c31037ce..e9b4c63de6 100644 --- a/packages/agents-runtime/test/write-tool.test.ts +++ b/packages/agents-runtime/test/write-tool.test.ts @@ -3,6 +3,7 @@ import { tmpdir } from 'node:os' import { join } from 'node:path' import { afterEach, beforeEach, describe, expect, it } from 'vitest' import { createWriteTool } from '../src/tools/write' +import { unrestrictedSandbox } from '../src/sandbox/unrestricted' describe(`write tool`, () => { let cwd: string @@ -16,8 +17,9 @@ describe(`write tool`, () => { }) it(`writes a new file and updates the readSet`, async () => { + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) const readSet = new Set() - const tool = createWriteTool(cwd, readSet) + const tool = createWriteTool(sandbox, readSet) const result = await tool.execute(`call-1`, { path: `hello.txt`, content: `hi there`, @@ -25,28 +27,37 @@ describe(`write tool`, () => { expect(result.content[0]).toMatchObject({ type: `text` }) const written = await readFile(join(cwd, `hello.txt`), `utf-8`) expect(written).toBe(`hi there`) + // The readSet key is a pure-string normalization (resolve against the + // working directory) shared by read/write/edit — not realpath-canonical, + // since path resolution now lives inside the sandbox. 
expect(readSet.has(join(cwd, `hello.txt`))).toBe(true) + await sandbox.dispose() }) it(`creates parent directories as needed`, async () => { - const tool = createWriteTool(cwd) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const tool = createWriteTool(sandbox) await tool.execute(`call-2`, { path: `nested/dir/file.txt`, content: `nested content`, }) const written = await readFile(join(cwd, `nested/dir/file.txt`), `utf-8`) expect(written).toBe(`nested content`) + await sandbox.dispose() }) it(`overwrites existing files`, async () => { - const tool = createWriteTool(cwd) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const tool = createWriteTool(sandbox) await tool.execute(`a`, { path: `f.txt`, content: `first` }) await tool.execute(`b`, { path: `f.txt`, content: `second` }) expect(await readFile(join(cwd, `f.txt`), `utf-8`)).toBe(`second`) + await sandbox.dispose() }) it(`rejects paths that escape the working directory`, async () => { - const tool = createWriteTool(cwd) + const sandbox = await unrestrictedSandbox({ workingDirectory: cwd }) + const tool = createWriteTool(sandbox) const result = await tool.execute(`x`, { path: `../escape.txt`, content: `nope`, @@ -54,5 +65,6 @@ describe(`write tool`, () => { expect((result.content[0] as { text: string }).text).toMatch( /outside the working directory/ ) + await sandbox.dispose() }) }) diff --git a/packages/agents-runtime/tsdown.config.ts b/packages/agents-runtime/tsdown.config.ts index f106db7088..322205ead3 100644 --- a/packages/agents-runtime/tsdown.config.ts +++ b/packages/agents-runtime/tsdown.config.ts @@ -1,7 +1,14 @@ import type { Options } from 'tsdown' const config: Options = { - entry: [`src/index.ts`, `src/react.ts`, `src/tools.ts`, `src/client.ts`], + entry: [ + `src/index.ts`, + `src/react.ts`, + `src/tools.ts`, + `src/sandbox.ts`, + `src/sandbox-docker.ts`, + `src/client.ts`, + ], format: [`esm`, `cjs`], external: [/^@tanstack\//, /^@durable-streams\//], 
dts: true, diff --git a/packages/agents-server-conformance-tests/src/electric-agents-tests.ts b/packages/agents-server-conformance-tests/src/electric-agents-tests.ts index 56962b2628..6873966f28 100644 --- a/packages/agents-server-conformance-tests/src/electric-agents-tests.ts +++ b/packages/agents-server-conformance-tests/src/electric-agents-tests.ts @@ -1887,31 +1887,59 @@ export function runElectricAgentsConformanceTests( return block?.type === `text` && block.text ? block.text : `` } + async function makeSandbox(workingDirectory: string) { + const { unrestrictedSandbox } = await import( + `../../agents-runtime/src/sandbox/unrestricted` + ) + return unrestrictedSandbox({ workingDirectory }) + } + test(`bash tool captures stdout and stderr`, async () => { const { createBashTool } = await import(`../../agents-runtime/src/tools`) - const tool = createBashTool(`/tmp`) - const result = await tool.execute(`test-tc`, { - command: `echo "hello" && echo "error" >&2`, - }) - expect(firstText(result)).toContain(`hello`) - expect(firstText(result)).toContain(`error`) - expect(result.details.exitCode).toBe(0) + const sandbox = await makeSandbox(`/tmp`) + try { + const tool = createBashTool(sandbox) + const result = await tool.execute(`test-tc`, { + command: `echo "hello" && echo "error" >&2`, + }) + expect(firstText(result)).toContain(`hello`) + expect(firstText(result)).toContain(`error`) + expect(result.details.exitCode).toBe(0) + } finally { + await sandbox.dispose() + } }) test(`bash tool enforces timeout`, async () => { const { createBashTool } = await import(`../../agents-runtime/src/tools`) - const tool = createBashTool(`/tmp`) - const result = await tool.execute(`test-tc`, { command: `sleep 60` }) - expect(result.details.timedOut).toBe(true) + const sandbox = await makeSandbox(`/tmp`) + try { + const tool = createBashTool(sandbox) + const result = await tool.execute(`test-tc`, { command: `sleep 60` }) + expect(result.details.timedOut).toBe(true) + } finally { + await 
sandbox.dispose() + } }, 35_000) test(`read_file rejects paths outside working directory`, async () => { const { createReadFileTool } = await import( `../../agents-runtime/src/tools` ) - const tool = createReadFileTool(`/tmp/test-workdir`) - const result = await tool.execute(`test-tc`, { path: `../../etc/passwd` }) - expect(firstText(result)).toContain(`outside the working directory`) + const fs = await import(`node:fs/promises`) + const dir = `/tmp/test-workdir-${Date.now()}` + await fs.mkdir(dir, { recursive: true }) + const sandbox = await makeSandbox(dir) + try { + const tool = createReadFileTool(sandbox) + const result = await tool.execute(`test-tc`, { + path: `../../etc/passwd`, + }) + expect(firstText(result)).toContain(`outside the working directory`) + } finally { + await sandbox.dispose() + await fs.rm(dir, { recursive: true, force: true }) + } }) test(`read_file rejects binary files`, async () => { @@ -1927,11 +1955,15 @@ export function runElectricAgentsConformanceTests( const binPath = path.join(dir, `test.bin`) await fs.writeFile(binPath, Buffer.from([0x00, 0x01, 0x02, 0xff])) - const tool = createReadFileTool(dir) - const result = await tool.execute(`test-tc`, { path: `test.bin` }) - expect(firstText(result)).toContain(`binary file`) - - await fs.rm(dir, { recursive: true }) + const sandbox = await makeSandbox(dir) + try { + const tool = createReadFileTool(sandbox) + const result = await tool.execute(`test-tc`, { path: `test.bin` }) + expect(firstText(result)).toContain(`binary file`) + } finally { + await sandbox.dispose() + await fs.rm(dir, { recursive: true }) + } }) test(`read_file rejects oversized files`, async () => { @@ -1947,11 +1979,15 @@ export function runElectricAgentsConformanceTests( // Write 600KB file (over 512KB limit) await fs.writeFile(bigPath, `x`.repeat(600 * 1024)) - const tool = createReadFileTool(dir) - const result = await tool.execute(`test-tc`, { path: `big.txt` }) - expect(firstText(result)).toContain(`too large`) - - 
await fs.rm(dir, { recursive: true }) + const sandbox = await makeSandbox(dir) + try { + const tool = createReadFileTool(sandbox) + const result = await tool.execute(`test-tc`, { path: `big.txt` }) + expect(firstText(result)).toContain(`too large`) + } finally { + await sandbox.dispose() + await fs.rm(dir, { recursive: true }) + } }) test(`web_search tool has correct interface`, async () => { @@ -1961,9 +1997,17 @@ export function runElectricAgentsConformanceTests( }) test(`fetch_url tool has correct interface`, async () => { - const { fetchUrlTool } = await import(`../../agents-runtime/src/tools`) - expect(fetchUrlTool.name).toBe(`fetch_url`) - expect(typeof fetchUrlTool.execute).toBe(`function`) + const { createFetchUrlTool } = await import( + `../../agents-runtime/src/tools` + ) + const sandbox = await makeSandbox(`/tmp`) + try { + const tool = createFetchUrlTool(sandbox) + expect(tool.name).toBe(`fetch_url`) + expect(typeof tool.execute).toBe(`function`) + } finally { + await sandbox.dispose() + } }) }) diff --git a/packages/agents-server-ui/src/components/ApiKeysForm.tsx b/packages/agents-server-ui/src/components/ApiKeysForm.tsx index 3e10feb585..2dd21d5e46 100644 --- a/packages/agents-server-ui/src/components/ApiKeysForm.tsx +++ b/packages/agents-server-ui/src/components/ApiKeysForm.tsx @@ -14,6 +14,7 @@ export type ApiKeysFormValues = { deepseek: string moonshot: string brave: string + e2b: string } type ApiKeyFieldId = keyof ApiKeysFormValues @@ -43,6 +44,7 @@ interface ApiKeysFormProps { showModelKeys?: boolean showBrave?: boolean modelControls?: Partial> + showE2b?: boolean /** * When `true`, persist on field blur (after the user has typed) * instead of waiting for a Save click. 
Hides the explicit @@ -80,6 +82,7 @@ export function ApiKeysForm({ showModelKeys = true, showBrave = true, modelControls, + showE2b = true, autoSave = false, }: ApiKeysFormProps): React.ReactElement { const [anthropic, setAnthropic] = useState(initial.anthropic) @@ -87,6 +90,7 @@ export function ApiKeysForm({ const [deepseek, setDeepseek] = useState(initial.deepseek) const [moonshot, setMoonshot] = useState(initial.moonshot) const [brave, setBrave] = useState(initial.brave) + const [e2b, setE2b] = useState(initial.e2b) const [visibleKeys, setVisibleKeys] = useState< Record >({ @@ -95,6 +99,7 @@ export function ApiKeysForm({ deepseek: false, moonshot: false, brave: false, + e2b: false, }) const [saving, setSaving] = useState(false) // Tracks the last set of values we've actually persisted, so an @@ -111,6 +116,7 @@ export function ApiKeysForm({ deepseek: false, moonshot: false, brave: false, + e2b: false, }) const canSave = @@ -119,18 +125,36 @@ export function ApiKeysForm({ openai.trim().length > 0 || deepseek.trim().length > 0 || moonshot.trim().length > 0)) || - (showBrave && brave.trim().length > 0) + (showBrave && brave.trim().length > 0) || + (showE2b && e2b.trim().length > 0) const handleSave = useCallback(async (): Promise => { if (!canSave || saving) return setSaving(true) try { - await onSave({ anthropic, openai, deepseek, moonshot, brave }) - persistedRef.current = { anthropic, openai, deepseek, moonshot, brave } + await onSave({ anthropic, openai, deepseek, moonshot, brave, e2b }) + persistedRef.current = { + anthropic, + openai, + deepseek, + moonshot, + brave, + e2b, + } } finally { setSaving(false) } - }, [anthropic, openai, deepseek, moonshot, brave, canSave, saving, onSave]) + }, [ + anthropic, + openai, + deepseek, + moonshot, + brave, + e2b, + canSave, + saving, + onSave, + ]) const handleSubmit = useCallback( async (e: React.FormEvent) => { @@ -168,9 +192,19 @@ export function ApiKeysForm({ deepseek, moonshot, brave, + e2b, }) }, - [autoSave, 
anthropic, openai, deepseek, moonshot, brave, persistIfDirty] + [ + autoSave, + anthropic, + openai, + deepseek, + moonshot, + brave, + e2b, + persistIfDirty, + ] ) const wrapOnChange = useCallback( @@ -301,6 +335,24 @@ export function ApiKeysForm({ } /> )} + {showE2b && ( + handleAutoSaveBlur(`e2b`)} + onToggleVisible={toggleVisible} + /> + } + /> + )} {showActions && ( {onSecondary && secondaryLabel && ( @@ -414,6 +466,21 @@ export function ApiKeysForm({ /> )} + {showE2b && ( + + + + )} {onSecondary && secondaryLabel && ( diff --git a/packages/agents-server-ui/src/components/EntityHeader.module.css b/packages/agents-server-ui/src/components/EntityHeader.module.css index b96d8e9ea8..03453358c9 100644 --- a/packages/agents-server-ui/src/components/EntityHeader.module.css +++ b/packages/agents-server-ui/src/components/EntityHeader.module.css @@ -82,6 +82,12 @@ margin-right: 4px; } +.runtimeBadges { + display: inline-flex; + align-items: center; + margin-right: 4px; +} + /* Toggled-on state for icon-only buttons (e.g. state-explorer toggle). Neutral grey so it reads as "active" without colouring the icon — matches the rest of the chrome. 
*/ diff --git a/packages/agents-server-ui/src/components/EntityHeader.tsx b/packages/agents-server-ui/src/components/EntityHeader.tsx index ff9511f2fb..c574270f9e 100644 --- a/packages/agents-server-ui/src/components/EntityHeader.tsx +++ b/packages/agents-server-ui/src/components/EntityHeader.tsx @@ -5,6 +5,7 @@ import { Icon as UiIcon, IconButton, Text, Tooltip } from '../ui' import type { BadgeTone } from '../ui' import { MainHeader } from './MainHeader' import { InlineStatusBadge } from './InlineStatusBadge' +import { EntityRuntimeBadges } from './EntityRuntimeBadges' import { listViews, type ViewId } from '../lib/workspace/viewRegistry' import styles from './EntityHeader.module.css' import type { ElectricEntity } from '../lib/ElectricAgentsProvider' @@ -172,6 +173,10 @@ function EntityActions({ {entity.status} + + + + {showViewStrip && availableViews.map((view) => { const Icon = view.icon diff --git a/packages/agents-server-ui/src/components/EntityRuntimeBadges.module.css b/packages/agents-server-ui/src/components/EntityRuntimeBadges.module.css new file mode 100644 index 0000000000..08b2b21090 --- /dev/null +++ b/packages/agents-server-ui/src/components/EntityRuntimeBadges.module.css @@ -0,0 +1,68 @@ +.badges { + display: inline-flex; + align-items: center; + gap: 6px; +} + +/* Unstyled button so a Badge can act as a popover trigger while staying + keyboard-focusable. 
*/ +.badgeTrigger { + display: inline-flex; + align-items: center; + padding: 0; + margin: 0; + border: none; + background: none; + cursor: pointer; + font: inherit; + color: inherit; +} + +.badgeTrigger:disabled { + cursor: default; +} + +.badgeLabel { + max-width: 14ch; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.popoverBody { + display: flex; + flex-direction: column; + gap: 8px; + min-width: 200px; + max-width: 320px; +} + +.popoverTitle { + display: flex; + align-items: center; + gap: 6px; +} + +.rows { + display: flex; + flex-direction: column; + gap: 4px; +} + +.row { + display: flex; + align-items: baseline; + justify-content: space-between; + gap: 12px; +} + +.rowLabel { + flex: 0 0 auto; +} + +.rowValue { + flex: 1 1 auto; + min-width: 0; + text-align: right; + overflow-wrap: anywhere; +} diff --git a/packages/agents-server-ui/src/components/EntityRuntimeBadges.tsx b/packages/agents-server-ui/src/components/EntityRuntimeBadges.tsx new file mode 100644 index 0000000000..4bcea9d0c7 --- /dev/null +++ b/packages/agents-server-ui/src/components/EntityRuntimeBadges.tsx @@ -0,0 +1,226 @@ +import { useMemo } from 'react' +import { Box, Server } from 'lucide-react' +import { useLiveQuery } from '@tanstack/react-db' +import { Badge, Icon, Popover, Text } from '../ui' +import { useElectricAgents } from '../lib/ElectricAgentsProvider' +import { + getEntityRunnerId, + resolveEffectiveSandbox, + resolveRunner, + runnerDisplayLabel, +} from '../lib/entityRuntime' +import { formatRelativeTime } from '../lib/formatTime' +import styles from './EntityRuntimeBadges.module.css' +import type { EffectiveSandbox } from '../lib/entityRuntime' +import type { + ElectricEntity, + ElectricRunner, +} from '../lib/ElectricAgentsProvider' + +/** + * Resolve the runner + sandbox an entity is associated with, from the + * runners collection. 
The sandbox is always populated — when the entity has + * no explicit profile, this reports the host "Local" default the runtime + * falls back to (see {@link resolveEffectiveSandbox}). Runner resolution + * degrades gracefully when the collection hasn't synced (callers fall back + * to the id). + */ +export function useEntityRuntimeInfo(entity: ElectricEntity): { + runnerId: string | null + runner: ElectricRunner | null + sandbox: EffectiveSandbox + sandboxKey: string | null +} { + const { runnersCollection } = useElectricAgents() + const { data: runners = [] } = useLiveQuery( + (q) => { + if (!runnersCollection) return undefined + return q.from({ r: runnersCollection }) + }, + [runnersCollection] + ) + + const runnerId = getEntityRunnerId(entity) + const explicitProfile = entity.sandbox?.profile ?? null + const sandboxKey = entity.sandbox?.key ?? null + + return useMemo(() => { + const runner = resolveRunner(runners, runnerId) + return { + runnerId, + runner, + sandbox: resolveEffectiveSandbox(runners, entity, runner), + sandboxKey, + } + // `entity` is passed to resolveEffectiveSandbox and listed in the deps, so this + // recomputes on every new `entity` reference — NOTE(review): confirm that is intended. + }, [runners, runnerId, explicitProfile, sandboxKey, entity]) +} + +/** + * Runner + sandbox badges for the entity header. Each badge opens a popover + * with the corresponding runtime details. The runner badge is shown only when + * the entity pins a runner; the sandbox badge is always shown. 
+ */ +export function EntityRuntimeBadges({ + entity, +}: { + entity: ElectricEntity +}): React.ReactElement { + const info = useEntityRuntimeInfo(entity) + const hasRunner = info.runnerId !== null + + return ( + + {hasRunner && ( + + + + + + {runnerDisplayLabel(info.runner, info.runnerId)} + + + + } + /> + + + + + )} + + + + + {info.sandbox.label} + + + } + /> + + + + + + ) +} + +function DetailRow({ + label, + value, + mono, +}: { + label: string + value: string + mono?: boolean +}): React.ReactElement { + return ( +
+ + {label} + + + {value} + +
+ ) +} + +function relativeFromIso(iso: string | null | undefined): string | null { + if (!iso) return null + const ms = Date.parse(iso) + return Number.isNaN(ms) ? null : formatRelativeTime(ms) +} + +function RunnerDetails({ + runner, + runnerId, +}: { + runner: ElectricRunner | null + runnerId: string | null +}): React.ReactElement { + const lastSeen = relativeFromIso(runner?.last_seen_at) + return ( +
+
+ + {runnerDisplayLabel(runner, runnerId)} +
+
+ {runner ? ( + <> + + + {lastSeen && } + + + ) : ( + <> + {runnerId && } + + Runner is not currently registered. + + + )} +
+
+ ) +} + +function SandboxDetails({ + sandbox, + sandboxKey, +}: { + sandbox: EffectiveSandbox + sandboxKey: string | null +}): React.ReactElement { + return ( +
+
+ + {sandbox.label} +
+
+ + {sandbox.description && ( + + {sandbox.description} + + )} + + {sandbox.isDefault && ( + + No profile was chosen at spawn — running the host default. + + )} + {sandboxKey && } +
+
+ ) +} diff --git a/packages/agents-server-ui/src/components/EntityTimeline.tsx b/packages/agents-server-ui/src/components/EntityTimeline.tsx index 3f9803c14d..67cf85ae99 100644 --- a/packages/agents-server-ui/src/components/EntityTimeline.tsx +++ b/packages/agents-server-ui/src/components/EntityTimeline.tsx @@ -10,7 +10,7 @@ import { } from 'react' import { useNavigate } from '@tanstack/react-router' import { useLiveQuery } from '@tanstack/react-db' -import { inArray } from '@durable-streams/state' +import { eq, inArray } from '@durable-streams/state' import { measureElement as defaultMeasureElement, useVirtualizer, @@ -36,6 +36,10 @@ import { attachmentDownloadUrl, isAttachmentManifest, } from '../lib/attachments' +import { + resolveSandboxProfile, + sandboxDisplayLabel, +} from '../lib/entityRuntime' import { warmMarkdownRenderCache } from '../lib/markdownRenderCache' import { Icon, IconButton, ScrollArea, Stack, Text, Tooltip } from '../ui' import { UserMessage } from './UserMessage' @@ -898,7 +902,7 @@ export function EntityTimeline({ stopPending?: boolean onStopGeneration?: () => void }): React.ReactElement { - const { entitiesCollection } = useElectricAgents() + const { entitiesCollection, runnersCollection } = useElectricAgents() const referencedEntityUrlKey = useMemo( () => stableEntityUrlKey(entities.map((entity) => entity.url)), [entities] @@ -922,6 +926,36 @@ export function EntityTimeline({ }, [entitiesCollection, referencedEntityUrlKey] ) + // Pull the sandbox profile name for the currently-focused entity so + // we can surface it as a read-only badge next to the spawned marker. + // The sandbox choice is set at spawn time and immutable for the + // entity's lifetime, so a single read here is sufficient. 
+ const { data: focusedEntity = [] } = useLiveQuery( + (q) => { + if (!entitiesCollection || !entityUrl) return undefined + return q + .from({ e: entitiesCollection as any }) + .where(({ e }: any) => eq(e.url, entityUrl)) + .select(({ e }: any) => ({ sandbox: e.sandbox })) + }, + [entitiesCollection, entityUrl] + ) + const sandboxProfileName = focusedEntity[0]?.sandbox?.profile ?? null + // Resolve the profile's advertised label (e.g. "Docker") rather than the raw + // profile name, matching how the header/sidebar badges render it. + const { data: runners = [] } = useLiveQuery( + (q) => { + if (!runnersCollection) return undefined + return q.from({ r: runnersCollection }) + }, + [runnersCollection] + ) + const sandboxLabel = sandboxProfileName + ? (sandboxDisplayLabel( + resolveSandboxProfile(runners, sandboxProfileName), + sandboxProfileName + ) ?? sandboxProfileName) + : null const entityStatusByUrl = useMemo(() => { const statusByUrl = new Map() for (const entity of entities) { @@ -1464,7 +1498,7 @@ export function EntityTimeline({ ref={contentRef} className={`${styles.content} mobile-chat-content`} > - + {spawnTime ? ( @@ -1486,6 +1520,15 @@ export function EntityTimeline({ )} + {sandboxLabel && ( + + + + {`sandbox · ${sandboxLabel}`} + + + + )} {displayRows.length === 0 ? ( diff --git a/packages/agents-server-ui/src/components/OnboardingModal.tsx b/packages/agents-server-ui/src/components/OnboardingModal.tsx index effc01f2a9..aec4fa8fc8 100644 --- a/packages/agents-server-ui/src/components/OnboardingModal.tsx +++ b/packages/agents-server-ui/src/components/OnboardingModal.tsx @@ -867,6 +867,9 @@ function ProviderItem({ deepseek: keysStatus.saved.deepseek ?? null, moonshot: keysStatus.saved.moonshot ?? null, brave: keysStatus.saved.brave ?? null, + // e2b is configured in Settings → Credentials, not onboarding; + // preserve any saved value so editing another key here can't wipe it. + e2b: keysStatus.saved.e2b ?? 
null, [provider.id]: next, } await persistApiKeys(nextKeys) diff --git a/packages/agents-server-ui/src/components/SchemaForm.tsx b/packages/agents-server-ui/src/components/SchemaForm.tsx index 72435094d0..094a8e1422 100644 --- a/packages/agents-server-ui/src/components/SchemaForm.tsx +++ b/packages/agents-server-ui/src/components/SchemaForm.tsx @@ -94,11 +94,18 @@ export function SchemaForm({ submitLabel = `Create`, onSubmit, onCancel, + extraRows, }: { schema: unknown submitLabel?: string onSubmit: (args: Record) => void onCancel?: () => void + /** + * Optional rows rendered above the schema-derived fields. Used to + * surface spawn-time controls that aren't part of the entity's + * creation_schema (e.g., the sandbox profile picker). + */ + extraRows?: React.ReactNode }): React.ReactElement { if (isObjectSchema(schema)) { return ( @@ -107,6 +114,7 @@ export function SchemaForm({ submitLabel={submitLabel} onSubmit={onSubmit} onCancel={onCancel} + extraRows={extraRows} /> ) } @@ -115,6 +123,7 @@ export function SchemaForm({ submitLabel={submitLabel} onSubmit={onSubmit} onCancel={onCancel} + extraRows={extraRows} /> ) } @@ -124,11 +133,13 @@ function ObjectSchemaForm({ submitLabel, onSubmit, onCancel, + extraRows, }: { schema: ObjectSchema submitLabel: string onSubmit: (args: Record) => void onCancel?: () => void + extraRows?: React.ReactNode }): React.ReactElement { const properties = schema.properties const requiredSet = useMemo( @@ -195,6 +206,7 @@ function ObjectSchemaForm({ return (
+ {extraRows} {Object.entries(properties).map(([key, prop], i) => ( ) => void onCancel?: () => void + extraRows?: React.ReactNode }): React.ReactElement { const [raw, setRaw] = useState(`{}`) const [parseError, setParseError] = useState(null) @@ -416,6 +430,7 @@ function RawJsonForm({ return ( + {extraRows} Arguments (JSON)