Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .changeset/agents-runtime-sandbox-primitive.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
'@electric-ax/agents-runtime': patch
'@electric-ax/agents': patch
'@electric-ax/agents-server-conformance-tests': patch
'@electric-ax/agents-desktop': patch
'@electric-ax/agents-server-ui': patch
'@electric-ax/agents-server': patch
---

Adds the `Sandbox` primitive (`@electric-ax/agents-runtime/sandbox`) for isolating LLM-driven tool calls. Three providers ship: `unrestrictedSandbox()` (explicit pass-through), `remoteSandbox({provider: 'e2b'})` (E2B as an optional peer dep), and `dockerSandbox()` (container isolation via `dockerode` as an optional peer dep).

Built-in entities (Horton, Worker) default to `unrestrictedSandbox` via the new `chooseDefaultSandbox(workingDirectory)` helper. Stronger isolation is opt-in by constructing `dockerSandbox` or `remoteSandbox` directly — `dockerSandbox` is the recommended path for multi-entity hosting.

Behavior changes folded in: bash no longer forwards `process.env` to child processes, which removes the trivial `env`-dump leak of secrets such as `$ANTHROPIC_API_KEY`; tool descriptions are corrected; and read/write/edit reject symlink escapes from the workspace. Note that the host-sharing `unrestricted` provider still can't fully contain secrets (e.g. they remain reachable via `/proc/<ppid>/environ`), so use `docker`/`remote` for untrusted or multi-tenant entities.

Runtimes advertise named **sandbox profiles** (e.g. `local`, `docker`) to the agents-server; spawn requests pick a profile by name, the server validates the choice against the target runner's advertised set, and the new-session UI surfaces a picker. Internally, the built-in tool factories (`createBashTool`, `createFetchUrlTool`, etc.) now route their filesystem and network access through the active `Sandbox`.
2 changes: 2 additions & 0 deletions packages/agents-desktop/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
"@electric-sql/client": "^1.5.20",
"@mixmark-io/domino": "^2.2.0",
"better-sqlite3": "^12.9.0",
"dockerode": "^5.0.0",
"e2b": ">=2.0.0",
"fix-path": "^4.0.0",
"jsdom": "^28.1.0",
"pino": "^10.3.1",
Expand Down
16 changes: 12 additions & 4 deletions packages/agents-desktop/src/credentials/api-keys.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export const EMPTY_API_KEYS: ApiKeys = {
deepseek: null,
moonshot: null,
brave: null,
e2b: null,
}

export const GLOBAL_API_KEYS_REF = `api-keys:global`
Expand All @@ -19,6 +20,7 @@ export function captureEnvApiKeys(env: NodeJS.ProcessEnv): ApiKeys {
deepseek: env.DEEPSEEK_API_KEY?.trim() || null,
moonshot: env.MOONSHOT_API_KEY?.trim() || null,
brave: env.BRAVE_SEARCH_API_KEY?.trim() || null,
e2b: env.E2B_API_KEY?.trim() || null,
}
}

Expand All @@ -36,6 +38,7 @@ export function normalizeApiKeys(value: unknown): ApiKeys {
deepseek: pick(maybe.deepseek),
moonshot: pick(maybe.moonshot),
brave: pick(maybe.brave),
e2b: pick(maybe.e2b),
}
}

Expand All @@ -45,7 +48,8 @@ export function hasAnyApiKey(keys: ApiKeys): boolean {
keys.openai ||
keys.deepseek ||
keys.moonshot ||
keys.brave
keys.brave ||
keys.e2b
)
}

Expand Down Expand Up @@ -93,6 +97,7 @@ export function applyApiKeysToEnv(
| `DEEPSEEK_API_KEY`
| `MOONSHOT_API_KEY`
| `BRAVE_SEARCH_API_KEY`
| `E2B_API_KEY`
): void => {
const next = value ?? fallback
if (next) {
Expand All @@ -106,6 +111,7 @@ export function applyApiKeysToEnv(
resolveSlot(saved.deepseek, launchEnv.deepseek, `DEEPSEEK_API_KEY`)
resolveSlot(saved.moonshot, launchEnv.moonshot, `MOONSHOT_API_KEY`)
resolveSlot(saved.brave, launchEnv.brave, `BRAVE_SEARCH_API_KEY`)
resolveSlot(saved.e2b, launchEnv.e2b, `E2B_API_KEY`)
}

export type ApiKeyStatusDeps = {
Expand All @@ -119,8 +125,8 @@ export async function getApiKeysStatus(
deps: ApiKeyStatusDeps
): Promise<ApiKeysStatus> {
const saved = deps.apiKeys
// Brave is optional (falls back to Anthropic built-in search), so it doesn't
// count toward "the app is configured".
// Brave and E2B are optional: search falls back to Anthropic's built-in tool,
// and E2B only enables the remote sandbox profile.
const hasAnyKey = Boolean(
saved.anthropic || saved.openai || saved.deepseek || saved.moonshot
)
Expand All @@ -130,6 +136,7 @@ export async function getApiKeysStatus(
deepseek: saved.deepseek ? null : deps.launchEnv.deepseek,
moonshot: saved.moonshot ? null : deps.launchEnv.moonshot,
brave: saved.brave ? null : deps.launchEnv.brave,
e2b: saved.e2b ? null : deps.launchEnv.e2b,
}
const codex = await deps.getCodexStatus()
const modelPicker = createModelPickerStatus({
Expand Down Expand Up @@ -167,7 +174,8 @@ export async function setApiKeys(
normalized.openai !== deps.apiKeys.openai ||
normalized.deepseek !== deps.apiKeys.deepseek ||
normalized.moonshot !== deps.apiKeys.moonshot ||
normalized.brave !== deps.apiKeys.brave
normalized.brave !== deps.apiKeys.brave ||
normalized.e2b !== deps.apiKeys.e2b
Object.assign(deps.apiKeys, normalized)
await saveApiKeysToSecret(deps.secretStore, deps.apiKeysRef(), deps.apiKeys)
applyApiKeysToEnv(deps.apiKeys, deps.launchEnv, deps.env)
Expand Down
1 change: 1 addition & 0 deletions packages/agents-desktop/src/shared/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ export type ApiKeys = {
deepseek: string | null
moonshot: string | null
brave: string | null
e2b: string | null
}

export type ModelPickerChoice = {
Expand Down
9 changes: 9 additions & 0 deletions packages/agents-desktop/vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ const MUST_EXTERNALIZE = new Set([
`jsdom`,
`pino`,
`pino-pretty`,
// `inlineDynamicImports` would inline the lazy `dockerode` import (and its
// native `ssh2`/`cpu-features` deps), which rollup can't bundle. Externalize
// the chain: it's an optional runtime dep, gracefully absent otherwise.
`dockerode`,
`ssh2`,
`cpu-features`,
// Same treatment for the lazy `e2b` import behind the remote sandbox
// profile — an optional runtime dep, required from node_modules at runtime.
`e2b`,
])

function externalizeBareImports(
Expand Down
30 changes: 30 additions & 0 deletions packages/agents-runtime/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,32 @@
"default": "./dist/tools.cjs"
}
},
"./sandbox": {
"import": {
"types": "./dist/sandbox.d.ts",
"default": "./dist/sandbox.js"
},
"require": {
"types": "./dist/sandbox.d.cts",
"default": "./dist/sandbox.cjs"
}
},
"./sandbox/docker": {
"import": {
"types": "./dist/sandbox-docker.d.ts",
"default": "./dist/sandbox-docker.js"
},
"require": {
"types": "./dist/sandbox-docker.d.cts",
"default": "./dist/sandbox-docker.cjs"
}
},
"./package.json": "./package.json"
},
"peerDependencies": {
"@tanstack/react-db": ">=0.1.78",
"dockerode": ">=5.0.0",
"e2b": ">=2.0.0",
"react": ">=18"
},
"peerDependenciesMeta": {
Expand All @@ -76,6 +98,12 @@
},
"@tanstack/react-db": {
"optional": true
},
"dockerode": {
"optional": true
},
"e2b": {
"optional": true
}
},
"dependencies": {
Expand All @@ -102,10 +130,12 @@
},
"devDependencies": {
"@durable-streams/server": "^0.3.5",
"@types/dockerode": "^4.0.1",
"@types/jsdom": "^27.0.0",
"@types/node": "^22.19.15",
"@types/turndown": "^5.0.6",
"@vitest/coverage-v8": "^3.2.4",
"dockerode": "^5.0.0",
"tsdown": "^0.9.0",
"typescript": "^5.9.3",
"vitest": "^3.2.4"
Expand Down
3 changes: 3 additions & 0 deletions packages/agents-runtime/src/context-factory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { CACHE_TIERS } from './types'
import { composeToolsWithProviders } from './tool-providers'
import type { HydratedEventSourceWake } from './event-sources'
import type { ChangeEvent } from '@durable-streams/state'
import type { Sandbox } from './sandbox/types'
import type {
AgentConfig,
AgentHandle,
Expand Down Expand Up @@ -71,6 +72,7 @@ export interface HandlerContextConfig<TState extends StateProxy = StateProxy> {
state: TState
actions: Record<string, (...args: Array<unknown>) => unknown>
electricTools: Array<AgentTool>
sandbox: Sandbox
events: Array<ChangeEvent>
writeEvent: (event: ChangeEvent) => void
wakeSession: WakeSession
Expand Down Expand Up @@ -708,6 +710,7 @@ export function createHandlerContext<TState extends StateProxy = StateProxy>(
actions: config.actions,
electricTools: config.electricTools,
signal: config.runSignal ?? new AbortController().signal,
sandbox: config.sandbox,
useAgent(cfg) {
agentConfig = cfg
return agent
Expand Down
59 changes: 55 additions & 4 deletions packages/agents-runtime/src/create-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { passthrough } from './entity-schema'
import { runtimeLog } from './log'
import { appendPathToUrl } from './url'
import { verifyWebhookSignature } from './webhook-signature'
import type { SandboxProfile } from './sandbox/types'
import type { EntityRegistry } from './define-entity'
import type { IncomingMessage, ServerResponse } from 'node:http'
import type { WebhookSignatureVerifierConfig } from './webhook-signature'
Expand Down Expand Up @@ -105,6 +106,15 @@ export interface RuntimeRouterConfig {
onWakeError?: (error: Error) => boolean | void
/** Max number of concurrent entity-type registrations (default: 8). */
registrationConcurrency?: number
/**
* Sandbox profiles registered by this runtime. Each profile is a
* `(name, label, description?, factory)` tuple — the factory stays
* local to the runtime; only the descriptive fields are advertised
* to the agents-server (via the runner registration) and surfaced
* in the UI picker. Spawn payloads pass `sandbox.profile` and the
* server validates against the target runner's advertised set.
*/
sandboxProfiles?: ReadonlyArray<SandboxProfile>
/**
* Public URL of this runtime, forwarded to the agents-server so it can be
* included in GET /api/runtimes. If omitted the runtime is registered but
Expand Down Expand Up @@ -161,6 +171,20 @@ export interface RuntimeRouter {
/** Names of all registered entity types */
readonly typeNames: Array<string>

/**
* Wire-shape descriptors for sandbox profiles registered on this
* runtime. Used by the runner registration to advertise the profile
* set to the agents-server (factory closures are intentionally not
* included).
*/
readonly sandboxProfileDescriptors: Array<{
name: string
label: string
description?: string
/** True for off-host (remote-provider) profiles; see SandboxProfile.remote. */
remote?: boolean
}>

/** Register all entity types with the durable streams server */
registerTypes: () => Promise<void>
}
Expand Down Expand Up @@ -201,17 +225,31 @@ export function createRuntimeRouter(
webhookSignature,
} = normalized

// Entity-type lookup by name. An explicitly supplied `registry` takes
// precedence; without one the lookup goes through `getEntityType`.
const getRegisteredType = (name: string) => {
  if (registry) {
    return registry.get(name)
  }
  return getEntityType(name)
}
// Entity-type listing, following the same precedence: `registry.list()` when a
// registry was supplied, `listEntityTypes()` otherwise.
const getRegisteredTypes = () => {
  if (registry) {
    return registry.list()
  }
  return listEntityTypes()
}

// Build a name -> profile lookup from the configured sandbox profiles (none
// configured yields an empty map). Two profiles sharing a name is treated as
// a configuration mistake: throw immediately instead of letting the later
// entry silently replace the earlier one.
const sandboxProfiles = new Map<string, SandboxProfile>()
for (const candidate of config.sandboxProfiles ?? []) {
  const { name } = candidate
  if (!sandboxProfiles.has(name)) {
    sandboxProfiles.set(name, candidate)
    continue
  }
  throw new Error(
    `[agent-runtime] duplicate sandbox profile name "${name}" registered on createRuntimeRouter`
  )
}

// Settings bundle typed as `ProcessWakeConfig`. Alongside the base URL, the
// entity registry, `createElectricTools` and the idle/heartbeat timings, it
// carries the name-indexed `sandboxProfiles` map — presumably so wake
// processing can resolve a profile by name; the consumer is not visible here
// (TODO confirm).
const wakeConfig: ProcessWakeConfig = {
baseUrl,
registry,
createElectricTools,
idleTimeout,
heartbeatInterval,
sandboxProfiles,
}
const getRegisteredType = (name: string) =>
registry ? registry.get(name) : getEntityType(name)
const getRegisteredTypes = () =>
registry ? registry.list() : listEntityTypes()
const debugRegistrationTiming =
process.env.ELECTRIC_AGENTS_DEBUG_REGISTRATION_TIMING === `1`
const pendingWakes = new Set<Promise<void>>()
Expand Down Expand Up @@ -541,6 +579,17 @@ export function createRuntimeRouter(
}
}

// Descriptor view of the registered sandbox profiles, in registration order.
// Only the descriptive fields are copied — `name` and `label` always,
// `description` and `remote` only when they are defined — so optional keys are
// omitted rather than present with an `undefined` value, and nothing else from
// the profile is carried over.
const sandboxProfileDescriptors = Array.from(
  sandboxProfiles.values(),
  ({ name, label, description, remote }) => {
    const descriptor: {
      name: string
      label: string
      description?: string
      remote?: boolean
    } = { name, label }
    if (description !== undefined) {
      descriptor.description = description
    }
    if (remote !== undefined) {
      descriptor.remote = remote
    }
    return descriptor
  }
)

return {
handleRequest,
handleWebhookRequest,
Expand All @@ -553,6 +602,7 @@ export function createRuntimeRouter(
get typeNames() {
return getRegisteredTypes().map((entry) => entry.name)
},
sandboxProfileDescriptors,
registerTypes,
}
}
Expand Down Expand Up @@ -600,6 +650,7 @@ export function createRuntimeHandler(
get typeNames() {
return router.typeNames
},
sandboxProfileDescriptors: router.sandboxProfileDescriptors,
registerTypes: router.registerTypes,
}
}
Expand Down
Loading
Loading