From f67e189b8eeba0540b157eb88450ff68f33e09da Mon Sep 17 00:00:00 2001 From: Michael Ramos Date: Sat, 4 Jul 2026 15:20:07 -0700 Subject: [PATCH 1/4] feat(guide): per-file summary on guide diff refs Each diffs[] entry now carries a required (schema-enforced) 1-2 sentence summary of the semantic change in that file, written from the diff hunks alone. Sanitizer passes it through when it's a non-blank string and omits it otherwise -- a missing summary never drops the ref or fails the guide. Rendered as a muted inline-markdown line above each diff in the guide; marker-engine contract and demo data updated to match. --- .../components/guide/GuideDiffSection.tsx | 6 +++ packages/review-editor/demoGuide.ts | 5 +++ packages/server/guide/guide-review.test.ts | 23 +++++++++++ packages/server/guide/guide-review.ts | 41 +++++++++++++------ packages/shared/guide.ts | 10 +++-- 5 files changed, 69 insertions(+), 16 deletions(-) diff --git a/packages/review-editor/components/guide/GuideDiffSection.tsx b/packages/review-editor/components/guide/GuideDiffSection.tsx index d8e966c18..f3d0b4ad2 100644 --- a/packages/review-editor/components/guide/GuideDiffSection.tsx +++ b/packages/review-editor/components/guide/GuideDiffSection.tsx @@ -1,5 +1,6 @@ import React, { useMemo } from 'react'; import type { GuideDiffRef } from '@plannotator/shared/guide'; +import { renderInlineMarkdown } from '../../utils/renderInlineMarkdown'; import { DiffViewer } from '../DiffViewer'; import { useReviewState } from '../../dock/ReviewStateContext'; import { annotationMatchesPrScope } from '../../utils/annotationScope'; @@ -80,6 +81,11 @@ export const GuideDiffSection: React.FC = ({ diffRef, isF return (
+ {diffRef.summary && ( +

+ {renderInlineMarkdown(diffRef.summary)} +

+ )} {/* DiffViewer is `h-full flex flex-col` internally (FileHeader + its own scrolling body) — it fills whatever height its parent gives it rather than growing to fit content. The guide page is one continuous scroll diff --git a/packages/review-editor/demoGuide.ts b/packages/review-editor/demoGuide.ts index bccf53402..78cb1472e 100644 --- a/packages/review-editor/demoGuide.ts +++ b/packages/review-editor/demoGuide.ts @@ -24,6 +24,7 @@ export const DEMO_GUIDE: CodeGuideData = { diffs: [ { file: 'src/components/Button.tsx', + summary: 'Guards `onClick` behind the `disabled` prop and forwards native `disabled` to the DOM element.', }, ], }, @@ -34,9 +35,11 @@ export const DEMO_GUIDE: CodeGuideData = { diffs: [ { file: 'src/hooks/useAuth.ts', + summary: 'Replaces the untyped fetch calls with `api.auth.*` and adds `error` state so failed logins surface in the UI.', }, { file: 'src/services/api.ts', + summary: 'New typed request client with an `ApiError` class carrying HTTP status and error code.', }, ], }, @@ -47,9 +50,11 @@ export const DEMO_GUIDE: CodeGuideData = { diffs: [ { file: 'src/config/settings.ts', + summary: 'Indentation-only reformat of the config getters; no behavior change.', }, { file: 'src/components/Modal.tsx', + summary: 'New portal-based dialog component; added but not yet wired up anywhere.', }, ], }, diff --git a/packages/server/guide/guide-review.test.ts b/packages/server/guide/guide-review.test.ts index 0b51444f1..f25a9df3f 100644 --- a/packages/server/guide/guide-review.test.ts +++ b/packages/server/guide/guide-review.test.ts @@ -65,6 +65,29 @@ describe("validateGuideOutput", () => { expect(result.guide.unplacedFiles?.sort()).toEqual(["src/b.ts", "src/c.ts"]); }); + it("carries per-file summaries through, omitting blank/non-string ones without dropping the ref", () => { + const raw = JSON.parse( + guideJson([ + { + title: "S", + overview: "o", + diffs: [ + { file: "src/a.ts", summary: "Adds the thing." }, + { file: "src/b.ts", summary: " " }, + { file: "src/c.ts", summary: 42 }, + ], + }, + ]), + ); + const result = validateGuideOutput(raw, FILES); + if ("error" in result) throw new Error(result.error); + expect(result.guide.sections[0].diffs).toEqual([ + { file: "src/a.ts", summary: "Adds the thing." }, + { file: "src/b.ts" }, + { file: "src/c.ts" }, + ]); + }); + it("coerces non-string title/intent from prompt-only marker engines", () => { const raw = JSON.parse(guideJson([{ title: "S", overview: "o", diffs: [{ file: "src/a.ts" }] }])); raw.title = 42; diff --git a/packages/server/guide/guide-review.ts b/packages/server/guide/guide-review.ts index 23235ceab..ff8480303 100644 --- a/packages/server/guide/guide-review.ts +++ b/packages/server/guide/guide-review.ts @@ -44,8 +44,9 @@ export const GUIDE_SCHEMA_JSON = JSON.stringify({ type: "object", properties: { file: { type: "string" }, + summary: { type: "string" }, }, - required: ["file"], + required: ["file", "summary"], additionalProperties: false, }, }, @@ -186,12 +187,20 @@ never shares a chapter. - A tiny fenced code block (2-5 lines) only when code says it better than a sentence, e.g. a new API shape. Never paste diff hunks; the diffs render next to the overview already. -- **diffs**: one or more file references. Each has exactly one field, - **file**: the EXACT repo-relative path as it appears in the diff (or in - the Changed files list, if provided). Copy it, never invent it, never - abbreviate or normalize it (no leading/trailing slash changes, no case - changes). All explanation lives in the overview; there are no per-file - captions. +- **diffs**: one or more file references. Each has two fields: + - **file**: the EXACT repo-relative path as it appears in the diff (or in + the Changed files list, if provided). Copy it, never invent it, never + abbreviate or normalize it (no leading/trailing slash changes, no case + changes). + - **summary**: 1-2 sentences describing the semantic change in THIS file, + written from the diff hunks you already have. Say what the change does + ("extracts the staging logic into a tri-state override map"), not where + it sits ("modifies lines 30-80"). Do NOT open the file, search the + codebase, or do any per-file investigation to write it. Do not repeat + the section overview: the overview carries the why and the + implications; the summary says what this specific file contributes. + For a trivial change (import bump, rename fallout), one short clause + is enough. ### unplacedFiles Always include unplacedFiles. Use an empty array when every changed file is @@ -480,7 +489,7 @@ ${markerOpen(nonce)} "title": "Guide marker contract", "overview": "Explains what changed here, why it exists, and its key implications, in 2-6 sentences.", "diffs": [ - { "file": "packages/server/guide/guide-review.ts" } + { "file": "packages/server/guide/guide-review.ts", "summary": "Adds the marker output contract so engines without a schema flag return the same guide JSON." } ] } ], @@ -497,9 +506,12 @@ Schema: and its key implications. Backtick file names/symbols/config keys; bold the single key clause; bullets only for 3+ parallel changes; a tiny fenced code block only when code says it better than prose - - diffs: array of objects, each with exactly one field: + - diffs: array of objects, each with two fields: - file: string — the EXACT repo-relative path as it appears in the diff or the Changed files list; never invented, abbreviated, or re-cased + - summary: string — 1-2 sentences on the semantic change in this file, + written from the diff hunks alone (no per-file investigation); what the + change does, not which lines it touches; never a repeat of the overview - unplacedFiles: array of strings, always present — changed files that don't belong in any section; use an empty array when every changed file is placed @@ -721,11 +733,16 @@ function sanitizeGuideSection(raw: unknown): GuideSection | null { const s = raw as Record; const title = typeof s.title === "string" ? s.title : ""; const overview = typeof s.overview === "string" ? s.overview : ""; - // Map to `{ file }` only — stray model-emitted fields never reach the client. + // Map to `{ file, summary? }` only — stray model-emitted fields never reach + // the client. A missing/non-string/blank summary is simply omitted (the UI + // renders nothing for it), never a reason to drop the ref or fail the guide. const diffs: GuideDiffRef[] = Array.isArray(s.diffs) ? s.diffs - .filter((d): d is { file: string } => !!d && typeof d === "object" && typeof (d as Record).file === "string") - .map((d) => ({ file: d.file })) + .filter((d): d is Record & { file: string } => !!d && typeof d === "object" && typeof (d as Record).file === "string") + .map((d) => { + const summary = typeof d.summary === "string" && d.summary.trim().length > 0 ? d.summary : undefined; + return summary ? { file: d.file, summary } : { file: d.file }; + }) : []; if (title.trim().length === 0 && overview.trim().length === 0 && diffs.length === 0) return null; // Every surviving section gets a non-empty title: a diffs-only section diff --git a/packages/shared/guide.ts b/packages/shared/guide.ts index 13bd50fa8..90d1d920c 100644 --- a/packages/shared/guide.ts +++ b/packages/shared/guide.ts @@ -1,9 +1,11 @@ export interface GuideDiffRef { - /** Repo-relative path; must match a DiffFile.path in the current review patch. - * Deliberately the only field: the section overview carries all semantics — - * per-diff captions/line hints proved to be noise the model shouldn't spend - * generation effort on. */ + /** Repo-relative path; must match a DiffFile.path in the current review patch. */ file: string; + /** 1-2 sentence semantic description of what changed in THIS file, written + * from the diff hunks alone (no investigation). Required by the JSON schema + * for schema-enforced engines; optional here so a marker engine that omits + * it still yields a valid guide — the UI simply renders nothing. */ + summary?: string; } export interface GuideSection { From 8b033f07bcf7db6fb1bf78211ec1b4f8c792d3dd Mon Sep 17 00:00:00 2001 From: Michael Ramos Date: Sat, 4 Jul 2026 15:21:58 -0700 Subject: [PATCH 2/4] fix(guide): repair prompt must not invent missing summaries The guide schema now requires summary, so a schema-enforced repair of a payload that lacks them (marker-engine output) would force the model to fabricate captions with no diff in sight. Instruct it to fill missing required fields with an empty string instead; the sanitizer already renders nothing for blanks. --- packages/server/guide/guide-review.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/server/guide/guide-review.ts b/packages/server/guide/guide-review.ts index ff8480303..259fef7ab 100644 --- a/packages/server/guide/guide-review.ts +++ b/packages/server/guide/guide-review.ts @@ -540,7 +540,7 @@ export function composeGuideMarkerPrompt(userMessage: string, nonce: string): st /** System framing shared verbatim across all three repair engine paths. */ function buildGuideRepairFraming(): string { - return "The JSON below was produced for the schema that follows but is malformed or structurally invalid. Output ONLY the corrected JSON. Fix structure and syntax; NEVER change the content: titles, overviews, file paths stay exactly as written unless syntactically impossible."; + return "The JSON below was produced for the schema that follows but is malformed or structurally invalid. Output ONLY the corrected JSON. Fix structure and syntax; NEVER change the content: titles, overviews, file paths, summaries stay exactly as written unless syntactically impossible. If a required field is missing from the payload (e.g. a diff entry's summary), fill it with an empty string; never invent content."; } /** Repair prompt for the schema-enforced engines (Claude --json-schema, From 3625e894ac60f5809d1d83babf7c9bb0015e255f Mon Sep 17 00:00:00 2001 From: Michael Ramos Date: Sat, 4 Jul 2026 15:58:01 -0700 Subject: [PATCH 3/4] feat(agents): GitHub Copilot CLI as a marker engine for review + guide jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds copilot as the fourth marker engine (no schema flag, so it uses the nonce-tagged marker-block contract like Cursor/OpenCode/Pi): - marker-review.ts: COPILOT_ENGINE — 'copilot --output-format json' JSONL stream (assistant.message carries the assembled text; deltas skipped), models discovered from 'copilot help config', live-log formatting for tool.execution_start/complete. Non-interactive posture: --no-ask-user auto-denies unallowed tools, --deny-tool=write, read-only-oriented shell allowlist (git/gh/glab/jj/wc), builtin MCPs and auto-update disabled. - Exported MarkerEngineId and replaced every 'cursor'|'opencode'|'pi' cast with it (Bun server, Pi server mirror, guide-review) so the next engine is a two-edit change. - agent-jobs (both runtimes): copilot in SERVER_BUILT_PROVIDERS; capability entry + model discovery come free from the MARKER_ENGINES loop. - UI: copilot review/guide engine with per-surface model settings (useAgentSettings), AgentsTab launch + config rows, GuideEmptyState launcher, job-detail labels, CopilotIcon (currentColor, official mark). - Tests: argv/read-only flags, help-config model parsing, stream reduction, full marker pipeline; profile-map expectations widened. Verified live: composed review prompt through the real copilot binary, marker block parsed, seeded bug found. --- AGENTS.md | 2 +- apps/pi-extension/server/agent-jobs.ts | 10 +- apps/pi-extension/server/serverReview.ts | 7 +- .../components/guide/GuideEmptyState.tsx | 22 ++- .../dock/panels/ReviewAgentJobDetailPanel.tsx | 4 +- packages/server/agent-jobs.ts | 10 +- packages/server/guide/guide-review.ts | 11 +- packages/server/marker-review.test.ts | 52 ++++++ packages/server/marker-review.ts | 159 +++++++++++++++++- packages/server/review.ts | 7 +- packages/ui/components/AgentsTab.tsx | 58 ++++++- packages/ui/components/icons/AgentIcons.tsx | 12 ++ packages/ui/hooks/useAgentSettings.test.ts | 4 + packages/ui/hooks/useAgentSettings.ts | 40 ++++- 14 files changed, 364 insertions(+), 34 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 73f802432..2d9be25c1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -327,7 +327,7 @@ During normal plan review, an Archive sidebar tab provides the same browsing via | `/api/external-annotations` | POST | Add external annotations (single or batch `{ annotations: [...] }`) | | `/api/external-annotations` | PATCH | Update fields on a single annotation (`?id=`) | | `/api/external-annotations` | DELETE | Remove by `?id=`, `?source=`, or clear all | -| `/api/agents/capabilities` | GET | Check available agent providers (claude, codex, tour, guide, cursor, opencode, pi) | +| `/api/agents/capabilities` | GET | Check available agent providers (claude, codex, tour, guide, cursor, opencode, pi, copilot) | | `/api/agents/review-profiles` | GET | List launchable review profiles (enabled skills + builtin default) | | `/api/agents/skills` | GET | List all discovered skills for the add-a-review picker (each flagged `enabled`) | | `/api/agents/review-skills` | POST | Enable a skill as a review (body: `{ name }`); writes `review-skills.json` | diff --git a/apps/pi-extension/server/agent-jobs.ts b/apps/pi-extension/server/agent-jobs.ts index 6990ce431..a1649e50c 100644 --- a/apps/pi-extension/server/agent-jobs.ts +++ b/apps/pi-extension/server/agent-jobs.ts @@ -28,6 +28,7 @@ import { MARKER_ENGINES, formatMarkerLogEvent, type MarkerEngine, + type MarkerEngineId, type MarkerModel, } from "../generated/marker-review.js"; import { json, parseBody } from "./helpers.js"; @@ -51,6 +52,7 @@ const SERVER_BUILT_PROVIDERS: ReadonlySet = new Set([ "cursor", "opencode", "pi", + "copilot", ]); // --------------------------------------------------------------------------- @@ -179,7 +181,7 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) { const markerModelsCache = new Map(); async function buildCapabilitiesResponse(): Promise { const providers = await Promise.all(capabilities.map(async (c) => { - const engine = MARKER_ENGINES[c.id as "cursor" | "opencode" | "pi"]; + const engine = MARKER_ENGINES[c.id as MarkerEngineId]; if (!engine || !c.available) return c; let models = markerModelsCache.get(engine.id); if (!models) { @@ -294,8 +296,8 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) { // Guide jobs keep provider: "guide" and carry the marker engine on // spawnOptions.engine instead — fall back to that lookup so guide // logs get the same readable formatting as review jobs. - const markerEngine = MARKER_ENGINES[provider as "cursor" | "opencode" | "pi"] - ?? (spawnOptions?.engine ? MARKER_ENGINES[spawnOptions.engine as "cursor" | "opencode" | "pi"] : undefined); + const markerEngine = MARKER_ENGINES[provider as MarkerEngineId] + ?? (spawnOptions?.engine ? MARKER_ENGINES[spawnOptions.engine as MarkerEngineId] : undefined); if (markerEngine) { const formatted = formatMarkerLogEvent(line, markerEngine); if (formatted !== null) broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' }); @@ -390,7 +392,7 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) { // that fail-closed rule too: both are single-shot, all-or-nothing // outputs with nothing meaningful partially ingested, so an // unexpected throw means the whole result is unusable. - if (MARKER_ENGINES[provider as "cursor" | "opencode" | "pi"]) { + if (MARKER_ENGINES[provider as MarkerEngineId]) { entry.info.status = "failed"; entry.info.error = err instanceof Error ? err.message : `${provider} result ingestion failed`; } else if (provider === "tour" || provider === "guide") { diff --git a/apps/pi-extension/server/serverReview.ts b/apps/pi-extension/server/serverReview.ts index 3f3bc9161..322b03839 100644 --- a/apps/pi-extension/server/serverReview.ts +++ b/apps/pi-extension/server/serverReview.ts @@ -120,6 +120,7 @@ import { transformMarkerFindings, makeMarkerNonce, extractMarkerNonce, + type MarkerEngineId, } from "../generated/marker-review.js"; import { WorkspaceReviewSession, @@ -939,7 +940,7 @@ export async function startReviewServer(options: { // id itself for claude/codex. const failedEngine = typeof config?.engine === "string" && config.engine ? config.engine : undefined; const failedEngineBinary = failedEngine - ? MARKER_ENGINES[failedEngine as "cursor" | "opencode" | "pi"]?.binary ?? failedEngine + ? MARKER_ENGINES[failedEngine as MarkerEngineId]?.binary ?? failedEngine : undefined; const repairEngine = failedEngine && commandExists(failedEngineBinary!) @@ -1027,7 +1028,7 @@ export async function startReviewServer(options: { // no cwd flag — it always uses the process's actual cwd, which spawnJob // already sets from this same cwd). // captureStdout is required: the marker block comes back on stdout NDJSON. - const markerEngine = MARKER_ENGINES[provider as "cursor" | "opencode" | "pi"]; + const markerEngine = MARKER_ENGINES[provider as MarkerEngineId]; if (markerEngine) { const model = typeof config?.model === "string" && config.model ? config.model : undefined; const thinking = typeof config?.thinking === "string" && config.thinking ? config.thinking : undefined; @@ -1141,7 +1142,7 @@ export async function startReviewServer(options: { // an exit-0 job marked done). Mirrors the Tour fail-closed pattern below. // Findings carry nullable file/line, classified into line/whole-file/ // general by transformMarkerFindings — nothing is dropped (same as Claude). - const markerEngine = MARKER_ENGINES[job.provider as "cursor" | "opencode" | "pi"]; + const markerEngine = MARKER_ENGINES[job.provider as MarkerEngineId]; if (markerEngine) { // Recover the per-job nonce embedded in the prompt; without it no block // can be trusted, so parse fails closed below. diff --git a/packages/review-editor/components/guide/GuideEmptyState.tsx b/packages/review-editor/components/guide/GuideEmptyState.tsx index cae2f8dab..37942ca89 100644 --- a/packages/review-editor/components/guide/GuideEmptyState.tsx +++ b/packages/review-editor/components/guide/GuideEmptyState.tsx @@ -23,6 +23,7 @@ const GUIDE_ENGINES = Object.keys(REVIEW_ENGINE_LABEL) as ReviewEngine[]; const CURSOR_FALLBACK = [{ value: 'auto', label: 'Auto' }]; const OPENCODE_FALLBACK = [{ value: '', label: 'Default' }]; const PI_FALLBACK = [{ value: '', label: 'Default' }]; +const COPILOT_FALLBACK = [{ value: '', label: 'Default' }]; type Option = { value: string; label: string }; @@ -169,6 +170,7 @@ export const GuideEmptyState: React.FC = ({ capabilities, guideOpencodeModel, guidePiModel, guidePiThinking, + guideCopilotModel, setGuideEngine, setGuideClaudeModel, setGuideClaudeEffort, @@ -178,6 +180,7 @@ export const GuideEmptyState: React.FC = ({ capabilities, setGuideOpencodeModel, setGuidePiModel, setGuidePiThinking, + setGuideCopilotModel, } = useAgentSettings(); const [launching, setLaunching] = useState(false); @@ -277,7 +280,7 @@ export const GuideEmptyState: React.FC = ({ capabilities, // saved-default user with a blank pill and no way back after picking a // concrete model. Cursor REPLACES: its discovered list natively includes // 'auto', so prepending would duplicate it. - const markerModels = (id: 'cursor' | 'opencode' | 'pi', fallback: Option[]): Option[] => { + const markerModels = (id: 'cursor' | 'opencode' | 'pi' | 'copilot', fallback: Option[]): Option[] => { const models = capabilities?.providers.find((p) => p.id === id)?.models; if (!models || models.length === 0) return fallback; const discovered = models.map((m) => ({ value: m.id, label: m.label })); @@ -289,6 +292,7 @@ export const GuideEmptyState: React.FC = ({ capabilities, const cursorOptions = markerModels('cursor', CURSOR_FALLBACK); const opencodeOptions = markerModels('opencode', OPENCODE_FALLBACK); const piOptions = markerModels('pi', PI_FALLBACK); + const copilotOptions = markerModels('copilot', COPILOT_FALLBACK); // AgentsTab reconciles the saved guide Cursor/OpenCode/Pi model back to the // catalog head via effects when the live catalog no longer contains it (see @@ -307,6 +311,7 @@ export const GuideEmptyState: React.FC = ({ capabilities, const effectiveCursorModel = effectiveModel(guideCursorModel, cursorOptions); const effectiveOpencodeModel = effectiveModel(guideOpencodeModel, opencodeOptions); const effectivePiModel = effectiveModel(guidePiModel, piOptions); + const effectiveCopilotModel = effectiveModel(guideCopilotModel, copilotOptions); const modelPicker: { value: string; options: Option[]; onChange: (v: string) => void } = engine === 'claude' @@ -317,7 +322,9 @@ export const GuideEmptyState: React.FC = ({ capabilities, ? { value: effectiveCursorModel, options: cursorOptions, onChange: setGuideCursorModel } : engine === 'opencode' ? { value: effectiveOpencodeModel, options: opencodeOptions, onChange: setGuideOpencodeModel } - : { value: effectivePiModel, options: piOptions, onChange: setGuidePiModel }; + : engine === 'copilot' + ? { value: effectiveCopilotModel, options: copilotOptions, onChange: setGuideCopilotModel } + : { value: effectivePiModel, options: piOptions, onChange: setGuidePiModel }; const canLaunch = guideAvailable && availableEngines.length > 0 && !launching; @@ -350,7 +357,14 @@ export const GuideEmptyState: React.FC = ({ capabilities, ...(effectivePiModel ? { model: effectivePiModel } : {}), thinking: guidePiThinking, } - : { + : engine === 'copilot' + ? { + provider: 'guide', + label: 'Guided Review', + engine: 'copilot', + ...(effectiveCopilotModel ? { model: effectiveCopilotModel } : {}), + } + : { provider: 'guide', label: 'Guided Review', engine, @@ -434,7 +448,7 @@ export const GuideEmptyState: React.FC = ({ capabilities, {!guideAvailable || availableEngines.length === 0 ? (

- Guided review needs an agent CLI (Claude, Codex, Cursor, OpenCode, or Pi) available on this machine. + Guided review needs an agent CLI (Claude, Codex, Cursor, OpenCode, Pi, or Copilot) available on this machine.

) : ( <> diff --git a/packages/review-editor/dock/panels/ReviewAgentJobDetailPanel.tsx b/packages/review-editor/dock/panels/ReviewAgentJobDetailPanel.tsx index 0643036d1..b507d2f03 100644 --- a/packages/review-editor/dock/panels/ReviewAgentJobDetailPanel.tsx +++ b/packages/review-editor/dock/panels/ReviewAgentJobDetailPanel.tsx @@ -431,10 +431,10 @@ function ProviderPill({ provider, engine, model }: { provider: string; engine?: // Guide's engine union is wider than Tour's (marker engines included) — // only show the model for Claude, mirroring Tour's own "skip it for // engines with verbose/technical model ids" convention. - const engineLabel = engine === 'codex' ? 'Codex' : engine === 'cursor' ? 'Cursor' : engine === 'opencode' ? 'OpenCode' : engine === 'pi' ? 'Pi' : 'Claude'; + const engineLabel = engine === 'codex' ? 'Codex' : engine === 'cursor' ? 'Cursor' : engine === 'opencode' ? 'OpenCode' : engine === 'pi' ? 'Pi' : engine === 'copilot' ? 'Copilot' : 'Claude'; label = model && engine === 'claude' ? `Guide · ${engineLabel} ${model.charAt(0).toUpperCase() + model.slice(1)}` : `Guide · ${engineLabel}`; } else { - label = provider === 'claude' ? 'Claude' : provider === 'codex' ? 'Codex' : provider === 'cursor' ? 'Cursor' : provider === 'opencode' ? 'OpenCode' : provider === 'pi' ? 'Pi' : 'Shell'; + label = provider === 'claude' ? 'Claude' : provider === 'codex' ? 'Codex' : provider === 'cursor' ? 'Cursor' : provider === 'opencode' ? 'OpenCode' : provider === 'pi' ? 'Pi' : provider === 'copilot' ? 'Copilot' : 'Shell'; } return ( = new Set([ "cursor", "opencode", "pi", + "copilot", ]); // --------------------------------------------------------------------------- @@ -204,7 +206,7 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJob const markerModelsCache = new Map(); async function buildCapabilitiesResponse(): Promise { const providers = await Promise.all(capabilities.map(async (c) => { - const engine = MARKER_ENGINES[c.id as "cursor" | "opencode" | "pi"]; + const engine = MARKER_ENGINES[c.id as MarkerEngineId]; if (!engine || !c.available) return c; let models = markerModelsCache.get(engine.id); if (!models) { @@ -356,8 +358,8 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJob // Guide jobs keep provider: "guide" and carry the marker engine on // spawnOptions.engine instead — fall back to that lookup so guide // logs get the same readable formatting as review jobs. - const markerEngine = MARKER_ENGINES[provider as "cursor" | "opencode" | "pi"] - ?? (spawnOptions?.engine ? MARKER_ENGINES[spawnOptions.engine as "cursor" | "opencode" | "pi"] : undefined); + const markerEngine = MARKER_ENGINES[provider as MarkerEngineId] + ?? (spawnOptions?.engine ? MARKER_ENGINES[spawnOptions.engine as MarkerEngineId] : undefined); if (markerEngine) { const formatted = formatMarkerLogEvent(line, markerEngine); if (formatted !== null) broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' }); @@ -430,7 +432,7 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions): AgentJob // stops/checklist, a guide's sections) with nothing meaningful // partially ingested, so an unexpected throw here means the whole // result is unusable — it must not sit at "done" with no content. - if (MARKER_ENGINES[provider as "cursor" | "opencode" | "pi"]) { + if (MARKER_ENGINES[provider as MarkerEngineId]) { entry.info.status = "failed"; entry.info.error = err instanceof Error ? err.message : `${provider} result ingestion failed`; } else if (provider === "tour" || provider === "guide") { diff --git a/packages/server/guide/guide-review.ts b/packages/server/guide/guide-review.ts index 259fef7ab..eacb9a13b 100644 --- a/packages/server/guide/guide-review.ts +++ b/packages/server/guide/guide-review.ts @@ -15,6 +15,7 @@ import { extractLastMarkerBlock, buildMarkerCommand, type MarkerEngine, + type MarkerEngineId, } from "../marker-review"; import type { CodeGuideOutput, @@ -873,7 +874,7 @@ export interface GuideSessionBuildCommandResult { cwd?: string; label?: string; prompt?: string; - engine: "claude" | "codex" | "cursor" | "opencode" | "pi"; + engine: "claude" | "codex" | MarkerEngineId; model: string; effort?: string; reasoningEffort?: string; @@ -1104,7 +1105,7 @@ export function createGuideSession(): GuideSession { launchChangedFiles, async buildCommand({ cwd, patch, diffType, options, prMetadata, changedFiles, config, repair }) { - const engine = (typeof config?.engine === "string" ? config.engine : "claude") as "claude" | "codex" | "cursor" | "opencode" | "pi"; + const engine = (typeof config?.engine === "string" ? config.engine : "claude") as "claude" | "codex" | MarkerEngineId; const explicitModel = typeof config?.model === "string" && config.model ? config.model : null; // "sonnet" is a Claude model, so we must NOT pass it to Codex or the // marker engines (Cursor, OpenCode, Pi) when no model is explicitly @@ -1120,7 +1121,7 @@ export function createGuideSession(): GuideSession { // output) IS the content to fix, not the diff. Force low-effort // defaults — this is a mechanical JSON-syntax fix, not a // re-analysis, and should be fast and cheap. - const markerEngine = MARKER_ENGINES[engine as "cursor" | "opencode" | "pi"]; + const markerEngine = MARKER_ENGINES[engine as MarkerEngineId]; if (markerEngine) { const thinking = "minimal"; const nonce = makeMarkerNonce(); @@ -1149,7 +1150,7 @@ export function createGuideSession(): GuideSession { // marker branch: per-job nonce embedded in the prompt, recovered from // job.prompt at parse time in onJobComplete below. captureStdout is // required — the marker block comes back on stdout NDJSON. - const markerEngine = MARKER_ENGINES[engine as "cursor" | "opencode" | "pi"]; + const markerEngine = MARKER_ENGINES[engine as MarkerEngineId]; if (markerEngine) { const thinking = typeof config?.thinking === "string" && config.thinking ? config.thinking : undefined; const nonce = makeMarkerNonce(); @@ -1183,7 +1184,7 @@ export function createGuideSession(): GuideSession { // succeeds, then only stashed below on an actual failure. let rawCandidate: string | undefined; - const markerEngine = MARKER_ENGINES[job.engine as "cursor" | "opencode" | "pi"]; + const markerEngine = MARKER_ENGINES[job.engine as MarkerEngineId]; if (markerEngine) { // Recover the per-job nonce embedded in the prompt; without it no // block can be trusted, so parsing fails closed below (same diff --git a/packages/server/marker-review.test.ts b/packages/server/marker-review.test.ts index f46e120d7..5ed1d1484 100644 --- a/packages/server/marker-review.test.ts +++ b/packages/server/marker-review.test.ts @@ -121,6 +121,34 @@ describe("buildMarkerCommand: opencode", () => { }); }); +describe("buildMarkerCommand: copilot", () => { + test("json output, non-interactive denial posture, read-only shell allowlist, prompt via -p", () => { + const { command } = buildMarkerCommand(MARKER_ENGINES.copilot, "review this", "claude-sonnet-5", "/repo"); + expect(command[0]).toBe("copilot"); + expect(command[command.indexOf("-C") + 1]).toBe("/repo"); + expect(command[command.indexOf("--output-format") + 1]).toBe("json"); + expect(command).toContain("--no-ask-user"); + expect(command).toContain("--no-auto-update"); + expect(command).toContain("--disable-builtin-mcps"); + expect(command).toContain("--deny-tool=write"); + expect(command).toContain("--allow-tool=shell(git:*)"); + expect(command).toContain("--allow-tool=shell(gh:*)"); + expect(command).not.toContain("--allow-all-tools"); + expect(command).not.toContain("--yolo"); + expect(command[command.indexOf("--model") + 1]).toBe("claude-sonnet-5"); + // Prompt is the VALUE of -p, last in argv. + expect(command[command.length - 2]).toBe("-p"); + expect(command[command.length - 1]).toBe("review this"); + }); + + test("omits --model for auto/empty/undefined; -C when no cwd", () => { + expect(buildMarkerCommand(MARKER_ENGINES.copilot, "p", "auto", "/repo").command).not.toContain("--model"); + expect(buildMarkerCommand(MARKER_ENGINES.copilot, "p", "", "/repo").command).not.toContain("--model"); + expect(buildMarkerCommand(MARKER_ENGINES.copilot, "p", undefined, "/repo").command).not.toContain("--model"); + expect(buildMarkerCommand(MARKER_ENGINES.copilot, "p").command).not.toContain("-C"); + }); +}); + // =========================================================================== // parseModels — both descriptors // =========================================================================== @@ -168,6 +196,30 @@ describe("parseModels: opencode", () => { // reduceMarkerStream / extractText — both descriptors // =========================================================================== +describe("reduceMarkerStream: copilot", () => { + test("reads assistant.message content once; deltas and bookkeeping are skipped", () => { + const stdout = + ndjson({ type: "session.tools_updated", data: { model: "claude-sonnet-5" } }) + + ndjson({ type: "assistant.message_delta", data: { deltaContent: "Loo" } }) + + ndjson({ type: "assistant.message_delta", data: { deltaContent: "king" } }) + + ndjson({ type: "assistant.message", data: { content: "Looking at the diff." } }) + + ndjson({ type: "assistant.message", data: { content: "", toolRequests: [{ name: "bash" }] } }) + + ndjson({ type: "result", exitCode: 0 }); + const { canonicalText } = reduceMarkerStream(stdout, MARKER_ENGINES.copilot); + expect(canonicalText).toBe("Looking at the diff."); + }); + + test("full pipeline: marker block inside assistant.message content parses", () => { + const stdout = ndjson({ + type: "assistant.message", + data: { content: "Commentary first.\n" + markerBlock(validReview) }, + }); + const out = parseMarkerStreamOutput(stdout, MARKER_ENGINES.copilot, NONCE); + expect(out?.findings).toHaveLength(1); + expect(out?.findings[0].file).toBe("packages/server/review.ts"); + }); +}); + describe("reduceMarkerStream: cursor", () => { test("reconstructs canonical text regardless of chunk boundaries", () => { const stdout = diff --git a/packages/server/marker-review.ts b/packages/server/marker-review.ts index 7cb56c303..70eabe08a 100644 --- a/packages/server/marker-review.ts +++ b/packages/server/marker-review.ts @@ -201,9 +201,13 @@ export interface MarkerBuildOptions { thinking?: string; } +/** Stable ids of the marker engines — the single union every cast/lookup + * should use, so adding an engine is one edit here plus a descriptor below. */ +export type MarkerEngineId = "cursor" | "opencode" | "pi" | "copilot"; + export interface MarkerEngine { /** Stable engine id — also the provider id used by the server. */ - id: "cursor" | "opencode" | "pi"; + id: MarkerEngineId; /** Display name for the capabilities/provider listing (e.g. "Cursor CLI"). */ name: string; /** The CLI binary to spawn (NOTE: cursor's binary is `agent`). */ @@ -645,7 +649,143 @@ function piBuildArgv(prompt: string, model?: string, cwd?: string, opts?: Marker } // --------------------------------------------------------------------------- -// The three descriptors + registry. +// Copilot engine helpers +// --------------------------------------------------------------------------- + +/** + * Copilot `extractText`: assistant text lives on `assistant.message` events + * (the complete message for that turn) as `data.content`. We deliberately do + * NOT also read `assistant.message_delta` events — `assistant.message` already + * carries the fully-assembled text, so summing both would double the canonical + * text (same shape as Pi's message_update/message_end split). A message that + * only carries toolRequests has empty content and contributes nothing. + */ +function copilotExtractText(event: MarkerStreamEvent): string | null { + if (event.type !== "assistant.message") return null; + const data = event.data as { content?: unknown } | undefined; + return typeof data?.content === "string" && data.content ? data.content : null; +} + +/** + * Parse `copilot help config` output into a model catalog. Copilot has no + * dedicated model-list command; the config help enumerates the valid values of + * the `model` setting as an indented block: + * + * `model`: AI model to use for Copilot CLI; ... + * - "claude-sonnet-5" + * - "gpt-5.5" + * + * We locate the `model`: line, then collect consecutive `- ""` lines until + * the first line that isn't one (the next setting's block). No header found — + * e.g. a future help rewrite — fails closed to [] (the picker just offers + * "Default" and the --model flag is omitted). + */ +function copilotParseModels(stdout: string): MarkerModel[] { + try { + if (!stdout) return []; + const lines = stripAnsi(stdout).split("\n"); + const headerIndex = lines.findIndex((l) => /^\s*`model`\s*:/.test(l)); + if (headerIndex === -1) return []; + + const models: MarkerModel[] = []; + const seen = new Set(); + for (const line of lines.slice(headerIndex + 1)) { + const match = /^\s*-\s*"([^"]+)"\s*$/.exec(line); + if (!match) break; // end of the model value block + const id = match[1]; + if (!id || seen.has(id)) continue; + seen.add(id); + models.push({ id, label: id }); + } + return models; + } catch { + return []; + } +} + +/** + * Format one `copilot --output-format json` event for the LiveLogViewer. Tool + * calls surface as `tool.execution_start` (toolName + args preview) and their + * failures as `tool.execution_complete` errors (a permission denial is worth + * seeing live); assistant text surfaces once, on `assistant.message` — the same + * event `copilotExtractText` reads. Session bookkeeping (mcp_servers_loaded, + * skills_loaded), the echoed user turn, turn lifecycle, and per-character + * `assistant.message_delta` events are noise and are skipped. + */ +function copilotFormatLogEvent(event: MarkerStreamEvent): string | null { + const data = event.data as Record | undefined; + switch (event.type) { + case "session.tools_updated": { + const model = typeof data?.model === "string" ? data.model : undefined; + return model ? `[init] model=${model}` : null; + } + case "tool.execution_start": { + const name = typeof data?.toolName === "string" ? data.toolName : "tool"; + const args = data?.arguments !== undefined ? JSON.stringify(data.arguments).slice(0, 100) : ""; + return `[${name}] ${args}`.trimEnd(); + } + case "tool.execution_complete": { + const error = data?.error as { message?: unknown } | undefined; + if (error && typeof error.message === "string") return `[tool] ${error.message}`; + return null; // success — the start line already showed the call + } + case "assistant.message": { + const text = copilotExtractText(event); + return text ? text : null; + } + default: + return null; + } +} + +/** + * Build the `copilot -p` command. + * + * `--output-format json` emits the JSONL event stream we capture on stdout. + * `--no-ask-user` disables the ask_user tool (a background job can never + * answer), and in non-interactive mode any tool without an allow rule is + * auto-denied rather than prompted — the model receives a clean "denied" + * result and continues. Read-ONLY-ish posture: `--deny-tool=write` removes the + * file-mutation tools (deny rules beat every allow rule, per Copilot's + * permission docs) and the allowlist opens only the VCS/forge inspection + * commands the review/guide prompts rely on (`git`/`gh`/`glab`/`jj`, plus wc) — + * the same command families Claude's fine-grained allowlist grants, at + * first-level-subcommand-wildcard granularity because that's the pattern shape + * Copilot documents for git/gh. Shell is otherwise auto-denied, which puts + * Copilot BETWEEN Claude (full subcommand granularity) and Cursor/OpenCode/Pi + * (Bash unrestricted) on the trust spectrum. `--disable-builtin-mcps` skips the + * github-mcp-server handshake at startup (PR reads go through the allowlisted + * `gh` CLI instead); `--no-auto-update` keeps a background job from pausing to + * download a CLI update. The `=` form is used for the variadic permission + * flags so they can never greedily consume a following argument. The prompt is + * the value of `-p`, passed last. `--model` is omitted when empty or `auto` + * so Copilot picks its own default. + */ +function copilotBuildArgv(prompt: string, model?: string, cwd?: string): string[] { + const useModel = !!model && model.trim().length > 0 && model.toLowerCase() !== "auto"; + return [ + "copilot", + ...(cwd ? ["-C", cwd] : []), + "--output-format", + "json", + "--no-ask-user", + "--no-auto-update", + "--disable-builtin-mcps", + "--deny-tool=write", + "--allow-tool=shell(git:*)", + "--allow-tool=shell(gh:*)", + "--allow-tool=shell(glab:*)", + "--allow-tool=shell(jj:*)", + "--allow-tool=shell(wc)", + ...(useModel ? ["--model", model] : []), + // Prompt is the value of -p (copilot reads it from argv, not stdin). + "-p", + prompt, + ]; +} + +// --------------------------------------------------------------------------- +// The four descriptors + registry. // --------------------------------------------------------------------------- const CURSOR_ENGINE: MarkerEngine = { @@ -684,10 +824,23 @@ const PI_ENGINE: MarkerEngine = { formatLogEvent: piFormatLogEvent, }; -export const MARKER_ENGINES: Record<"cursor" | "opencode" | "pi", MarkerEngine> = { +const COPILOT_ENGINE: MarkerEngine = { + id: "copilot", + name: "Copilot CLI", + binary: "copilot", + author: "Copilot", + buildArgv: copilotBuildArgv, + extractText: copilotExtractText, + modelsArgv: ["help", "config"], + parseModels: copilotParseModels, + formatLogEvent: copilotFormatLogEvent, +}; + +export const MARKER_ENGINES: Record = { cursor: CURSOR_ENGINE, opencode: OPENCODE_ENGINE, pi: PI_ENGINE, + copilot: COPILOT_ENGINE, }; // --------------------------------------------------------------------------- diff --git a/packages/server/review.ts b/packages/server/review.ts index 40a7b1713..f84d6bdc6 100644 --- a/packages/server/review.ts +++ b/packages/server/review.ts @@ -84,6 +84,7 @@ import { transformMarkerFindings, makeMarkerNonce, extractMarkerNonce, + type MarkerEngineId, } from "./marker-review"; import { loadConfig, saveConfig, detectGitUser, getServerConfig } from "./config"; import { type PRMetadata, type PRRef, type PRReviewFileComment, type PRStackTree, type PRListItem, fetchPR, fetchPRFileContent, fetchPRContext, submitPRReview, fetchPRViewedFiles, markPRFilesViewed, fetchPRStack, fetchPRList, getPRUser, parsePRUrl, prRefFromMetadata, isSameProject, getDisplayRepo, getMRLabel, getMRNumberLabel, prCommandRuntime } from "./pr"; @@ -905,7 +906,7 @@ export async function startReviewServer( // engine id itself for claude/codex. const failedEngine = typeof config?.engine === "string" && config.engine ? config.engine : undefined; const failedEngineBinary = failedEngine - ? MARKER_ENGINES[failedEngine as "cursor" | "opencode" | "pi"]?.binary ?? failedEngine + ? MARKER_ENGINES[failedEngine as MarkerEngineId]?.binary ?? failedEngine : undefined; const repairEngine = failedEngine && Bun.which(failedEngineBinary!) @@ -993,7 +994,7 @@ export async function startReviewServer( // no cwd flag — it always uses the process's actual cwd, which spawnJob // already sets from this same cwd). // captureStdout is required: the marker block comes back on stdout NDJSON. - const markerEngine = MARKER_ENGINES[provider as "cursor" | "opencode" | "pi"]; + const markerEngine = MARKER_ENGINES[provider as MarkerEngineId]; if (markerEngine) { const model = typeof config?.model === "string" && config.model ? config.model : undefined; const thinking = typeof config?.thinking === "string" && config.thinking ? config.thinking : undefined; @@ -1111,7 +1112,7 @@ export async function startReviewServer( // an exit-0 job marked done). Mirrors the Tour fail-closed pattern below. // Findings carry nullable file/line, classified into line/whole-file/ // general by transformMarkerFindings — nothing is dropped (same as Claude). - const markerEngine = MARKER_ENGINES[job.provider as "cursor" | "opencode" | "pi"]; + const markerEngine = MARKER_ENGINES[job.provider as MarkerEngineId]; if (markerEngine) { // Recover the per-job nonce embedded in the prompt; without it no block // can be trusted, so parse fails closed below. diff --git a/packages/ui/components/AgentsTab.tsx b/packages/ui/components/AgentsTab.tsx index b2bfa9c63..de866069f 100644 --- a/packages/ui/components/AgentsTab.tsx +++ b/packages/ui/components/AgentsTab.tsx @@ -17,7 +17,7 @@ import type { AgentJobInfo, AgentCapabilities } from '../types'; import { isTerminalStatus } from '@plannotator/shared/agent-jobs'; import { cn } from '../lib/utils'; import { ReviewAgentsIcon } from './ReviewAgentsIcon'; -import { ClaudeIcon, CodexIcon, CursorIcon, OpenCodeIcon, PiIcon } from './icons/AgentIcons'; +import { ClaudeIcon, CodexIcon, CopilotIcon, CursorIcon, OpenCodeIcon, PiIcon } from './icons/AgentIcons'; import { useAgentSettings } from '../hooks/useAgentSettings'; import type { AgentEngine, AgentMode, ReviewEngine } from '../hooks/useAgentSettings'; import type { AgentLaunchParams } from '../hooks/useAgentJobs'; @@ -103,6 +103,12 @@ const PI_MODELS: Array<{ value: string; label: string }> = [ { value: '', label: 'Default' }, ]; +// Fallback Copilot model catalog. The real list is discovered server-side via +// `copilot help config`; empty value means "let Copilot pick". +const COPILOT_MODELS: Array<{ value: string; label: string }> = [ + { value: '', label: 'Default' }, +]; + // Pi's unified reasoning knob (`--thinking`), applied to whichever model is // selected. xhigh is accepted only by codex-max models. export const PI_THINKING: Array<{ value: string; label: string }> = [ @@ -140,6 +146,7 @@ export const REVIEW_ENGINE_LABEL: Record = { cursor: 'Cursor', opencode: 'OpenCode', pi: 'Pi', + copilot: 'Copilot', }; // Review-only icon map — the wide set. Tour keeps the narrow ENGINE_ICON. @@ -149,6 +156,7 @@ const REVIEW_ENGINE_ICON: Record> cursor: CursorIcon, opencode: OpenCodeIcon, pi: PiIcon, + copilot: CopilotIcon, }; export type AgentLaunchResult = AgentJobInfo | null | void; @@ -242,11 +250,12 @@ function formatModel(provider: string, engine: string | undefined, model: string if (provider === 'cursor') return catalogLabel(CURSOR_MODELS, model); if (provider === 'opencode') return model ? model : 'Default'; if (provider === 'pi') return model || 'Default'; + if (provider === 'copilot') return model || 'Default'; if (provider === 'codex' || engine === 'codex') return catalogLabel(CODEX_MODELS, model); if ((provider === 'tour' || provider === 'guide') && engine === 'claude') return catalogLabel(TOUR_CLAUDE_MODELS, model); if (provider === 'tour' || provider === 'guide') { if (engine === 'cursor') return catalogLabel(CURSOR_MODELS, model); - if (engine === 'opencode' || engine === 'pi') return model || 'Default'; + if (engine === 'opencode' || engine === 'pi' || engine === 'copilot') return model || 'Default'; } return catalogLabel(CLAUDE_MODELS, model); } @@ -547,6 +556,7 @@ export const AgentsTab: React.FC = ({ opencodeModel, piModel, piThinking, + copilotModel, tourClaudeModel, tourClaudeEffort, tourCodexModel, @@ -560,6 +570,7 @@ export const AgentsTab: React.FC = ({ guideOpencodeModel, guidePiModel, guidePiThinking, + guideCopilotModel, setSelectedMode, setReviewEngine, setReviewProfileId, @@ -574,6 +585,7 @@ export const AgentsTab: React.FC = ({ setOpencodeModel, setPiModel, setPiThinking, + setCopilotModel, setTourClaudeModel, setTourClaudeEffort, setTourCodexModel, @@ -587,6 +599,7 @@ export const AgentsTab: React.FC = ({ setGuideOpencodeModel, setGuidePiModel, setGuidePiThinking, + setGuideCopilotModel, } = settings; // Review profiles (built-in default plus the user's enabled skills). Loaded @@ -621,6 +634,7 @@ export const AgentsTab: React.FC = ({ const cursorAvailable = capabilities?.providers.some((p) => p.id === 'cursor' && p.available) ?? false; const opencodeAvailable = capabilities?.providers.some((p) => p.id === 'opencode' && p.available) ?? false; const piAvailable = capabilities?.providers.some((p) => p.id === 'pi' && p.available) ?? false; + const copilotAvailable = capabilities?.providers.some((p) => p.id === 'copilot' && p.available) ?? false; // Cursor's model catalog is account-specific and discovered server-side, so // prefer the live list from the capability; fall back to `auto`-only when the @@ -647,6 +661,14 @@ export const AgentsTab: React.FC = ({ return opts.length > 0 ? [...PI_MODELS, ...opts] : PI_MODELS; }, [capabilities]); + // Copilot models discovered server-side via `copilot help config`; prepend + // the "Default" option so the user can leave the model to Copilot's own pick. + const copilotModels = useMemo>(() => { + const discovered = capabilities?.providers.find((p) => p.id === 'copilot')?.models ?? []; + const opts = discovered.map((m) => ({ value: m.id, label: m.label })); + return opts.length > 0 ? [...COPILOT_MODELS, ...opts] : COPILOT_MODELS; + }, [capabilities]); + // Tour engines (narrow union). Cursor is NOT included here — it is review-only. const availableEngines = useMemo(() => { const engines: AgentEngine[] = []; @@ -661,8 +683,9 @@ export const AgentsTab: React.FC = ({ if (cursorAvailable) engines.push('cursor'); if (opencodeAvailable) engines.push('opencode'); if (piAvailable) engines.push('pi'); + if (copilotAvailable) engines.push('copilot'); return engines; - }, [availableEngines, cursorAvailable, opencodeAvailable, piAvailable]); + }, [availableEngines, cursorAvailable, opencodeAvailable, piAvailable, copilotAvailable]); const availableModes = useMemo(() => { const modes: AgentMode[] = []; @@ -684,6 +707,7 @@ export const AgentsTab: React.FC = ({ engine === 'cursor' ? cursorAvailable : engine === 'opencode' ? opencodeAvailable : engine === 'pi' ? piAvailable + : engine === 'copilot' ? copilotAvailable : engineAvailable(engine); // Reconcile mode + engine choices against live capabilities. Runs when @@ -754,6 +778,15 @@ export const AgentsTab: React.FC = ({ setGuidePiModel(piModels[0]?.value ?? ''); } }, [piAvailable, piModels, piModel, setPiModel, guidePiModel, setGuidePiModel]); + useEffect(() => { + if (!copilotAvailable) return; + if (!copilotModels.some((m) => m.value === copilotModel)) { + setCopilotModel(copilotModels[0]?.value ?? ''); + } + if (!copilotModels.some((m) => m.value === guideCopilotModel)) { + setGuideCopilotModel(copilotModels[0]?.value ?? ''); + } + }, [copilotAvailable, copilotModels, copilotModel, setCopilotModel, guideCopilotModel, setGuideCopilotModel]); // Annotation counts per job source const annotationCounts = useMemo(() => { @@ -825,6 +858,15 @@ export const AgentsTab: React.FC = ({ ...review, }; } + if (engine === 'copilot') { + // Empty model ⇒ Copilot's own pick; only send a real model id. + return { + provider: 'copilot', + label: 'Code Review', + ...(copilotModel ? { model: copilotModel } : {}), + ...review, + }; + } return { provider: 'codex', label: 'Code Review', @@ -872,6 +914,14 @@ export const AgentsTab: React.FC = ({ thinking: guidePiThinking, }; } + if (guideEngine === 'copilot') { + return { + provider: 'guide', + label: 'Guided Review', + engine: 'copilot', + ...(guideCopilotModel ? { model: guideCopilotModel } : {}), + }; + } return { provider: 'guide', label: 'Guided Review', @@ -1079,6 +1129,7 @@ export const AgentsTab: React.FC = ({ )} + {reviewEngine === 'copilot' && renderMarkerEngineConfig(copilotModel, copilotModels, setCopilotModel)} )} @@ -1156,6 +1207,7 @@ export const AgentsTab: React.FC = ({ )} + {guideEngine === 'copilot' && renderMarkerEngineConfig(guideCopilotModel, copilotModels, setGuideCopilotModel)} )}
diff --git a/packages/ui/components/icons/AgentIcons.tsx b/packages/ui/components/icons/AgentIcons.tsx index 67d3e293f..196b728ce 100644 --- a/packages/ui/components/icons/AgentIcons.tsx +++ b/packages/ui/components/icons/AgentIcons.tsx @@ -31,6 +31,18 @@ export function PiIcon({ className }: { className?: string }) { ); } +// GitHub Copilot brand mark — path from the official agent icon set +// (welcome.developers.workers.dev/icons/agents/copilot); the dark/light +// variants there differ only by fill, so one currentColor component covers +// both themes, like PiIcon/OpenCodeIcon. +export function CopilotIcon({ className }: { className?: string }) { + return ( + + ); +} + // Cursor brand mark — from packages/ui/components/icons/app/cursor.svg // (black rounded square + white cursor facet). export function CursorIcon({ className }: { className?: string }) { diff --git a/packages/ui/hooks/useAgentSettings.test.ts b/packages/ui/hooks/useAgentSettings.test.ts index 7718a0974..3eded3bf8 100644 --- a/packages/ui/hooks/useAgentSettings.test.ts +++ b/packages/ui/hooks/useAgentSettings.test.ts @@ -51,6 +51,7 @@ describe("parseReviewProfileByEngine", () => { cursor: "builtin:default", opencode: "builtin:default", pi: "builtin:default", + copilot: "builtin:default", }); }); @@ -61,6 +62,7 @@ describe("parseReviewProfileByEngine", () => { cursor: "skill:security", opencode: "skill:security", pi: "skill:security", + copilot: "skill:security", }); }); @@ -76,6 +78,7 @@ describe("parseReviewProfileByEngine", () => { cursor: "skill:b", opencode: "skill:legacy", pi: "skill:legacy", + copilot: "skill:legacy", }); }); @@ -88,6 +91,7 @@ describe("parseReviewProfileByEngine", () => { cursor: "builtin:default", opencode: "builtin:default", pi: "builtin:default", + copilot: "builtin:default", }); }); }); diff --git a/packages/ui/hooks/useAgentSettings.ts b/packages/ui/hooks/useAgentSettings.ts index 9c1d77a09..53bb6a98c 100644 --- a/packages/ui/hooks/useAgentSettings.ts +++ b/packages/ui/hooks/useAgentSettings.ts @@ -54,6 +54,11 @@ export const DEFAULT_PI_MODEL = ''; // Pi's unified reasoning knob (`--thinking`); 'medium' matches Pi's own default. export const DEFAULT_PI_THINKING = 'medium'; +// Copilot has no `auto` pseudo-model in our picker; empty string means "let +// Copilot pick" and copilotBuildArgv omits `--model` for it — same convention +// as OpenCode/Pi. +export const DEFAULT_COPILOT_MODEL = ''; + // Guide-scoped marker-engine defaults — same isolation rationale as // DEFAULT_GUIDE_CLAUDE_EFFORT above (a guide's Cursor/OpenCode/Pi model must // not silently change the next Cursor/OpenCode/Pi code review, and vice @@ -64,6 +69,7 @@ export const DEFAULT_GUIDE_CURSOR_MODEL = 'auto'; export const DEFAULT_GUIDE_OPENCODE_MODEL = ''; export const DEFAULT_GUIDE_PI_MODEL = ''; export const DEFAULT_GUIDE_PI_THINKING = 'medium'; +export const DEFAULT_GUIDE_COPILOT_MODEL = ''; interface ClaudeSection { model: string; @@ -93,11 +99,17 @@ interface PiSection { thinking: string; // off | minimal | low | medium | high | xhigh } +// Copilot: flat model only, like Cursor/OpenCode (its --effort knob is +// deliberately not surfaced yet — the CLI default is sensible). +interface CopilotSection { + model: string; // '' (default) or a model id from `copilot help config` +} + export type AgentMode = 'review' | 'tour' | 'guide'; export type AgentEngine = 'claude' | 'codex'; // Review-only engine union. Tour stays on the narrow AgentEngine so its // exhaustive Record maps remain valid without change. -export type ReviewEngine = AgentEngine | 'cursor' | 'opencode' | 'pi'; +export type ReviewEngine = AgentEngine | 'cursor' | 'opencode' | 'pi' | 'copilot'; interface AgentSettingsState { selectedMode?: AgentMode; @@ -115,6 +127,7 @@ interface AgentSettingsState { cursor: CursorSection; opencode: OpencodeSection; pi: PiSection; + copilot: CopilotSection; tourClaude: ClaudeSection; tourCodex: CodexSection; guideClaude: ClaudeSection; @@ -126,10 +139,11 @@ interface AgentSettingsState { guideCursor: CursorSection; guideOpencode: OpencodeSection; guidePi: PiSection; + guideCopilot: CopilotSection; } const BUILTIN_DEFAULT_PROFILE = 'builtin:default'; -const REVIEW_ENGINES: ReviewEngine[] = ['claude', 'codex', 'cursor', 'opencode', 'pi']; +const REVIEW_ENGINES: ReviewEngine[] = ['claude', 'codex', 'cursor', 'opencode', 'pi', 'copilot']; const initialState: AgentSettingsState = { selectedMode: 'review', @@ -140,6 +154,7 @@ const initialState: AgentSettingsState = { cursor: BUILTIN_DEFAULT_PROFILE, opencode: BUILTIN_DEFAULT_PROFILE, pi: BUILTIN_DEFAULT_PROFILE, + copilot: BUILTIN_DEFAULT_PROFILE, }, tourEngine: 'claude', guideEngine: 'claude', @@ -148,6 +163,7 @@ const initialState: AgentSettingsState = { cursor: { model: DEFAULT_CURSOR_MODEL }, opencode: { model: DEFAULT_OPENCODE_MODEL }, pi: { model: DEFAULT_PI_MODEL, thinking: DEFAULT_PI_THINKING }, + copilot: { model: DEFAULT_COPILOT_MODEL }, tourClaude: { model: DEFAULT_TOUR_CLAUDE_MODEL, perModel: {} }, tourCodex: { model: DEFAULT_TOUR_CODEX_MODEL, perModel: {} }, guideClaude: { model: DEFAULT_GUIDE_CLAUDE_MODEL, perModel: {} }, @@ -155,6 +171,7 @@ const initialState: AgentSettingsState = { guideCursor: { model: DEFAULT_GUIDE_CURSOR_MODEL }, guideOpencode: { model: DEFAULT_GUIDE_OPENCODE_MODEL }, guidePi: { model: DEFAULT_GUIDE_PI_MODEL, thinking: DEFAULT_GUIDE_PI_THINKING }, + guideCopilot: { model: DEFAULT_GUIDE_COPILOT_MODEL }, }; // One-shot migration: drop any cached "none" codex reasoning entries. The @@ -192,6 +209,7 @@ function parseReviewEngine(value: unknown): ReviewEngine { if (value === 'cursor') return 'cursor'; if (value === 'opencode') return 'opencode'; if (value === 'pi') return 'pi'; + if (value === 'copilot') return 'copilot'; return parseEngine(value); } @@ -244,6 +262,9 @@ function readCookie(): AgentSettingsState { model: typeof parsed.pi?.model === 'string' ? parsed.pi.model : DEFAULT_PI_MODEL, thinking: typeof parsed.pi?.thinking === 'string' ? parsed.pi.thinking : DEFAULT_PI_THINKING, }, + copilot: { + model: typeof parsed.copilot?.model === 'string' ? parsed.copilot.model : DEFAULT_COPILOT_MODEL, + }, tourClaude: { model: typeof parsed.tourClaude?.model === 'string' ? parsed.tourClaude.model : DEFAULT_TOUR_CLAUDE_MODEL, perModel: parsed.tourClaude?.perModel ?? {}, @@ -270,6 +291,9 @@ function readCookie(): AgentSettingsState { model: typeof parsed.guidePi?.model === 'string' ? parsed.guidePi.model : DEFAULT_GUIDE_PI_MODEL, thinking: typeof parsed.guidePi?.thinking === 'string' ? parsed.guidePi.thinking : DEFAULT_GUIDE_PI_THINKING, }, + guideCopilot: { + model: typeof parsed.guideCopilot?.model === 'string' ? parsed.guideCopilot.model : DEFAULT_GUIDE_COPILOT_MODEL, + }, }; } catch { return initialState; @@ -389,6 +413,10 @@ export function useAgentSettings() { setState((s) => ({ ...s, pi: { ...s.pi, thinking } })); }, []); + const setCopilotModel = useCallback((model: string) => { + setState((s) => ({ ...s, copilot: { ...s.copilot, model } })); + }, []); + const patchCodex = useCallback( ( section: 'codex' | 'tourCodex' | 'guideCodex', @@ -475,6 +503,10 @@ export function useAgentSettings() { setState((s) => ({ ...s, guidePi: { ...s.guidePi, thinking } })); }, []); + const setGuideCopilotModel = useCallback((model: string) => { + setState((s) => ({ ...s, guideCopilot: { ...s.guideCopilot, model } })); + }, []); + const claudeEffort = state.claude.perModel[state.claude.model]?.effort ?? DEFAULT_CLAUDE_EFFORT; const codexReasoning = state.codex.perModel[state.codex.model]?.reasoning ?? DEFAULT_CODEX_REASONING; const codexFast = state.codex.perModel[state.codex.model]?.fast ?? DEFAULT_CODEX_FAST; @@ -499,6 +531,7 @@ export function useAgentSettings() { opencodeModel: state.opencode.model, piModel: state.pi.model, piThinking: state.pi.thinking, + copilotModel: state.copilot.model, tourClaudeModel: state.tourClaude.model, tourClaudeEffort, tourCodexModel: state.tourCodex.model, @@ -512,6 +545,7 @@ export function useAgentSettings() { guideOpencodeModel: state.guideOpencode.model, guidePiModel: state.guidePi.model, guidePiThinking: state.guidePi.thinking, + guideCopilotModel: state.guideCopilot.model, setSelectedMode, setReviewEngine, setReviewProfileId, @@ -526,6 +560,7 @@ export function useAgentSettings() { setOpencodeModel, setPiModel, setPiThinking, + setCopilotModel, setTourClaudeModel, setTourClaudeEffort, setTourCodexModel, @@ -539,5 +574,6 @@ export function useAgentSettings() { setGuideOpencodeModel, setGuidePiModel, setGuidePiThinking, + setGuideCopilotModel, }; } From d3bb0ab1fb5dbe4808945cd6ecf2f29738f3a0f0 Mon Sep 17 00:00:00 2001 From: Michael Ramos Date: Sun, 5 Jul 2026 09:01:54 -0700 Subject: [PATCH 4/4] fix(agents): structurally deny high-consequence verbs for Copilot jobs git:*/gh:*/glab:* stay allowed for inspection ergonomics, but Copilot's deny-precedence rules now block the verbs a prompt-injected background job could abuse: git push/reset/clean/checkout/restore (local reviews run in the user's real working tree), and PR/MR/issue comment/create/merge/ close/edit/review on gh and glab. Probe-verified: git log runs, git push --dry-run is denied by the shell(git push) rule. Addresses the one confirmed finding from the PR #997 review round; the other two (untracked-file reads, git -C) were refuted by live probes. --- packages/server/marker-review.test.ts | 9 +++++++++ packages/server/marker-review.ts | 28 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/packages/server/marker-review.test.ts b/packages/server/marker-review.test.ts index 5ed1d1484..644b4e70d 100644 --- a/packages/server/marker-review.test.ts +++ b/packages/server/marker-review.test.ts @@ -133,6 +133,15 @@ describe("buildMarkerCommand: copilot", () => { expect(command).toContain("--deny-tool=write"); expect(command).toContain("--allow-tool=shell(git:*)"); expect(command).toContain("--allow-tool=shell(gh:*)"); + // High-consequence verbs are structurally denied even though git:*/gh:*/ + // glab:* are allowed — deny rules take precedence in Copilot's model. + expect(command).toContain("--deny-tool=shell(git push)"); + expect(command).toContain("--deny-tool=shell(git reset)"); + expect(command).toContain("--deny-tool=shell(git clean)"); + expect(command).toContain("--deny-tool=shell(git restore)"); + expect(command).toContain("--deny-tool=shell(gh pr comment)"); + expect(command).toContain("--deny-tool=shell(gh pr merge)"); + expect(command).toContain("--deny-tool=shell(glab mr note)"); expect(command).not.toContain("--allow-all-tools"); expect(command).not.toContain("--yolo"); expect(command[command.indexOf("--model") + 1]).toBe("claude-sonnet-5"); diff --git a/packages/server/marker-review.ts b/packages/server/marker-review.ts index 70eabe08a..ef3418d40 100644 --- a/packages/server/marker-review.ts +++ b/packages/server/marker-review.ts @@ -772,6 +772,34 @@ function copilotBuildArgv(prompt: string, model?: string, cwd?: string): string[ "--no-auto-update", "--disable-builtin-mcps", "--deny-tool=write", + // Deny rules take precedence over every allow rule (Copilot's documented + // permission model), and match at first-level-subcommand granularity + // ("git push", "gh pr create") — probe-verified: with these in place, + // `git log` runs while `git push --dry-run` is denied. This keeps the + // broad `git:*`/`gh:*` allows below for inspection ergonomics while + // structurally blocking the high-consequence verbs a prompt-injected + // background job could abuse: remote writes (push), working-tree + // destruction (reset/clean/checkout/restore — local reviews run in the + // user's REAL working tree, so these mean uncommitted-work loss), and + // outward-facing forge writes (PR/MR/issue comments, creation, merges), + // which the review methodology already forbids at the prompt level. + "--deny-tool=shell(git push)", + "--deny-tool=shell(git reset)", + "--deny-tool=shell(git clean)", + "--deny-tool=shell(git checkout)", + "--deny-tool=shell(git restore)", + "--deny-tool=shell(gh pr comment)", + "--deny-tool=shell(gh pr create)", + "--deny-tool=shell(gh pr merge)", + "--deny-tool=shell(gh pr close)", + "--deny-tool=shell(gh pr edit)", + "--deny-tool=shell(gh pr review)", + "--deny-tool=shell(gh issue comment)", + "--deny-tool=shell(gh issue create)", + "--deny-tool=shell(glab mr note)", + "--deny-tool=shell(glab mr create)", + "--deny-tool=shell(glab mr merge)", + "--deny-tool=shell(glab mr close)", "--allow-tool=shell(git:*)", "--allow-tool=shell(gh:*)", "--allow-tool=shell(glab:*)",