diff --git a/backend/app/gateway/routers/models.py b/backend/app/gateway/routers/models.py index a36ece9277..f3c42fa644 100644 --- a/backend/app/gateway/routers/models.py +++ b/backend/app/gateway/routers/models.py @@ -16,6 +16,10 @@ class ModelResponse(BaseModel): description: str | None = Field(None, description="Model description") supports_thinking: bool = Field(default=False, description="Whether model supports thinking mode") supports_reasoning_effort: bool = Field(default=False, description="Whether model supports reasoning effort") + reasoning_efforts: list[str] | None = Field( + default=None, + description="Provider-specific reasoning_effort values accepted by this model", + ) class TokenUsageResponse(BaseModel): @@ -56,7 +60,8 @@ async def list_models(config: AppConfig = Depends(get_config)) -> ModelsListResp "display_name": "GPT-4", "description": "OpenAI GPT-4 model", "supports_thinking": false, - "supports_reasoning_effort": false + "supports_reasoning_effort": false, + "reasoning_efforts": null }, { "name": "claude-3-opus", @@ -64,7 +69,8 @@ async def list_models(config: AppConfig = Depends(get_config)) -> ModelsListResp "display_name": "Claude 3 Opus", "description": "Anthropic Claude 3 Opus model", "supports_thinking": true, - "supports_reasoning_effort": false + "supports_reasoning_effort": false, + "reasoning_efforts": null } ], "token_usage": { @@ -81,6 +87,7 @@ async def list_models(config: AppConfig = Depends(get_config)) -> ModelsListResp description=model.description, supports_thinking=model.supports_thinking, supports_reasoning_effort=model.supports_reasoning_effort, + reasoning_efforts=model.reasoning_efforts, ) for model in config.models ] @@ -129,4 +136,5 @@ async def get_model(model_name: str, config: AppConfig = Depends(get_config)) -> description=model.description, supports_thinking=model.supports_thinking, supports_reasoning_effort=model.supports_reasoning_effort, + reasoning_efforts=model.reasoning_efforts, ) diff --git a/backend/docs/API.md b/backend/docs/API.md index 10ea998580..67c6dc16a3 100644 --- a/backend/docs/API.md +++ b/backend/docs/API.md @@ -182,18 +182,24 @@ GET /api/models "name": "gpt-4", "display_name": "GPT-4", "supports_thinking": false, + "supports_reasoning_effort": false, + "reasoning_efforts": null, "supports_vision": true }, { "name": "claude-3-opus", "display_name": "Claude 3 Opus", "supports_thinking": false, + "supports_reasoning_effort": false, + "reasoning_efforts": null, "supports_vision": true }, { "name": "deepseek-v3", "display_name": "DeepSeek V3", "supports_thinking": true, + "supports_reasoning_effort": true, + "reasoning_efforts": ["low", "medium", "high", "max", "xhigh"], "supports_vision": false } ] @@ -214,6 +220,8 @@ GET /api/models/{model_name} "model": "gpt-4", "max_tokens": 4096, "supports_thinking": false, + "supports_reasoning_effort": false, + "reasoning_efforts": null, "supports_vision": true } ``` diff --git a/backend/docs/CONFIGURATION.md b/backend/docs/CONFIGURATION.md index 2f26c81285..530d4210f7 100644 --- a/backend/docs/CONFIGURATION.md +++ b/backend/docs/CONFIGURATION.md @@ -131,12 +131,18 @@ Some models support "thinking" mode for complex reasoning: models: - name: deepseek-v3 supports_thinking: true + supports_reasoning_effort: true + reasoning_efforts: [low, medium, high, max, xhigh] when_thinking_enabled: extra_body: thinking: type: enabled ``` +Use `reasoning_efforts` when a provider supports reasoning effort but only accepts +a subset of DeerFlow's UI values. For example, omit `minimal` for providers that +reject it. + **Gemini with thinking via OpenAI-compatible gateway**: When routing Gemini through an OpenAI-compatible proxy (Vertex AI OpenAI compat endpoint, AI Studio, or third-party gateways) with thinking enabled, the API attaches a `thought_signature` to each tool-call object returned in the response. Every subsequent request that replays those assistant messages **must** echo those signatures back on the tool-call entries or the API returns: diff --git a/backend/packages/harness/deerflow/client.py b/backend/packages/harness/deerflow/client.py index 8ffa89e2ca..5184dc5902 100644 --- a/backend/packages/harness/deerflow/client.py +++ b/backend/packages/harness/deerflow/client.py @@ -58,6 +58,13 @@ logger = logging.getLogger(__name__) +def _get_reasoning_efforts(model: Any) -> list[str] | None: + efforts = getattr(model, "reasoning_efforts", None) + if not isinstance(efforts, (list, tuple)): + return None + return [str(effort) for effort in efforts] + + StreamEventType = Literal["values", "messages-tuple", "custom", "end"] @@ -851,6 +858,7 @@ def list_models(self) -> dict: "description": getattr(model, "description", None), "supports_thinking": getattr(model, "supports_thinking", False), "supports_reasoning_effort": getattr(model, "supports_reasoning_effort", False), + "reasoning_efforts": _get_reasoning_efforts(model), } for model in self._app_config.models ], @@ -922,6 +930,7 @@ def get_model(self, name: str) -> dict | None: "description": getattr(model, "description", None), "supports_thinking": getattr(model, "supports_thinking", False), "supports_reasoning_effort": getattr(model, "supports_reasoning_effort", False), + "reasoning_efforts": _get_reasoning_efforts(model), } # ------------------------------------------------------------------ diff --git a/backend/packages/harness/deerflow/config/model_config.py b/backend/packages/harness/deerflow/config/model_config.py index e9a3e1c16b..75a9803b23 100644 --- a/backend/packages/harness/deerflow/config/model_config.py +++ b/backend/packages/harness/deerflow/config/model_config.py @@ -23,6 +23,10 @@ class ModelConfig(BaseModel): ) supports_thinking: bool = Field(default_factory=lambda: False, description="Whether the model supports thinking") supports_reasoning_effort: bool = Field(default_factory=lambda: False, description="Whether the model supports reasoning effort") + reasoning_efforts: list[str] | None = Field( + default=None, + description="Provider-specific reasoning_effort values accepted by this model", + ) when_thinking_enabled: dict | None = Field( default_factory=lambda: None, description="Extra settings to be passed to the model when thinking is enabled", diff --git a/backend/packages/harness/deerflow/models/factory.py b/backend/packages/harness/deerflow/models/factory.py index c6a3573f8b..4b1606a9bd 100644 --- a/backend/packages/harness/deerflow/models/factory.py +++ b/backend/packages/harness/deerflow/models/factory.py @@ -47,6 +47,18 @@ def _enable_stream_usage_by_default(model_use_path: str, model_settings_from_con model_settings_from_config["stream_usage"] = True +def _configured_reasoning_efforts(model_config) -> set[str] | None: + if not model_config.reasoning_efforts: + return None + return {str(effort) for effort in model_config.reasoning_efforts} + + +def _remove_unsupported_reasoning_effort(settings: dict, allowed_efforts: set[str]) -> None: + effort = settings.get("reasoning_effort") + if effort is not None and str(effort) not in allowed_efforts: + settings.pop("reasoning_effort", None) + + def create_chat_model(name: str | None = None, thinking_enabled: bool = False, *, app_config: AppConfig | None = None, attach_tracing: bool = True, **kwargs) -> BaseChatModel: """Create a chat model instance from the config. @@ -85,6 +97,7 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, * "description", "supports_thinking", "supports_reasoning_effort", + "reasoning_efforts", "when_thinking_enabled", "when_thinking_disabled", "thinking", @@ -126,6 +139,9 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, * if not model_config.supports_reasoning_effort: kwargs.pop("reasoning_effort", None) model_settings_from_config.pop("reasoning_effort", None) + elif allowed_efforts := _configured_reasoning_efforts(model_config): + _remove_unsupported_reasoning_effort(kwargs, allowed_efforts) + _remove_unsupported_reasoning_effort(model_settings_from_config, allowed_efforts) _enable_stream_usage_by_default(model_config.use, model_settings_from_config) diff --git a/backend/tests/test_client.py b/backend/tests/test_client.py index f0e918a083..de07c0855a 100644 --- a/backend/tests/test_client.py +++ b/backend/tests/test_client.py @@ -34,6 +34,7 @@ def mock_app_config(): model.model = "test-model" model.supports_thinking = False model.supports_reasoning_effort = False + model.reasoning_efforts = None model.model_dump.return_value = {"name": "test-model", "use": "langchain_openai:ChatOpenAI"} config = MagicMock() @@ -130,6 +131,7 @@ def test_list_models(self, client): assert "model" in result["models"][0] assert "display_name" in result["models"][0] assert "supports_thinking" in result["models"][0] + assert "reasoning_efforts" in result["models"][0] def test_list_skills(self, client): skill = MagicMock() @@ -1015,6 +1017,7 @@ def test_found(self, client): model_cfg.description = "A test model" model_cfg.supports_thinking = True model_cfg.supports_reasoning_effort = True + model_cfg.reasoning_efforts = None client._app_config.get_model_config.return_value = model_cfg result = client.get_model("test-model") @@ -1025,6 +1028,7 @@ def test_found(self, client): "description": "A test model", "supports_thinking": True, "supports_reasoning_effort": True, + "reasoning_efforts": None, } def test_not_found(self, client): @@ -2315,6 +2319,7 @@ def test_list_models(self, mock_app_config): model.description = "A test model" model.supports_thinking = False model.supports_reasoning_effort = False + model.reasoning_efforts = None mock_app_config.models = [model] mock_app_config.token_usage.enabled = True @@ -2335,6 +2340,8 @@ def test_get_model(self, mock_app_config): model.display_name = "Test Model" model.description = "A test model" model.supports_thinking = True + model.supports_reasoning_effort = True + model.reasoning_efforts = ["low", "medium", "high"] mock_app_config.models = [model] mock_app_config.get_model_config.return_value = model @@ -2346,6 +2353,7 @@ def test_get_model(self, mock_app_config): parsed = ModelResponse(**result) assert parsed.name == "test-model" assert parsed.model == "gpt-test" + assert parsed.reasoning_efforts == ["low", "medium", "high"] def test_list_skills(self, client): skill = MagicMock() diff --git a/backend/tests/test_model_factory.py b/backend/tests/test_model_factory.py index 554cbc47b2..b34f83d82e 100644 --- a/backend/tests/test_model_factory.py +++ b/backend/tests/test_model_factory.py @@ -29,6 +29,7 @@ def _make_model( use: str = "langchain_openai:ChatOpenAI", supports_thinking: bool = False, supports_reasoning_effort: bool = False, + reasoning_efforts: list[str] | None = None, when_thinking_enabled: dict | None = None, when_thinking_disabled: dict | None = None, thinking: dict | None = None, @@ -43,6 +44,7 @@ def _make_model( max_tokens=max_tokens, supports_thinking=supports_thinking, supports_reasoning_effort=supports_reasoning_effort, + reasoning_efforts=reasoning_efforts, when_thinking_enabled=when_thinking_enabled, when_thinking_disabled=when_thinking_disabled, thinking=thinking, @@ -429,6 +431,69 @@ def __init__(self, **kwargs): assert captured.get("reasoning_effort") == "minimal" +def test_runtime_reasoning_effort_removed_when_not_in_model_allowlist(monkeypatch): + cfg = _make_app_config( + [ + _make_model( + "deepseek", + supports_thinking=True, + supports_reasoning_effort=True, + reasoning_efforts=["low", "medium", "high", "max", "xhigh"], + when_thinking_disabled={"extra_body": {"thinking": {"type": "disabled"}}}, + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model( + name="deepseek", + thinking_enabled=False, + reasoning_effort="minimal", + ) + + assert captured.get("extra_body") == {"thinking": {"type": "disabled"}} + assert captured.get("reasoning_effort") is None + + +def test_runtime_reasoning_effort_preserved_when_in_model_allowlist(monkeypatch): + cfg = _make_app_config( + [ + _make_model( + "deepseek", + supports_reasoning_effort=True, + reasoning_efforts=["low", "medium", "high", "max", "xhigh"], + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model( + name="deepseek", + thinking_enabled=True, + reasoning_effort="xhigh", + ) + + assert captured.get("reasoning_effort") == "xhigh" + + # --------------------------------------------------------------------------- # thinking shortcut field # --------------------------------------------------------------------------- diff --git a/config.example.yaml b/config.example.yaml index 118b1be4dc..ef7902855c 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -50,6 +50,9 @@ models: # supports_thinking: true # supports_vision: true # supports_reasoning_effort: true + # # Optional: restrict UI/backend reasoning_effort values to those this provider accepts. + # # For providers such as DeepSeek, omit "minimal" if their API rejects it. + # reasoning_efforts: [low, medium, high, max, xhigh] # when_thinking_enabled: # extra_body: # thinking: diff --git a/frontend/src/components/workspace/input-box.tsx b/frontend/src/components/workspace/input-box.tsx index 6344a26d2a..81a4b5791b 100644 --- a/frontend/src/components/workspace/input-box.tsx +++ b/frontend/src/components/workspace/input-box.tsx @@ -59,7 +59,12 @@ import { fetch } from "@/core/api/fetcher"; import { getBackendBaseURL } from "@/core/config"; import { useI18n } from "@/core/i18n/hooks"; import { useModels } from "@/core/models/hooks"; -import type { AgentThreadContext } from "@/core/threads"; +import { + getDefaultReasoningEffort, + getModelReasoningEfforts, + normalizeReasoningEffort, +} from "@/core/models/reasoning-effort"; +import type { AgentThreadContext, ReasoningEffort } from "@/core/threads"; import { textOfMessage } from "@/core/threads/utils"; import { cn } from "@/lib/utils"; @@ -86,6 +91,14 @@ import { Tooltip } from "./tooltip"; type InputMode = "flash" | "thinking" | "pro" | "ultra"; +type InputBoxContext = Omit< + AgentThreadContext, + "thread_id" | "is_plan_mode" | "thinking_enabled" | "subagent_enabled" +> & { + mode: InputMode | undefined; + reasoning_effort?: ReasoningEffort; +}; + function getResolvedMode( mode: InputMode | undefined, supportsThinking: boolean, @@ -99,6 +112,52 @@ function getResolvedMode( return supportsThinking ? "pro" : "flash"; } +type InputBoxTranslations = ReturnType["t"]["inputBox"]; + +function getReasoningEffortLabel( + inputBox: InputBoxTranslations, + effort: ReasoningEffort | undefined, +): string { + switch (effort) { + case "minimal": + return inputBox.reasoningEffortMinimal; + case "low": + return inputBox.reasoningEffortLow; + case "medium": + return inputBox.reasoningEffortMedium; + case "high": + return inputBox.reasoningEffortHigh; + case "max": + return inputBox.reasoningEffortMax; + case "xhigh": + return inputBox.reasoningEffortXHigh; + default: + return inputBox.reasoningEffortMedium; + } +} + +function getReasoningEffortDescription( + inputBox: InputBoxTranslations, + effort: ReasoningEffort | undefined, +): string { + switch (effort) { + case "minimal": + return inputBox.reasoningEffortMinimalDescription; + case "low": + return inputBox.reasoningEffortLowDescription; + case "medium": + return inputBox.reasoningEffortMediumDescription; + case "high": + return inputBox.reasoningEffortHighDescription; + case "max": + return inputBox.reasoningEffortMaxDescription; + case "xhigh": + return inputBox.reasoningEffortXHighDescription; + default: + return inputBox.reasoningEffortMediumDescription; + } +} + export function InputBox({ className, disabled, @@ -118,13 +177,7 @@ export function InputBox({ assistantId?: string | null; status?: ChatStatus; disabled?: boolean; - context: Omit< - AgentThreadContext, - "thread_id" | "is_plan_mode" | "thinking_enabled" | "subagent_enabled" - > & { - mode: "flash" | "thinking" | "pro" | "ultra" | undefined; - reasoning_effort?: "minimal" | "low" | "medium" | "high"; - }; + context: InputBoxContext; extraHeader?: React.ReactNode; /** * Whether to render the input in welcome layout (vertically centered, @@ -134,15 +187,7 @@ export function InputBox({ isWelcomeMode?: boolean; threadId: string; initialValue?: string; - onContextChange?: ( - context: Omit< - AgentThreadContext, - "thread_id" | "is_plan_mode" | "thinking_enabled" | "subagent_enabled" - > & { - mode: "flash" | "thinking" | "pro" | "ultra" | undefined; - reasoning_effort?: "minimal" | "low" | "medium" | "high"; - }, - ) => void; + onContextChange?: (context: InputBoxContext) => void; onFollowupsVisibilityChange?: (visible: boolean) => void; onSubmit?: (message: PromptInputMessage) => void | Promise; onStop?: () => void; @@ -176,8 +221,17 @@ export function InputBox({ const supportsThinking = fallbackModel.supports_thinking ?? false; const nextModelName = fallbackModel.name; const nextMode = getResolvedMode(context.mode, supportsThinking); - - if (context.model_name === nextModelName && context.mode === nextMode) { + const nextReasoningEffort = normalizeReasoningEffort( + context.reasoning_effort, + nextMode, + getModelReasoningEfforts(fallbackModel), + ); + + if ( + context.model_name === nextModelName && + context.mode === nextMode && + context.reasoning_effort === nextReasoningEffort + ) { return; } @@ -185,6 +239,7 @@ export function InputBox({ ...context, model_name: nextModelName, mode: nextMode, + reasoning_effort: nextReasoningEffort, }); }, [context, models, onContextChange]); @@ -207,17 +262,31 @@ export function InputBox({ [selectedModel], ); + const availableReasoningEfforts = useMemo( + () => getModelReasoningEfforts(selectedModel), + [selectedModel], + ); + const handleModelSelect = useCallback( (model_name: string) => { const model = models.find((m) => m.name === model_name); if (!model) { return; } + const mode = getResolvedMode( + context.mode, + model.supports_thinking ?? false, + ); + const reasoningEfforts = getModelReasoningEfforts(model); onContextChange?.({ ...context, model_name, - mode: getResolvedMode(context.mode, model.supports_thinking ?? false), - reasoning_effort: context.reasoning_effort, + mode, + reasoning_effort: normalizeReasoningEffort( + context.reasoning_effort, + mode, + reasoningEfforts, + ), }); setModelDialogOpen(false); }, @@ -226,24 +295,21 @@ export function InputBox({ const handleModeSelect = useCallback( (mode: InputMode) => { + const resolvedMode = getResolvedMode(mode, supportThinking); onContextChange?.({ ...context, - mode: getResolvedMode(mode, supportThinking), - reasoning_effort: - mode === "ultra" - ? "high" - : mode === "pro" - ? "medium" - : mode === "thinking" - ? "low" - : "minimal", + mode: resolvedMode, + reasoning_effort: getDefaultReasoningEffort( + resolvedMode, + availableReasoningEfforts, + ), }); }, - [onContextChange, context, supportThinking], + [onContextChange, context, supportThinking, availableReasoningEfforts], ); const handleReasoningEffortSelect = useCallback( - (effort: "minimal" | "low" | "medium" | "high") => { + (effort: ReasoningEffort) => { onContextChange?.({ ...context, reasoning_effort: effort, @@ -268,12 +334,18 @@ export function InputBox({ // Guard against submitting before the initial model auto-selection // effect has flushed thread settings to storage/state. if (resolvedModelName && context.model_name !== resolvedModelName) { + const nextMode = getResolvedMode( + context.mode, + selectedModel?.supports_thinking ?? false, + ); onContextChange?.({ ...context, model_name: resolvedModelName, - mode: getResolvedMode( - context.mode, - selectedModel?.supports_thinking ?? false, + mode: nextMode, + reasoning_effort: normalizeReasoningEffort( + context.reasoning_effort, + nextMode, + getModelReasoningEfforts(selectedModel), ), }); return new Promise((resolve, reject) => { @@ -291,7 +363,7 @@ export function InputBox({ onSubmit, onStop, resolvedModelName, - selectedModel?.supports_thinking, + selectedModel, status, ], ); @@ -438,6 +510,13 @@ export function InputBox({ return () => controller.abort(); }, [context.model_name, disabled, isMock, status, threadId]); + const currentMode = getResolvedMode(context.mode, supportThinking); + const selectedReasoningEffort = normalizeReasoningEffort( + context.reasoning_effort, + currentMode, + availableReasoningEfforts, + ); + return (
- {supportReasoningEffort && context.mode !== "flash" && ( - - -
- {t.inputBox.reasoningEffort}: - {context.reasoning_effort === "minimal" && - " " + t.inputBox.reasoningEffortMinimal} - {context.reasoning_effort === "low" && - " " + t.inputBox.reasoningEffortLow} - {context.reasoning_effort === "medium" && - " " + t.inputBox.reasoningEffortMedium} - {context.reasoning_effort === "high" && - " " + t.inputBox.reasoningEffortHigh} -
-
- - - - {t.inputBox.reasoningEffort} - - - handleReasoningEffortSelect("minimal")} - > -
-
- {t.inputBox.reasoningEffortMinimal} -
-
- {t.inputBox.reasoningEffortMinimalDescription} -
-
- {context.reasoning_effort === "minimal" ? ( - - ) : ( -
- )} - - handleReasoningEffortSelect("low")} - > -
-
- {t.inputBox.reasoningEffortLow} -
-
- {t.inputBox.reasoningEffortLowDescription} -
-
- {context.reasoning_effort === "low" ? ( - - ) : ( -
- )} - - handleReasoningEffortSelect("medium")} - > -
-
- {t.inputBox.reasoningEffortMedium} -
-
- {t.inputBox.reasoningEffortMediumDescription} -
-
- {context.reasoning_effort === "medium" || - !context.reasoning_effort ? ( - - ) : ( -
- )} - - handleReasoningEffortSelect("high")} - > -
-
- {t.inputBox.reasoningEffortHigh} -
-
- {t.inputBox.reasoningEffortHighDescription} -
-
- {context.reasoning_effort === "high" ? ( - - ) : ( -
- )} - - - - - - )} + {supportReasoningEffort && + currentMode !== "flash" && + availableReasoningEfforts.length > 0 && ( + + +
+ {t.inputBox.reasoningEffort}:{" "} + {getReasoningEffortLabel( + t.inputBox, + selectedReasoningEffort, + )} +
+
+ + + + {t.inputBox.reasoningEffort} + + + {availableReasoningEfforts.map((effort) => ( + handleReasoningEffortSelect(effort)} + > +
+
+ {getReasoningEffortLabel(t.inputBox, effort)} +
+
+ {getReasoningEffortDescription( + t.inputBox, + effort, + )} +
+
+ {selectedReasoningEffort === effort ? ( + + ) : ( +
+ )} + + ))} + + + + + )} = { + flash: undefined, + thinking: "low", + pro: "medium", + ultra: "high", +}; + +export function getModelReasoningEfforts( + model?: Pick, +): ReasoningEffort[] { + if (!model?.supports_reasoning_effort) { + return []; + } + if (model.reasoning_efforts?.length) { + return model.reasoning_efforts; + } + return DEFAULT_REASONING_EFFORTS; +} + +export function getDefaultReasoningEffort( + mode: ReasoningMode, + allowedEfforts: readonly ReasoningEffort[], +): ReasoningEffort | undefined { + if (mode === "flash" || allowedEfforts.length === 0) { + return undefined; + } + + const preferred = DEFAULT_REASONING_EFFORT_BY_MODE[mode]; + if (preferred && allowedEfforts.includes(preferred)) { + return preferred; + } + + return allowedEfforts[0]; +} + +export function normalizeReasoningEffort( + effort: ReasoningEffort | undefined, + mode: ReasoningMode, + allowedEfforts: readonly ReasoningEffort[], +): ReasoningEffort | undefined { + if (mode === "flash") { + return undefined; + } + if (effort && allowedEfforts.includes(effort)) { + return effort; + } + return getDefaultReasoningEffort(mode, allowedEfforts); +} diff --git a/frontend/src/core/models/types.ts b/frontend/src/core/models/types.ts index 2c888ed8d9..3c3b20cf5b 100644 --- a/frontend/src/core/models/types.ts +++ b/frontend/src/core/models/types.ts @@ -1,3 +1,5 @@ +import type { ReasoningEffort } from "../threads"; + export interface Model { id: string; name: string; @@ -6,6 +8,7 @@ export interface Model { description?: string | null; supports_thinking?: boolean; supports_reasoning_effort?: boolean; + reasoning_efforts?: ReasoningEffort[] | null; } export interface TokenUsageSettings { diff --git a/frontend/src/core/settings/local.ts b/frontend/src/core/settings/local.ts index aa370c0533..e2088c23ad 100644 --- a/frontend/src/core/settings/local.ts +++ b/frontend/src/core/settings/local.ts @@ -1,5 +1,5 @@ import type { TokenUsageInlineMode } from "../messages/usage-model"; -import type { AgentThreadContext } from "../threads"; +import type { AgentThreadContext, ReasoningEffort } from "../threads"; export const DEFAULT_LOCAL_SETTINGS: LocalSettings = { notification: { @@ -42,7 +42,7 @@ export interface LocalSettings { > & { model_name?: string | undefined; mode: "flash" | "thinking" | "pro" | "ultra" | undefined; - reasoning_effort?: "minimal" | "low" | "medium" | "high"; + reasoning_effort?: ReasoningEffort; }; } diff --git a/frontend/src/core/threads/types.ts b/frontend/src/core/threads/types.ts index dafb073494..d5f8d751b0 100644 --- a/frontend/src/core/threads/types.ts +++ b/frontend/src/core/threads/types.ts @@ -2,6 +2,14 @@ import type { Message, Thread } from "@langchain/langgraph-sdk"; import type { Todo } from "../todos"; +export type ReasoningEffort = + | "minimal" + | "low" + | "medium" + | "high" + | "max" + | "xhigh"; + export interface AgentThreadState extends Record { title: string; messages: Message[]; @@ -15,7 +23,7 @@ export interface AgentThreadContext extends Record { thinking_enabled: boolean; is_plan_mode: boolean; subagent_enabled: boolean; - reasoning_effort?: "minimal" | "low" | "medium" | "high"; + reasoning_effort?: ReasoningEffort; agent_name?: string; } diff --git a/frontend/tests/unit/core/models/reasoning-effort.test.ts b/frontend/tests/unit/core/models/reasoning-effort.test.ts new file mode 100644 index 0000000000..af51dac5da --- /dev/null +++ b/frontend/tests/unit/core/models/reasoning-effort.test.ts @@ -0,0 +1,50 @@ +import { expect, test } from "vitest"; + +import { + getDefaultReasoningEffort, + getModelReasoningEfforts, + normalizeReasoningEffort, +} from "@/core/models/reasoning-effort"; +import type { Model } from "@/core/models/types"; + +function model(overrides: Partial): Model { + return { + id: "deepseek", + name: "deepseek", + model: "deepseek-v4-pro", + display_name: "DeepSeek", + ...overrides, + }; +} + +test("uses provider-specific reasoning effort values when configured", () => { + const efforts = getModelReasoningEfforts( + model({ + supports_reasoning_effort: true, + reasoning_efforts: ["low", "medium", "high", "max", "xhigh"], + }), + ); + + expect(efforts).toEqual(["low", "medium", "high", "max", "xhigh"]); + expect(efforts).not.toContain("minimal"); +}); + +test("falls back to built-in reasoning efforts for legacy model configs", () => { + expect( + getModelReasoningEfforts(model({ supports_reasoning_effort: true })), + ).toEqual(["minimal", "low", "medium", "high"]); +}); + +test("normalizes stale local reasoning effort to the mode default", () => { + const efforts = ["low", "medium", "high", "max", "xhigh"] as const; + + expect(normalizeReasoningEffort("minimal", "pro", efforts)).toBe("medium"); + expect(normalizeReasoningEffort("minimal", "ultra", efforts)).toBe("high"); +}); + +test("does not send reasoning effort in flash mode", () => { + const efforts = ["minimal", "low", "medium", "high"] as const; + + expect(getDefaultReasoningEffort("flash", efforts)).toBeUndefined(); + expect(normalizeReasoningEffort("minimal", "flash", efforts)).toBeUndefined(); +});