Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ Lead-agent middlewares are assembled in strict append order across `packages/har
12. **TitleMiddleware** - Auto-generates thread title after first complete exchange and normalizes structured message content before prompting the title model
13. **MemoryMiddleware** - Queues conversations for async memory update (filters to user + final AI responses)
14. **ViewImageMiddleware** - Injects base64 image data before LLM call (conditional on vision support)
15. **DeferredToolFilterMiddleware** - Hides deferred tool schemas from the bound model until tool search is enabled (optional)
15. **DeferredToolFilterMiddleware** - Hides deferred (MCP) tool schemas from the bound model using a build-time deferred-name set + catalog hash, reading per-thread promotions from `ThreadState.promoted` (hash-scoped, no ContextVar); a tool becomes bound on subsequent turns after `tool_search` returns its schema (optional, if `tool_search.enabled`)
16. **SubagentLimitMiddleware** - Truncates excess `task` tool calls from model response to enforce `MAX_CONCURRENT_SUBAGENTS` limit (optional, if `subagent_enabled`)
17. **LoopDetectionMiddleware** - Detects repeated tool-call loops; hard-stop responses clear both structured `tool_calls` and raw provider tool-call metadata before forcing a final text answer
18. **ClarificationMiddleware** - Intercepts `ask_clarification` tool calls, interrupts via `Command(goto=END)` (must be last)
Expand Down
44 changes: 35 additions & 9 deletions backend/packages/harness/deerflow/agents/lead_agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ def _build_middlewares(
custom_middlewares: list[AgentMiddleware] | None = None,
*,
app_config: AppConfig | None = None,
deferred_setup=None,
):
"""Build middleware chain based on runtime configuration.

Expand Down Expand Up @@ -318,11 +319,13 @@ def _build_middlewares(
if model_config is not None and model_config.supports_vision:
middlewares.append(ViewImageMiddleware())

# Add DeferredToolFilterMiddleware to hide deferred tool schemas from model binding
if resolved_app_config.tool_search.enabled:
# Hide deferred tool schemas from model binding until tool_search promotes them.
# The deferred set + catalog hash come from the build-time setup (assembled
# after tool-policy filtering); promotion is read from graph state.
if deferred_setup is not None and deferred_setup.deferred_names:
from deerflow.agents.middlewares.deferred_tool_filter_middleware import DeferredToolFilterMiddleware

middlewares.append(DeferredToolFilterMiddleware())
middlewares.append(DeferredToolFilterMiddleware(deferred_setup.deferred_names, deferred_setup.catalog_hash))

# Add SubagentLimitMiddleware to truncate excess parallel task calls
subagent_enabled = cfg.get("subagent_enabled", False)
Expand Down Expand Up @@ -353,6 +356,23 @@ def _build_middlewares(
return middlewares


def _assemble_deferred(filtered_tools, *, enabled: bool):
"""Build the final tool list + deferred setup from a policy-filtered list.

Call AFTER tool-policy filtering so the deferred catalog never exposes a
tool the agent is not allowed to use. Fail-closed: if tool_search is enabled
and MCP tools survived filtering but no deferred set was recovered, raise
rather than silently binding their full schemas to the model.
"""
from deerflow.tools.builtins.tool_search import _is_mcp_tool, build_deferred_tool_setup

setup = build_deferred_tool_setup(filtered_tools, enabled=enabled)
if enabled and not setup.deferred_names and any(_is_mcp_tool(t) for t in filtered_tools):
raise RuntimeError("tool_search enabled and MCP tools survived policy filtering, but no deferred set was recovered — refusing to bind MCP schemas (fail-closed).")
final_tools = list(filtered_tools) + ([setup.tool_search_tool] if setup.tool_search_tool else [])
return final_tools, setup
Comment on lines +369 to +373
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intentional — and a good thing to lock down. tool_search is appended after policy filtering, but it's only ever produced when MCP tools survive that filter: build_deferred_tool_setup returns a None tool when no deferred tool remains, and its catalog is derived from the already policy-filtered list, so it can never expose a tool the allowlist denied. Coupling tool_search's presence to surviving deferred tools also removes an upstream footgun where an allowlist that permitted MCP tools but omitted tool_search would strip the search tool and leave those deferred tools permanently unreachable.

Locked the contract with two regression tests in tests/test_deferred_tool_crosscontext.pytest_policy_denied_mcp_yields_no_tool_search_end_to_end and test_tool_search_appended_after_policy_but_never_exposes_denied_tool — in 92d463e.



def _available_skill_names(agent_config, is_bootstrap: bool) -> set[str] | None:
if is_bootstrap:
return {"bootstrap"}
Expand Down Expand Up @@ -460,16 +480,19 @@ def _make_lead_agent(config: RunnableConfig, *, app_config: AppConfig):

if is_bootstrap:
# Special bootstrap agent with minimal prompt for initial custom agent creation flow
tools = get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled, app_config=resolved_app_config) + [setup_agent]
raw_tools = get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled, app_config=resolved_app_config) + [setup_agent]
filtered = filter_tools_by_skill_allowed_tools(raw_tools, skills_for_tool_policy)
final_tools, setup = _assemble_deferred(filtered, enabled=resolved_app_config.tool_search.enabled)
return create_agent(
model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, app_config=resolved_app_config, attach_tracing=False),
tools=filter_tools_by_skill_allowed_tools(tools, skills_for_tool_policy),
middleware=_build_middlewares(config, model_name=model_name, app_config=resolved_app_config),
tools=final_tools,
middleware=_build_middlewares(config, model_name=model_name, app_config=resolved_app_config, deferred_setup=setup),
system_prompt=apply_prompt_template(
subagent_enabled=subagent_enabled,
max_concurrent_subagents=max_concurrent_subagents,
available_skills=set(["bootstrap"]),
app_config=resolved_app_config,
deferred_names=setup.deferred_names,
),
state_schema=ThreadState,
)
Expand All @@ -478,17 +501,20 @@ def _make_lead_agent(config: RunnableConfig, *, app_config: AppConfig):
# The default agent (no agent_name) does not see this tool.
extra_tools = [update_agent] if agent_name else []
# Default lead agent (unchanged behavior)
tools = get_available_tools(model_name=model_name, groups=agent_config.tool_groups if agent_config else None, subagent_enabled=subagent_enabled, app_config=resolved_app_config)
raw_tools = get_available_tools(model_name=model_name, groups=agent_config.tool_groups if agent_config else None, subagent_enabled=subagent_enabled, app_config=resolved_app_config)
filtered = filter_tools_by_skill_allowed_tools(raw_tools + extra_tools, skills_for_tool_policy)
final_tools, setup = _assemble_deferred(filtered, enabled=resolved_app_config.tool_search.enabled)
return create_agent(
model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, reasoning_effort=reasoning_effort, app_config=resolved_app_config, attach_tracing=False),
tools=filter_tools_by_skill_allowed_tools(tools + extra_tools, skills_for_tool_policy),
middleware=_build_middlewares(config, model_name=model_name, agent_name=agent_name, app_config=resolved_app_config),
tools=final_tools,
middleware=_build_middlewares(config, model_name=model_name, agent_name=agent_name, app_config=resolved_app_config, deferred_setup=setup),
system_prompt=apply_prompt_template(
subagent_enabled=subagent_enabled,
max_concurrent_subagents=max_concurrent_subagents,
agent_name=agent_name,
available_skills=set(agent_config.skills) if agent_config and agent_config.skills is not None else None,
app_config=resolved_app_config,
deferred_names=setup.deferred_names,
),
state_schema=ThreadState,
)
34 changes: 9 additions & 25 deletions backend/packages/harness/deerflow/agents/lead_agent/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,33 +684,16 @@ def _build_self_update_section(agent_name: str | None) -> str:
"""


def get_deferred_tools_prompt_section(*, app_config: AppConfig | None = None) -> str:
"""Generate <available-deferred-tools> block for the system prompt.
def get_deferred_tools_prompt_section(*, deferred_names: frozenset[str] = frozenset()) -> str:
"""Generate <available-deferred-tools> from an explicit deferred-name set.

Lists only deferred tool names so the agent knows what exists
and can use tool_search to load them.
Returns empty string when tool_search is disabled or no tools are deferred.
Lists only names so the agent knows what exists and can use tool_search to
load them. Returns empty string when there are no deferred tools. The set is
computed at agent build time (after tool-policy filtering) and passed in.
"""
from deerflow.tools.builtins.tool_search import get_deferred_registry

if app_config is None:
try:
from deerflow.config import get_app_config

config = get_app_config()
except Exception:
return ""
else:
config = app_config

if not config.tool_search.enabled:
if not deferred_names:
return ""

registry = get_deferred_registry()
if not registry:
return ""

names = "\n".join(e.name for e in registry.entries)
names = "\n".join(sorted(deferred_names))
return f"<available-deferred-tools>\n{names}\n</available-deferred-tools>"


Expand Down Expand Up @@ -772,6 +755,7 @@ def apply_prompt_template(
agent_name: str | None = None,
available_skills: set[str] | None = None,
app_config: AppConfig | None = None,
deferred_names: frozenset[str] = frozenset(),
) -> str:
# Include subagent section only if enabled (from runtime parameter)
n = max_concurrent_subagents
Expand Down Expand Up @@ -799,7 +783,7 @@ def apply_prompt_template(
skills_section = get_skills_prompt_section(available_skills, app_config=app_config)

# Get deferred tools section (tool_search)
deferred_tools_section = get_deferred_tools_prompt_section(app_config=app_config)
deferred_tools_section = get_deferred_tools_prompt_section(deferred_names=deferred_names)

# Build ACP agent section only if ACP agents are configured
acp_section = _build_acp_section(app_config=app_config)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
"""Middleware to filter deferred tool schemas from model binding.

When tool_search is enabled, MCP tools are registered in the DeferredToolRegistry
and passed to ToolNode for execution, but their schemas should NOT be sent to the
LLM via bind_tools (that's the whole point of deferral — saving context tokens).

This middleware intercepts wrap_model_call and removes deferred tools from
request.tools so that model.bind_tools only receives active tool schemas.
The agent discovers deferred tools at runtime via the tool_search tool.
When tool_search is enabled, MCP tools are still passed to ToolNode for
execution, but their schemas must NOT be sent to the LLM via bind_tools until
the model has discovered them via tool_search. This middleware removes the
still-deferred tools from request.tools before model binding, and blocks tool
calls to tools that have not been promoted yet.

The deferred name set and the catalog hash are injected at construction time
(no ContextVar). Promotion state is read from graph state (``state["promoted"]``),
scoped by catalog hash so a stale persisted promotion cannot expose a renamed
or drifted tool.
"""

import logging
Expand All @@ -24,47 +27,49 @@


class DeferredToolFilterMiddleware(AgentMiddleware[AgentState]):
"""Remove deferred tools from request.tools before model binding.
"""Hide deferred tool schemas from the bound model until promoted.

ToolNode still holds all tools (including deferred) for execution routing,
but the LLM only sees active tool schemas — deferred tools are discoverable
via tool_search at runtime.
but the LLM only sees active tool schemas plus tools that have already been
promoted (recorded in ``state["promoted"]`` under the current catalog hash).
"""

def _filter_tools(self, request: ModelRequest) -> ModelRequest:
from deerflow.tools.builtins.tool_search import get_deferred_registry

registry = get_deferred_registry()
if not registry:
return request
def __init__(self, deferred_names: frozenset[str], catalog_hash: str | None):
super().__init__()
self._deferred = deferred_names
self._catalog_hash = catalog_hash

deferred_names = registry.deferred_names
active_tools = [t for t in request.tools if getattr(t, "name", None) not in deferred_names]
def _promoted(self, state) -> set[str]:
promoted = (state or {}).get("promoted")
if promoted and promoted.get("catalog_hash") == self._catalog_hash:
return set(promoted.get("names") or [])
return set()

if len(active_tools) < len(request.tools):
logger.debug(f"Filtered {len(request.tools) - len(active_tools)} deferred tool schema(s) from model binding")
def _hidden(self, state) -> set[str]:
return set(self._deferred) - self._promoted(state)

return request.override(tools=active_tools)
def _filter_tools(self, request: ModelRequest) -> ModelRequest:
if not self._deferred:
return request
hide = self._hidden(request.state)
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Verified against the pinned langchain==1.2.15: state is a declared dataclass field on ModelRequest (model, messages, system_message, tool_choice, tools, response_format, state, runtime, model_settings), so request.state is the supported access path here, not an attribute that can be missing.

The suggested fallback would actually break this path. ModelRequest.runtime is a Runtime, which has no state attribute (its fields are context, store, stream_writer, previous, execution_info, server_info) — so request.runtime.state is what would raise AttributeError. There are also no request.runtime.state usages anywhere in this repo, so the premise that other handlers read state that way doesn't hold here. Keeping request.state.

if not hide:
return request
active = [t for t in request.tools if getattr(t, "name", None) not in hide]
if len(active) < len(request.tools):
logger.debug("Filtered %d deferred tool schema(s) from model binding", len(request.tools) - len(active))
return request.override(tools=active)

def _blocked_tool_message(self, request: ToolCallRequest) -> ToolMessage | None:
from deerflow.tools.builtins.tool_search import get_deferred_registry

registry = get_deferred_registry()
if not registry:
return None

tool_name = str(request.tool_call.get("name") or "")
if not tool_name:
if not self._deferred:
return None

if not registry.contains(tool_name):
name = str(request.tool_call.get("name") or "")
if not name or name not in self._hidden(request.state):
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the _filter_tools thread: on the pinned langgraph==1.1.9, state is a declared dataclass field on ToolCallRequest (tool_call, tool, state, runtime), so request.state is correct. The block path is exercised by test_blocked_message_for_unpromoted_deferred_call plus the end-to-end promotion test, both green in CI.

For completeness: here request.runtime is a ToolRuntime, which does expose .state, so the suggested fallback wouldn't crash on this path — but it's unnecessary, and a getattr(request, "state", None) fallback would silently swallow a real wiring regression, which runs counter to this PR's fail-closed design. Keeping request.state.

return None

tool_call_id = str(request.tool_call.get("id") or "missing_tool_call_id")
return ToolMessage(
content=(f"Error: Tool '{tool_name}' is deferred and has not been promoted yet. Call tool_search first to expose and promote this tool's schema, then retry."),
content=(f"Error: Tool '{name}' is deferred and has not been promoted yet. Call tool_search first to expose and promote this tool's schema, then retry."),
tool_call_id=tool_call_id,
name=tool_name,
name=name,
status="error",
)

Expand Down
27 changes: 27 additions & 0 deletions backend/packages/harness/deerflow/agents/thread_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,32 @@ def merge_todos(existing: list | None, new: list | None) -> list | None:
return new


class PromotedTools(TypedDict):
catalog_hash: str
names: list[str]


def merge_promoted(existing: PromotedTools | None, new: PromotedTools | None) -> PromotedTools | None:
"""Reducer for deferred-tool promotions, scoped by catalog hash.

- new None/empty -> preserve existing (node didn't touch promotions).
- catalog_hash changed -> replace wholesale, dropping stale names (prevents a
persisted bare name from exposing a different tool after catalog drift).
- same catalog_hash -> union names, dedupe, preserve order.
"""
if not new:
return existing
if existing is None or existing.get("catalog_hash") != new["catalog_hash"]:
return {
"catalog_hash": new["catalog_hash"],
"names": list(dict.fromkeys(new["names"])),
}
return {
"catalog_hash": existing["catalog_hash"],
"names": list(dict.fromkeys(existing["names"] + new["names"])),
}


class ThreadState(AgentState):
sandbox: NotRequired[SandboxState | None]
thread_data: NotRequired[ThreadDataState | None]
Expand All @@ -66,3 +92,4 @@ class ThreadState(AgentState):
todos: Annotated[list | None, merge_todos]
uploaded_files: NotRequired[list[dict] | None]
viewed_images: Annotated[dict[str, ViewedImageData], merge_viewed_images] # image_path -> {base64, mime_type}
promoted: Annotated[PromotedTools | None, merge_promoted]
9 changes: 6 additions & 3 deletions backend/packages/harness/deerflow/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
from langchain_core.runnables import RunnableConfig

from deerflow.agents.lead_agent.agent import _build_middlewares
from deerflow.agents.lead_agent.agent import _assemble_deferred, _build_middlewares
from deerflow.agents.lead_agent.prompt import apply_prompt_template
from deerflow.agents.thread_state import ThreadState
from deerflow.config.agents_config import AGENT_NAME_PATTERN
Expand Down Expand Up @@ -237,19 +237,22 @@ def _ensure_agent(self, config: RunnableConfig):
subagent_enabled = cfg.get("subagent_enabled", False)
max_concurrent_subagents = cfg.get("max_concurrent_subagents", 3)

tools = self._get_tools(model_name=model_name, subagent_enabled=subagent_enabled)
final_tools, deferred_setup = _assemble_deferred(tools, enabled=self._app_config.tool_search.enabled)
kwargs: dict[str, Any] = {
# attach_tracing=False because ``stream()`` injects tracing
# callbacks at the graph invocation root so a single embedded run
# produces one trace with correct session_id / user_id propagation.
# Attaching them again on the model would emit duplicate spans.
"model": create_chat_model(name=model_name, thinking_enabled=thinking_enabled, attach_tracing=False),
"tools": self._get_tools(model_name=model_name, subagent_enabled=subagent_enabled),
"middleware": _build_middlewares(config, model_name=model_name, agent_name=self._agent_name, custom_middlewares=self._middlewares),
"tools": final_tools,
"middleware": _build_middlewares(config, model_name=model_name, agent_name=self._agent_name, custom_middlewares=self._middlewares, deferred_setup=deferred_setup),
"system_prompt": apply_prompt_template(
subagent_enabled=subagent_enabled,
max_concurrent_subagents=max_concurrent_subagents,
agent_name=self._agent_name,
available_skills=self._available_skills,
deferred_names=deferred_setup.deferred_names,
),
"state_schema": ThreadState,
}
Expand Down
Loading
Loading