pydantic · DouweM · May 22, 2026 · Apr 4, 2026 · Apr 24, 2026 · Apr 24, 2026
diff --git a/docs/message-history.md b/docs/message-history.md
@@ -334,6 +334,75 @@ print(result2.all_messages())
 """
 ```
 
+## Injecting messages mid-run
+
+Tools, capability hooks, and external code driving an agent run can inject extra
+[`ModelRequestPart`][pydantic_ai.messages.ModelRequestPart]s into the conversation
+mid-run via a pending message queue. Use this when something happens during a run
+that the agent should know about — a tool wants to add follow-up context, an external
+event needs to redirect the agent's plan, or background work needs to reach the agent
+when it completes.
+
+Each `enqueue` call bundles its parts into one [`PendingMessage`][pydantic_ai.messages.PendingMessage],
+which is drained automatically at a point determined by its `priority`:
+
+- `'steering'` (default): drained into the next [`ModelRequest`][pydantic_ai.messages.ModelRequest] before the model call. Use when the new context should influence the agent's *next* step.
+- `'follow_up'`: drained only when the agent would otherwise end. The agent run continues with a new model request that includes the follow-up parts. Use when the agent shouldn't stop while there's still pending work.
+
+### From inside a tool or hook
+
+Use [`RunContext.enqueue`][pydantic_ai.tools.RunContext.enqueue] when you have a
+`RunContext` in scope:
+
+```python {title="enqueue_from_tool.py"}
+from pydantic_ai import Agent, RunContext, SystemPromptPart
+
+agent = Agent('test')
+
+
+@agent.tool
+def trigger_alert(ctx: RunContext[None]) -> str:
+    ctx.enqueue(SystemPromptPart('Alert: production is degraded, prioritize triage.'))
+    return 'alert raised'
+```
+
+The steering message is appended to the agent's message history and is visible to the
+model on the next request, alongside any tool returns from the same step.
+
+### From external code driving `agent.iter()`
+
+Use [`AgentRun.enqueue`][pydantic_ai.run.AgentRun.enqueue] when you're driving a run
+from outside (e.g. forwarding events from a webhook, chat platform, or job queue):
+
+```python {title="enqueue_from_agent_run.py"}
+from pydantic_ai import Agent, UserPromptPart
+
+agent = Agent('test')
+
+
+async def main():
+    async with agent.iter('Start drafting the report') as agent_run:
+        agent_run.enqueue(
+            UserPromptPart('Change of plan: focus on Q3 revenue first.'),
+            priority='steering',
+        )
+        async for _ in agent_run:
+            ...
+```
+
+[`AgentRun.pending_messages`][pydantic_ai.run.AgentRun.pending_messages] exposes the
+current queue for inspection.
+
+!!! info "Limitations"
+    - Follow-up messages are only drained when the run is driven by [`Agent.run`][pydantic_ai.agent.AbstractAgent.run]
+      or by explicit [`AgentRun.next()`][pydantic_ai.run.AgentRun.next] calls — they
+      aren't drained inside a bare `async for node in agent_run:` loop. Steering
+      messages work in either case.
+    - Inside a [Temporal](durable_execution/temporal.md) workflow, tools run in
+      activities and don't share state with the workflow, so `ctx.enqueue` from a
+      tool doesn't currently propagate back to the run. Enqueue from the workflow
+      context (e.g. via `AgentRun.enqueue`) instead.
+
 ## Processing Message History
 
 Sometimes you may want to modify the message history before it's sent to the model. This could be for privacy

diff --git a/pydantic_ai_slim/pydantic_ai/__init__.py b/pydantic_ai_slim/pydantic_ai/__init__.py
@@ -93,6 +93,8 @@
     PartDeltaEvent,
     PartEndEvent,
     PartStartEvent,
+    PendingMessage,
+    PendingMessagePriority,
     RetryPromptPart,
     SystemPromptPart,
     TextContent,
@@ -232,6 +234,8 @@
     'PartDeltaEvent',
     'PartEndEvent',
     'PartStartEvent',
+    'PendingMessage',
+    'PendingMessagePriority',
     'RetryPromptPart',
     'SystemPromptPart',
     'TextContent',

diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -84,6 +84,8 @@ class GraphAgentState:
     """Last-resolved `max_tokens` from model settings, used only in error messages."""
     last_model_request_parameters: models.ModelRequestParameters | None = None
     """Last-resolved model request parameters, used for OTel span attributes."""
+    pending_messages: list[_messages.PendingMessage] = dataclasses.field(default_factory=list[_messages.PendingMessage])
+    """Queue of messages waiting to be injected into the conversation."""
 
     def check_incomplete_tool_call(self) -> None:
         """Raise `IncompleteToolCall` if the last model response was truncated mid-tool-call."""
@@ -1267,6 +1269,7 @@ def build_run_context(ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT
         run_id=ctx.state.run_id,
         metadata=ctx.state.metadata,
         tool_manager=ctx.deps.tool_manager,
+        pending_messages=ctx.state.pending_messages,
     )
     validation_context = build_validation_context(ctx.deps.validation_context, run_context)
     run_context = replace(run_context, validation_context=validation_context)

diff --git a/pydantic_ai_slim/pydantic_ai/_run_context.py b/pydantic_ai_slim/pydantic_ai/_run_context.py
@@ -13,6 +13,7 @@
 from pydantic_ai._instrumentation import DEFAULT_INSTRUMENTATION_VERSION
 
 from . import _utils, messages as _messages
+from .messages import PendingMessage, PendingMessagePriority
 
 if TYPE_CHECKING:
     from .agent.abstract import AbstractAgent
@@ -92,6 +93,14 @@ class RunContext(Generic[RunContextAgentDepsT]):
     `after_model_request`). Currently `None` in tool hooks, output validators,
     and during agent construction.
     """
+    pending_messages: list[PendingMessage] = field(default_factory=list[PendingMessage])
+    """Queue of messages waiting to be injected into the conversation.
+
+    Messages are drained automatically: `'steering'` messages before the next model
+    request, `'follow_up'` messages when the agent would otherwise end.
+
+    Use [`enqueue`][pydantic_ai.tools.RunContext.enqueue] to add messages.
+    """
 
     tool_manager: ToolManager[RunContextAgentDepsT] | None = None
     """The tool manager for the current run step.
@@ -109,6 +118,21 @@ def last_attempt(self) -> bool:
         """Whether this is the last attempt at running this tool before an error is raised."""
         return self.retry == self.max_retries
 
+    def enqueue(
+        self,
+        *parts: _messages.ModelRequestPart,
+        priority: PendingMessagePriority = 'steering',
+    ) -> None:
+        """Enqueue message parts to be injected into the conversation.
+
+        Args:
+            *parts: One or more message parts (e.g. `SystemPromptPart`, `UserPromptPart`).
+            priority: When to inject:
+                `'steering'` (default) — before the next model request.
+                `'follow_up'` — when the agent would otherwise end.
+        """
+        self.pending_messages.append(PendingMessage(parts=parts, priority=priority))
+
     __repr__ = _utils.dataclasses_no_defaults_repr
 
 

diff --git a/pydantic_ai_slim/pydantic_ai/agent/__init__.py b/pydantic_ai_slim/pydantic_ai/agent/__init__.py
@@ -47,7 +47,11 @@
 from .._output import OutputToolset
 from .._template import TemplateStr, validate_from_spec_args
 from ..builtin_tools import AbstractBuiltinTool
-from ..capabilities import AbstractCapability, CombinedCapability
+from ..capabilities import (
+    AbstractCapability,
+    CombinedCapability,
+    PendingMessageDrainCapability,
+)
 from ..capabilities._ordering import has_capability_type
 from ..capabilities._tool_search import ToolSearch as ToolSearchCap
 from ..capabilities.builtin_tool import BuiltinTool as BuiltinToolCap
@@ -1157,6 +1161,7 @@ def _merged_meta(ctx: RunContext[AgentDepsT]) -> dict[str, Any]:
             messages=state.message_history,
             tracer=tracer,
             run_step=0,
+            pending_messages=state.pending_messages,
         )
 
         # Determine root capability: override > agent default
@@ -2665,7 +2670,10 @@ async def run_mcp_servers(
 )
 """AgentSpec fields that are not supported at run/override time."""
 
-_AUTO_INJECT_CAPABILITY_TYPES: tuple[type[AbstractCapability[Any]], ...] = (ToolSearchCap,)
+_AUTO_INJECT_CAPABILITY_TYPES: tuple[type[AbstractCapability[Any]], ...] = (
+    ToolSearchCap,
+    PendingMessageDrainCapability,
+)
 """Infrastructure capabilities auto-injected when not already present."""
 
 

diff --git a/pydantic_ai_slim/pydantic_ai/capabilities/__init__.py b/pydantic_ai_slim/pydantic_ai/capabilities/__init__.py
@@ -1,5 +1,6 @@
 from typing import Any
 
+from ._pending_messages import PendingMessageDrainCapability
 from .abstract import (
     AbstractCapability,
     AgentNode,
@@ -64,6 +65,7 @@
 
 __all__ = [
     'AbstractCapability',
+    'PendingMessageDrainCapability',
     'AgentNode',
     'CapabilityOrdering',
     'CapabilityPosition',

diff --git a/pydantic_ai_slim/pydantic_ai/capabilities/_pending_messages.py b/pydantic_ai_slim/pydantic_ai/capabilities/_pending_messages.py
@@ -0,0 +1,92 @@
+"""Auto-injected capability that drains the pending message queue at appropriate times."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from pydantic_ai.capabilities.abstract import AbstractCapability, CapabilityOrdering
+from pydantic_ai.messages import ModelRequest, PendingMessage, PendingMessagePriority
+from pydantic_ai.tools import RunContext
+
+if TYPE_CHECKING:
+    from pydantic_ai import _agent_graph
+    from pydantic_ai.models import ModelRequestContext
+    from pydantic_ai.result import FinalResult
+    from pydantic_graph import End
+
+
+def _drain_by_priority(
+    queue: list[PendingMessage],
+    priority: PendingMessagePriority,
+) -> list[PendingMessage]:
+    """Remove and return all messages with the given priority from the queue."""
+    drained: list[PendingMessage] = []
+    remaining: list[PendingMessage] = []
+    for msg in queue:
+        if msg.priority == priority:
+            drained.append(msg)
+        else:
+            remaining.append(msg)
+    queue[:] = remaining
+    return drained
+
+
+class PendingMessageDrainCapability(AbstractCapability[Any]):
+    """Drains the pending message queue at appropriate times.
+
+    - Steering messages are injected before each model request.
+    - Follow-up messages are injected when the agent would otherwise end,
+      redirecting to a new ModelRequestNode to continue the conversation.
+
+    This capability is always auto-injected and placed outermost via
+    [`CapabilityOrdering`][pydantic_ai.capabilities.abstract.CapabilityOrdering]
+    so it wraps around other capabilities. This ensures steering messages are
+    drained into the model request before user capabilities see it, and follow-up
+    redirection runs after all other `after_node_run` hooks (which run in reverse).
+    """
+
+    def get_ordering(self) -> CapabilityOrdering:
+        # Outermost so steering messages are drained into the request before other
+        # capabilities see it, and follow-up redirection runs after all other
+        # after_node_run hooks (which run in reverse order).
+        return CapabilityOrdering(position='outermost')
+
+    async def before_model_request(
+        self,
+        ctx: RunContext[Any],
+        request_context: ModelRequestContext,
+    ) -> ModelRequestContext:
+        """Drain steering messages into the model request.
+
+        Appends to both `request_context.messages` (so the model sees them in this
+        request) and `ctx.messages` (so they persist in the agent's message history).
+        """
+        drained = _drain_by_priority(ctx.pending_messages, 'steering')
+        if drained:
+            parts = [part for msg in drained for part in msg.parts]
+            steering_request = ModelRequest(parts=parts)
+            request_context.messages.append(steering_request)
+            ctx.messages.append(steering_request)
+        return request_context
+
+    async def after_node_run(
+        self,
+        ctx: RunContext[Any],
+        *,
+        node: _agent_graph.AgentNode[Any, Any],
+        result: _agent_graph.AgentNode[Any, Any] | End[FinalResult[Any]],
+    ) -> _agent_graph.AgentNode[Any, Any] | End[FinalResult[Any]]:
+        """Drain follow-up messages when the agent would otherwise end."""
+        from pydantic_ai._agent_graph import ModelRequestNode
+        from pydantic_graph import End
+
+        if not isinstance(result, End):
+            return result
+
+        follow_ups = _drain_by_priority(ctx.pending_messages, 'follow_up')
+        if not follow_ups:
+            return result
+
+        parts = [part for msg in follow_ups for part in msg.parts]
+        request = ModelRequest(parts=parts)
+        return ModelRequestNode(request=request)
diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py
@@ -2036,6 +2036,38 @@ def provider_request_id(self) -> str | None:
 ModelMessage = Annotated[ModelRequest | ModelResponse, pydantic.Discriminator('kind')]
 """Any message sent to or returned by a model."""
 
+
+PendingMessagePriority = Literal['steering', 'follow_up']
+"""Priority level for a pending message.
+
+- `'steering'`: Drained into the next model request (before the model call).
+- `'follow_up'`: Drained only when the agent would otherwise end, preventing
+    premature termination while follow-up work is pending.
+"""
+
+
+@dataclass
+class PendingMessage:
+    """A message queued for injection into the agent conversation.
+
+    Pending messages are enqueued via [`RunContext.enqueue`][pydantic_ai.tools.RunContext.enqueue]
+    or [`AgentRun.enqueue`][pydantic_ai.run.AgentRun.enqueue] and are
+    automatically drained at the appropriate time during the agent run.
+
+    When several pending messages with the same priority are drained together,
+    their parts are concatenated in queue order into a single
+    [`ModelRequest`][pydantic_ai.messages.ModelRequest].
+    """
+
+    parts: Sequence[ModelRequestPart]
+    """The message parts to inject."""
+
+    _: KW_ONLY
+
+    priority: PendingMessagePriority = 'steering'
+    """When to drain this message:
+
+    - `'steering'`: injected before the next model request.
+    - `'follow_up'`: injected only when the agent would otherwise finish.
+    """
+
+
 ModelMessagesTypeAdapter = pydantic.TypeAdapter(
     list[ModelMessage], config=pydantic.ConfigDict(defer_build=True, ser_json_bytes='base64', val_json_bytes='base64')
 )

diff --git a/pydantic_ai_slim/pydantic_ai/run.py b/pydantic_ai_slim/pydantic_ai/run.py
@@ -394,6 +394,30 @@ def run_id(self) -> str:
         """The unique identifier for the agent run."""
         return self._graph_run.state.run_id
 
+    @property
+    def pending_messages(self) -> list[_messages.PendingMessage]:
+        """Queue of messages waiting to be injected into the conversation.
+
+        Messages are drained automatically: `'steering'` messages before the next model
+        request, `'follow_up'` messages when the agent would otherwise end.
+        """
+        return self._graph_run.state.pending_messages
+
+    def enqueue(
+        self,
+        *parts: _messages.ModelRequestPart,
+        priority: _messages.PendingMessagePriority = 'steering',
+    ) -> None:
+        """Enqueue message parts to be injected into the conversation.
+
+        Args:
+            *parts: One or more message parts (e.g. `SystemPromptPart`, `UserPromptPart`).
+            priority: When to inject:
+                `'steering'` (default) — before the next model request.
+                `'follow_up'` — when the agent would otherwise end.
+        """
+        self._graph_run.state.pending_messages.append(_messages.PendingMessage(parts=parts, priority=priority))
+
     def __repr__(self) -> str:  # pragma: no cover
         result = self._graph_run.output
         result_repr = '<run not finished>' if result is None else repr(result.output)