diff --git a/README.md b/README.md index 4d4e151..f8b975e 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ Extras for specific capabilities: ```bash uv add "pydantic-ai-harness[codemode]" # CodeMode (adds the Monty sandbox) +uv add "pydantic-ai-harness[logfire]" # ManagedPrompt (Logfire-managed prompts) ``` The `code-mode` extra is also supported as an alias. diff --git a/pydantic_ai_harness/__init__.py b/pydantic_ai_harness/__init__.py index 4f6f62d..e51895a 100644 --- a/pydantic_ai_harness/__init__.py +++ b/pydantic_ai_harness/__init__.py @@ -5,9 +5,10 @@ if TYPE_CHECKING: from .code_mode import CodeMode from .filesystem import FileSystem + from .logfire import ManagedPrompt from .shell import Shell -__all__ = ['CodeMode', 'FileSystem', 'Shell'] +__all__ = ['CodeMode', 'FileSystem', 'ManagedPrompt', 'Shell'] def __getattr__(name: str) -> object: @@ -15,11 +16,15 @@ def __getattr__(name: str) -> object: from .code_mode import CodeMode return CodeMode - elif name == 'FileSystem': + if name == 'FileSystem': from .filesystem import FileSystem return FileSystem - elif name == 'Shell': + if name == 'ManagedPrompt': + from .logfire import ManagedPrompt + + return ManagedPrompt + if name == 'Shell': from .shell import Shell return Shell diff --git a/pydantic_ai_harness/logfire/README.md b/pydantic_ai_harness/logfire/README.md new file mode 100644 index 0000000..7612301 --- /dev/null +++ b/pydantic_ai_harness/logfire/README.md @@ -0,0 +1,204 @@ +# Logfire-backed capabilities + +Drive agent configuration from [Logfire managed variables](https://logfire.pydantic.dev/docs/reference/advanced/managed-variables/), +so you can iterate on it from the Logfire UI -- versioned, labelled, and rolled out -- without redeploying. + +Install the extra: + +```bash +pip install 'pydantic-ai-harness[logfire]' +``` + +## `ManagedPrompt` + +Back an agent's instructions with a Logfire-managed +[Prompt](https://logfire.pydantic.dev/docs/reference/advanced/prompt-management/). 
+ +> A broader, first-party `Managed` capability is in flight in +> [pydantic-ai#5107](https://github.com/pydantic/pydantic-ai/pull/5107) and will eventually be +> importable as `pydantic_ai.managed.logfire.Managed` -- covering instructions, model settings, +> and whole-spec variables. Until then, `ManagedPrompt` is the supported path for backing +> instructions with a Logfire-managed prompt. + +### The problem + +Prompts are critical to agent behavior, but iterating on them through the normal +edit → review → deploy loop is slow, and you can't easily A/B test a change or roll it +back the moment it misbehaves in production. + +### The solution + +`ManagedPrompt` declares the backing managed variable for you and resolves it **once per +run**, feeding the value into the agent's instructions. The resolution happens inside the +run's `wrap_run` hook using the +[`ResolvedVariable`](https://logfire.pydantic.dev/docs/reference/advanced/managed-variables/) +as a context manager that stays open for the whole run -- so the selected label and version +are attached as baggage to every child span of the agent run. You get a direct correlation +between a run's behavior and the exact prompt version that produced it, plus instant +iteration and rollback from the Logfire UI. + +### Usage + +Pass the prompt name and a default value. The name `support_agent` is declared as the managed +variable `prompt__support_agent` -- the naming Logfire's Prompt management uses (hyphens in a +name become underscores). The default keeps the agent working until a remote value is published. + +```python +import logfire +from pydantic_ai import Agent + +from pydantic_ai_harness.logfire import ManagedPrompt + +logfire.configure() + +agent = Agent( + 'openai:gpt-5', + capabilities=[ + ManagedPrompt( + 'support_agent', + default='You are a helpful customer support agent. 
Be friendly and concise.', + label='production', + ) + ], +) + +result = agent.run_sync('My order never arrived.') +print(result.output) +``` + +### Targeting + +For deterministic A/B assignment (the same user always sees the same label), pass a +`targeting_key`. It can be a static string or a callable that derives the key from the +[`RunContext`](https://ai.pydantic.dev/api/tools/#pydantic_ai.tools.RunContext) -- handy +when the key lives in your agent's `deps`: + +```python +from dataclasses import dataclass + +from pydantic_ai import Agent + +from pydantic_ai_harness.logfire import ManagedPrompt + + +@dataclass +class Deps: + user_id: str + + +agent = Agent( + 'openai:gpt-5', + deps_type=Deps, + capabilities=[ + ManagedPrompt( + 'support_agent', + default='You are a helpful customer support agent.', + targeting_key=lambda ctx: ctx.deps.user_id, + ), + ], +) +``` + +Pass `attributes` (or a callable returning them) for condition-based targeting rules. +When `label` is omitted, the variable's rollout and targeting rules pick the label; +when both `targeting_key` and `attributes` are omitted, Logfire falls back to its own +targeting context and then to the active trace id. + +### Templating with deps + +By default the resolved prompt is used verbatim. Pass `render_template=True` to render it as a +Handlebars template against the agent's `deps` — the same mechanism as +[`TemplateStr`](https://ai.pydantic.dev/api/#pydantic_ai.TemplateStr) — so `{{field}}` is filled +from `deps`: + +```python +from dataclasses import dataclass + +from pydantic_ai import Agent + +from pydantic_ai_harness.logfire import ManagedPrompt + + +@dataclass +class Deps: + customer_name: str + + +agent = Agent( + 'openai:gpt-5', + deps_type=Deps, + capabilities=[ + ManagedPrompt( + 'support_agent', + default='You are helping {{customer_name}}. Be friendly and concise.', + render_template=True, + ), + ], +) +``` + +Rendering requires `pydantic-handlebars` (install `pydantic-ai-slim[spec]`). 
It is off by default. + +### Prompt-cache trade-off + +The resolved value lands in the agent's **system instructions**. Provider prompt caches (Anthropic, +OpenAI, etc.) key strictly by prefix -- `tools → system → messages` -- so any change to the system +block invalidates the cached prefix for the affected runs. + +| Mode | Cache impact | +| --- | --- | +| Pinned `label='production'`, no rollout split | **Cache-stable.** The value only changes on a deliberate prompt rollout, which is the same cost as a redeploy. | +| Percentage rollout across labels (no `label=`) | Different runs land on different labels → splits the cache into one lane per label. | +| `targeting_key` per user/tenant with multiple labels in play | Cache lanes per assigned label; deterministic per key but still N lanes overall. | +| Mid-traffic label flip in the Logfire UI | One-shot cold-invalidation for everyone on that label. | + +In short: pinning a `label` keeps the cache hot; using `ManagedPrompt` as an A/B platform is opt-in +cache cost. If you don't need rollouts, `label='production'` is the recommended default. + +### Using your own variable + +Declaring the same name more than once is fine -- each `ManagedPrompt` builds its own backing +variable, so sharing a prompt across several agents just works. Pass an existing +[`logfire.variables.Variable`](https://logfire.pydantic.dev/docs/reference/advanced/managed-variables/) +as the first argument instead of a name when you want to declare the variable yourself -- +for example a `template_var`, or one registered for `variables_push`: + +```python +import logfire +from pydantic_ai import Agent + +from pydantic_ai_harness.logfire import ManagedPrompt + +logfire.configure() + +support_prompt = logfire.var( + name='prompt__support_agent', + type=str, + default='You are a helpful customer support agent. 
Be friendly and concise.', +) + +agent = Agent('openai:gpt-5', capabilities=[ManagedPrompt(support_prompt, label='production')]) +``` + +When `name` is a prompt name, pass `logfire_instance=` to declare the variable on a specific +Logfire instance instead of the module-level default. + +### Notes + +- The prompt resolves to a `str`. By default it's used verbatim; set `render_template=True` + to render `{{...}}` against `deps` (see [Templating with deps](#templating-with-deps)). +- Resolution is isolated per run via a context variable, so a single capability instance + is safe to share across concurrent runs. +- `ManagedPrompt.resolved` exposes the active run's `ResolvedVariable` (value, label, version, + reason) for inspection -- e.g. from inside a tool. +- The capability runs outermost (wrapping `Instrumentation`) so the resolved variable's baggage + covers the agent run span as well as its children. On recent Logfire versions both the + selected label and the version are propagated as separate baggage attributes. +- Resolution happens **once per run**. A label flip or rollout change that lands in Logfire + mid-run is not picked up until the next run starts -- the trade-off for run-stable + instructions and a single baggage scope across all child spans. +- For Logfire-side targeting that lives outside the agent (e.g. set once per request handler), + use Logfire's + [`targeting_context`](https://logfire.pydantic.dev/docs/reference/advanced/managed-variables/) + in an outer scope; `ManagedPrompt` only needs `targeting_key`/`attributes` when the key + comes from the agent's `RunContext`. 
diff --git a/pydantic_ai_harness/logfire/__init__.py b/pydantic_ai_harness/logfire/__init__.py new file mode 100644 index 0000000..6da79de --- /dev/null +++ b/pydantic_ai_harness/logfire/__init__.py @@ -0,0 +1,5 @@ +"""Logfire-backed capabilities: drive agent configuration from Logfire managed variables.""" + +from pydantic_ai_harness.logfire._managed_prompt import ManagedPrompt + +__all__ = ['ManagedPrompt'] diff --git a/pydantic_ai_harness/logfire/_managed_prompt.py b/pydantic_ai_harness/logfire/_managed_prompt.py new file mode 100644 index 0000000..32afcf3 --- /dev/null +++ b/pydantic_ai_harness/logfire/_managed_prompt.py @@ -0,0 +1,210 @@ +"""Back an agent's instructions with a Logfire-managed prompt.""" + +from __future__ import annotations + +import warnings +from collections.abc import Callable, Mapping +from contextvars import ContextVar +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any + +import logfire +from logfire.variables import Variable +from pydantic_ai import TemplateStr +from pydantic_ai.capabilities import AbstractCapability, CapabilityOrdering, Instrumentation +from pydantic_ai.tools import AgentDepsT, RunContext + +if TYPE_CHECKING: + from logfire import Logfire + from logfire.variables import ResolvedVariable + from pydantic_ai.capabilities.abstract import WrapRunHandler + from pydantic_ai.run import AgentRunResult + + +# Logfire exposes a managed prompt with slug `<slug>` as a variable named `prompt__<slug>`, +# with hyphens replaced by underscores (see the Logfire prompt-management docs). `prompt__` +# is reserved for these system-managed prompts. +_PROMPT_VARIABLE_PREFIX = 'prompt__' + + +def _new_resolved_var() -> ContextVar[ResolvedVariable[str] | None]: + # `None` means nothing has been resolved for the active run. + return ContextVar('managed_prompt_resolved', default=None) + + +@dataclass +class ManagedPrompt(AbstractCapability[AgentDepsT]): + """Back an agent's instructions with a Logfire-managed prompt. 
+ + **Prompt-cache trade-off:** the resolved value lands in the system instructions block, so any + Logfire-side change to the prompt (new version rollout, label flip, A/B targeting) invalidates + the provider's prompt cache for the affected runs. Pin a `label` (e.g. `'production'`) for the + cache-stable path; treat percentage rollouts and per-user targeting as opt-in cache cost. See + the README's "Prompt-cache trade-off" section for the full picture. + + Pass the managed prompt name and a default value and the capability declares the backing + [managed variable](https://logfire.pydantic.dev/docs/reference/advanced/managed-variables/) + for you -- a name of `support_agent` resolves the variable `prompt__support_agent`, matching + the naming Logfire's [Prompt management](https://logfire.pydantic.dev/docs/reference/advanced/prompt-management/) + uses. You can iterate on the prompt from the Logfire UI -- versioned, labelled, and rolled + out -- without redeploying, while the code default keeps the agent working when no remote + value is available. + + ```python + import logfire + from pydantic_ai import Agent + + from pydantic_ai_harness.logfire import ManagedPrompt + + logfire.configure() + + agent = Agent( + 'openai:gpt-5', + capabilities=[ + ManagedPrompt( + 'support_agent', + default='You are a helpful customer support agent. Be friendly and concise.', + label='production', + ) + ], + ) + result = agent.run_sync('My order never arrived.') + ``` + + The prompt value is resolved **once per run**, inside the run's + [`wrap_run`][pydantic_ai.capabilities.AbstractCapability.wrap_run] hook, using the + [`ResolvedVariable`][logfire.variables.ResolvedVariable] as a context manager that stays open for the + whole run -- so the selected label and version are attached as baggage to every child span + of the agent run. 
+ + Declaring the same name more than once is fine -- each `ManagedPrompt` constructs its own + backing variable, so sharing a prompt across several agents just works. Pass an existing + [`logfire.variables.Variable`][logfire.variables.Variable] as `name` instead of a prompt name + when you want to use a variable you defined yourself (for example a `template_var`, or one + registered for [`variables_push`][logfire.Logfire.variables_push]). + """ + + name: str | Variable[str] + """The managed prompt name (declared as the variable `prompt__<name>`), or a pre-built `logfire.Variable`.""" + + default: str | None = None + """Code-default prompt text. Required when `name` is a prompt name; ignored when `name` is a `Variable`.""" + + label: str | None = None + """Explicit targeting label on the Logfire managed prompt to resolve (e.g. `'production'`). + When `None`, the targeting rules on the managed variable select the label.""" + + targeting_key: str | Callable[[RunContext[AgentDepsT]], str | None] | None = None + """Stable key that seeds Logfire's deterministic rollout assignment -- the same key always + lands in the same percentage bucket, so a given user keeps the same label across runs. + Accepts a static value or a callable that derives it from the + [`RunContext`][pydantic_ai.tools.RunContext]. When `None`, Logfire falls back to its own + targeting context and then the active trace id.""" + + attributes: Mapping[str, Any] | Callable[[RunContext[AgentDepsT]], Mapping[str, Any] | None] | None = None + """Attributes for condition-based targeting rules, or a callable that derives them + from the [`RunContext`][pydantic_ai.tools.RunContext].""" + + render_template: bool = False + """When `True`, render the resolved prompt as a Handlebars template against the agent's + `deps` (the same mechanism as [`TemplateStr`][pydantic_ai.TemplateStr]); `{{field}}` is + filled from `deps`. Requires `pydantic-handlebars` (install `pydantic-ai-slim[spec]`). 
+ Defaults to `False`, so the resolved prompt is used verbatim.""" + + logfire_instance: Logfire | None = None + """Logfire instance to resolve the variable on. When `None`, the global default instance + (the one backing the module-level [`logfire.var`][logfire.var]) is used. Ignored when + `name` is a `Variable`.""" + + _variable: Variable[str] = field(init=False, repr=False, compare=False) + """The managed variable backing the prompt (declared from the slug, or the one passed in).""" + + _resolved: ContextVar[ResolvedVariable[str] | None] = field( + default_factory=_new_resolved_var, init=False, repr=False, compare=False + ) + """Per-run resolution, isolated across concurrent runs via the context variable.""" + + def __post_init__(self) -> None: + if not isinstance(self.name, str): + if self.logfire_instance is not None: + warnings.warn( + '`logfire_instance` is ignored when `name` is a `Variable`; ' + 'the variable already carries its own Logfire instance.', + stacklevel=2, + ) + self._variable = self.name + return + + if self.default is None: + raise TypeError('`default` is required when `name` is a prompt name rather than a `Variable`.') + + # Strip the prefix if the user accidentally passed it so we can still apply + # hyphen-to-underscore normalization, then re-add the prefix below. + name = self.name + if name.startswith(_PROMPT_VARIABLE_PREFIX): + warnings.warn( + f'The {_PROMPT_VARIABLE_PREFIX!r} prefix is added automatically; ' + f'pass the bare prompt name rather than {name!r}.', + stacklevel=2, + ) + name = name[len(_PROMPT_VARIABLE_PREFIX) :] + + variable_name = f'{_PROMPT_VARIABLE_PREFIX}{name.replace("-", "_")}' + if not variable_name.isidentifier(): + raise ValueError( + f'Prompt name {self.name!r} produces an invalid variable name {variable_name!r}; ' + 'names may only contain letters, digits, hyphens, and underscores.' 
+ ) + + # Construct the variable directly (rather than via `logfire.var`) so redeclaring the + # same name is idempotent: `logfire.var` registers in a per-instance registry and raises + # on a duplicate name, which would break sharing one prompt across agents. + instance = self.logfire_instance if self.logfire_instance is not None else logfire.DEFAULT_LOGFIRE_INSTANCE + self._variable = Variable(variable_name, type=str, default=self.default, logfire_instance=instance) + + @property + def resolved(self) -> ResolvedVariable[str] | None: + """The prompt resolution for the active run, or `None` outside a run. + + Exposes the full [`ResolvedVariable`][logfire.variables.ResolvedVariable] (`value`, `label`, + `version`, `reason`, ...) so callers can inspect which prompt version is in play. + """ + return self._resolved.get() + + def get_ordering(self) -> CapabilityOrdering: + """Run outermost so the prompt's baggage envelops the whole run, including the run span.""" + return CapabilityOrdering(position='outermost', wraps=[Instrumentation]) + + def get_instructions(self) -> Callable[[RunContext[AgentDepsT]], str | None]: + """Provide the resolved prompt to the agent's system prompt.""" + + def instructions(ctx: RunContext[AgentDepsT]) -> str | None: + resolved = self.resolved + if resolved is None: + # No active run -- contribute no instructions. 
+ return None + if self.render_template: + return TemplateStr[AgentDepsT](resolved.value).render(ctx.deps) + return resolved.value + + return instructions + + async def wrap_run(self, ctx: RunContext[AgentDepsT], *, handler: WrapRunHandler) -> AgentRunResult[Any]: + """Resolve the prompt once and keep its baggage active for the duration of the run.""" + if callable(self.targeting_key): + targeting_key = self.targeting_key(ctx) + else: + targeting_key = self.targeting_key + + if callable(self.attributes): + attributes = self.attributes(ctx) + else: + attributes = self.attributes + + resolved = self._variable.get(targeting_key=targeting_key, attributes=attributes, label=self.label) + with resolved: + token = self._resolved.set(resolved) + try: + return await handler() + finally: + self._resolved.reset(token) diff --git a/pyproject.toml b/pyproject.toml index aac91c5..7ca3b76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,10 @@ temporal = [ dbos = [ 'pydantic-ai-slim[dbos]', ] +logfire = [ + 'logfire>=4.31.0', + 'pydantic-ai-slim[spec]>=1.95.1', +] [project.urls] Homepage = 'https://github.com/pydantic/pydantic-ai-harness' @@ -63,8 +67,9 @@ dev = [ 'pytest-anyio', 'coverage>=7.10.7', 'logfire[httpx]>=4.31.0', - "dirty-equals>=0.9.0", - "inline-snapshot>=0.32.5", + 'dirty-equals>=0.9.0', + 'inline-snapshot>=0.32.5', + 'pydantic-ai-slim[spec]>=1.95.1', "pytest-examples>=0.0.18", ] lint = [ @@ -113,8 +118,10 @@ quote-style = 'single' pythonVersion = '3.10' typeCheckingMode = 'strict' exclude = ['template', '.venv', 'mutants'] +# `reportUnusedFunction` is disabled for tests because fixtures and `@agent.tool_plain` +# helpers are registered via decorators and never referenced by name (matches pydantic-ai). 
executionEnvironments = [ - { root = 'tests', reportPrivateUsage = false }, + { root = 'tests', reportPrivateUsage = false, reportUnusedFunction = false }, ] [tool.pytest.ini_options] diff --git a/tests/logfire_variables/__init__.py b/tests/logfire_variables/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/logfire_variables/test_managed_prompt.py b/tests/logfire_variables/test_managed_prompt.py new file mode 100644 index 0000000..7d2b623 --- /dev/null +++ b/tests/logfire_variables/test_managed_prompt.py @@ -0,0 +1,464 @@ +"""Tests for the `ManagedPrompt` capability (source package `pydantic_ai_harness.logfire`). + +This directory is deliberately named `logfire_variables`, not `logfire`, even though it +tests the `pydantic_ai_harness.logfire` package. The pyright config scopes test-only report +overrides with `executionEnvironments = [{ root = 'tests' }]`, which makes `tests/` an import +root -- so a `tests/logfire/` directory would shadow the third-party `logfire` package for +every test file's `import logfire`. Keeping the directory off that name avoids the collision. + +Style follows `tests/code_mode/test_code_mode.py`: module-level +`pytestmark = pytest.mark.anyio` and an `anyio_backend` fixture. All resolution runs +against the code default (no Logfire provider is configured), which is exactly the +safety-net behavior `ManagedPrompt` relies on. Each test uses a unique slug because the +default Logfire instance keeps its variable registry across `configure()` calls. 
+""" + +from __future__ import annotations + +from collections.abc import Generator +from contextlib import contextmanager +from dataclasses import dataclass +from typing import Any +from unittest.mock import patch + +import logfire +import pytest +from inline_snapshot import snapshot +from logfire.testing import CaptureLogfire +from logfire.variables import LabeledValue, Rollout, VariableConfig, VariablesConfig +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from pydantic_ai import Agent, RunContext +from pydantic_ai.capabilities import Instrumentation +from pydantic_ai.messages import ModelMessage, ModelRequest +from pydantic_ai.models.test import TestModel +from pydantic_ai.usage import RunUsage + +from pydantic_ai_harness import ManagedPrompt +from pydantic_ai_harness.logfire import ManagedPrompt as ManagedPromptFromPackage + +pytestmark = pytest.mark.anyio + +DEFAULT = 'You are a helpful assistant.' + + +@pytest.fixture(autouse=True, scope='module') +def _configure_logfire() -> None: + """Configure Logfire once so variable resolution does not warn (warnings are errors).""" + logfire.configure(send_to_logfire=False, console=False) + + +@pytest.fixture +def anyio_backend() -> str: + return 'asyncio' + + +def instructions_seen(result_messages: list[ModelMessage]) -> list[str]: + """Collect the rendered instructions from each `ModelRequest` in a run.""" + return [m.instructions for m in result_messages if isinstance(m, ModelRequest) and m.instructions is not None] + + +# Span attributes whose values vary between runs (random ids, line numbers, the +# resolution span's merged-into-attributes JSON blob from Logfire) and would otherwise +# make snapshots non-deterministic. `attributes` here is the literal key Logfire emits +# on the resolve span containing the serialized targeting attributes -- it shadows the +# enclosing span attributes dict by name, so the pop targets the inner one. 
+_VOLATILE_SPAN_ATTRIBUTES = ( + 'attributes', + 'code.lineno', + 'gen_ai.conversation.id', + 'gen_ai.agent.call.id', +) + + +@contextmanager +def _variables_provider_configured(capfire: CaptureLogfire, variables_config: VariablesConfig) -> Generator[None]: + """Reconfigure Logfire with a local variables provider for the duration of the block. + + Restores the module's baseline configuration on exit so the change does not leak + into other tests in this module (or any module collected after it). + """ + logfire.configure( + send_to_logfire=False, + console=False, + variables=logfire.LocalVariablesOptions(config=variables_config), + additional_span_processors=[SimpleSpanProcessor(capfire.exporter)], + ) + try: + yield + finally: + logfire.configure(send_to_logfire=False, console=False) + + +def span_attributes(capfire: CaptureLogfire) -> list[dict[str, Any]]: + """Each exported span as `{name, attributes}`, with volatile attributes dropped. + + Names identify which span the attributes belong to; everything else (ids, timing, + parentage) is omitted to keep the snapshots focused and stable. 
+ """ + result: list[dict[str, Any]] = [] + for span in capfire.exporter.exported_spans_as_dict(): + attributes = span['attributes'] + for key in _VOLATILE_SPAN_ATTRIBUTES: + attributes.pop(key, None) + result.append({'name': span['name'], 'attributes': attributes}) + return result + + +def test_public_reexport() -> None: + assert ManagedPrompt is ManagedPromptFromPackage + + +def test_slug_becomes_prompt_variable_name() -> None: + capability = ManagedPrompt('support_agent', default=DEFAULT) + assert capability._variable.name == 'prompt__support_agent' + + +def test_hyphenated_slug_is_normalized() -> None: + capability = ManagedPrompt('welcome-email', default=DEFAULT) + assert capability._variable.name == 'prompt__welcome_email' + + +def test_slug_requires_default() -> None: + with pytest.raises(TypeError, match='`default` is required'): + ManagedPrompt('no_default_slug') + + +def test_explicit_logfire_instance_is_used() -> None: + capability = ManagedPrompt('with_instance', default=DEFAULT, logfire_instance=logfire.DEFAULT_LOGFIRE_INSTANCE) + assert capability._variable.name == 'prompt__with_instance' + + +def test_duplicate_slug_is_allowed() -> None: + # Each ManagedPrompt builds its own backing variable, so the same slug can be declared + # repeatedly (e.g. shared across agents) without the duplicate-registration error + # `logfire.var` would raise. 
+ first = ManagedPrompt('shared_slug', default=DEFAULT) + second = ManagedPrompt('shared_slug', default=DEFAULT) + assert first._variable.name == second._variable.name == 'prompt__shared_slug' + + +def test_prompt_prefix_in_slug_warns_and_is_stripped() -> None: + with pytest.warns(UserWarning, match='added automatically'): + capability = ManagedPrompt('prompt__already_prefixed', default=DEFAULT) + assert capability._variable.name == 'prompt__already_prefixed' + + +def test_invalid_slug_raises() -> None: + with pytest.raises(ValueError, match='invalid variable name'): + ManagedPrompt('has spaces', default=DEFAULT) + + +async def test_resolves_default_into_instructions() -> None: + agent = Agent(TestModel(), capabilities=[ManagedPrompt('default_slug', default=DEFAULT)]) + + result = await agent.run('hello') + + assert instructions_seen(result.all_messages()) == [DEFAULT] + + +async def test_accepts_prebuilt_variable() -> None: + var = logfire.var(name='prompt__prebuilt', type=str, default=DEFAULT) + agent = Agent(TestModel(), capabilities=[ManagedPrompt(var)]) + + result = await agent.run('hello') + + assert instructions_seen(result.all_messages()) == [DEFAULT] + + +async def test_override_is_reflected() -> None: + capability = ManagedPrompt('override_slug', default=DEFAULT) + agent = Agent(TestModel(), capabilities=[capability]) + + with capability._variable.override('Be terse.'): + result = await agent.run('hello') + + assert instructions_seen(result.all_messages()) == ['Be terse.'] + + +async def test_records_variable_resolution_span(capfire: CaptureLogfire) -> None: + agent = Agent(TestModel(), capabilities=[ManagedPrompt('span_slug', default=DEFAULT)]) + + await agent.run('hello') + + # Without `Instrumentation` the only span is the one Logfire records for resolving the + # prompt variable -- the resolved value, label, version, and reason are captured as attributes. 
+ assert span_attributes(capfire) == snapshot( + [ + { + 'name': 'Resolve variable prompt__span_slug', + 'attributes': { + 'code.filepath': '_managed_prompt.py', + 'code.function': 'wrap_run', + 'targeting_key': 'null', + 'logfire.msg_template': 'Resolve variable prompt__span_slug', + 'logfire.msg': 'Resolve variable prompt__span_slug', + 'logfire.span_type': 'span', + 'name': 'prompt__span_slug', + 'value': '"You are a helpful assistant."', + 'label': 'null', + 'version': 'null', + 'reason': 'no_provider', + 'logfire.json_schema': '{"type":"object","properties":{"name":{},"targeting_key":{"type":"null"},"attributes":{"type":"object"},"value":{},"label":{"type":"null"},"version":{"type":"null"},"reason":{}}}', + }, + } + ] + ) + + +async def test_baggage_propagates_to_run_and_child_spans(capfire: CaptureLogfire) -> None: + # `Instrumentation` produces the agent run / model request / tool spans; `ManagedPrompt` + # runs outermost so its `logfire.variables.prompt__baggage_slug` baggage lands on all of them. + # The resolution span itself precedes the open baggage context, so it carries no baggage attribute. 
+ agent = Agent( + TestModel(), + capabilities=[ManagedPrompt('baggage_slug', default=DEFAULT), Instrumentation()], + ) + + @agent.tool_plain + def noop() -> str: + return 'ok' + + await agent.run('hello') + + assert span_attributes(capfire) == snapshot( + [ + { + 'name': 'Resolve variable prompt__baggage_slug', + 'attributes': { + 'code.filepath': '_managed_prompt.py', + 'code.function': 'wrap_run', + 'targeting_key': 'null', + 'logfire.msg_template': 'Resolve variable prompt__baggage_slug', + 'logfire.msg': 'Resolve variable prompt__baggage_slug', + 'logfire.span_type': 'span', + 'name': 'prompt__baggage_slug', + 'value': '"You are a helpful assistant."', + 'label': 'null', + 'version': 'null', + 'reason': 'no_provider', + 'logfire.json_schema': '{"type":"object","properties":{"name":{},"targeting_key":{"type":"null"},"attributes":{"type":"object"},"value":{},"label":{"type":"null"},"version":{"type":"null"},"reason":{}}}', + }, + }, + { + 'name': 'chat test', + 'attributes': { + 'gen_ai.operation.name': 'chat', + 'gen_ai.provider.name': 'test', + 'gen_ai.system': 'test', + 'gen_ai.request.model': 'test', + 'model_request_parameters': '{"function_tools":[{"name":"noop","parameters_json_schema":{"additionalProperties":false,"properties":{},"type":"object"},"description":null,"outer_typed_dict_key":null,"strict":null,"sequential":false,"kind":"function","metadata":null,"timeout":null,"defer_loading":false,"unless_native":null,"with_native":null,"tool_kind":null,"return_schema":null,"include_return_schema":null}],"native_tools":[],"output_mode":"text","output_object":null,"output_tools":[],"prompted_output_template":null,"allow_text_output":true,"allow_image_output":false,"instruction_parts":[{"content":"You are a helpful assistant.","dynamic":true,"part_kind":"instruction"}],"thinking":null}', + 'gen_ai.agent.name': 'agent', + 'gen_ai.tool.definitions': 
'[{"type":"function","name":"noop","parameters":{"additionalProperties":false,"properties":{},"type":"object"}}]', + 'logfire.span_type': 'span', + 'logfire.msg': 'chat test', + 'logfire.variables.prompt__baggage_slug': '', + 'gen_ai.input.messages': '[{"role": "user", "parts": [{"type": "text", "content": "hello"}]}]', + 'gen_ai.output.messages': '[{"role": "assistant", "parts": [{"type": "tool_call", "id": "pyd_ai_tool_call_id__noop", "name": "noop", "arguments": {}}]}]', + 'gen_ai.system_instructions': '[{"type": "text", "content": "You are a helpful assistant."}]', + 'logfire.json_schema': '{"type": "object", "properties": {"gen_ai.input.messages": {"type": "array"}, "gen_ai.output.messages": {"type": "array"}, "gen_ai.system_instructions": {"type": "array"}, "model_request_parameters": {"type": "object"}}}', + 'gen_ai.usage.input_tokens': 51, + 'gen_ai.usage.output_tokens': 2, + 'gen_ai.response.model': 'test', + }, + }, + { + 'name': 'running tool', + 'attributes': { + 'gen_ai.operation.name': 'execute_tool', + 'gen_ai.tool.name': 'noop', + 'gen_ai.tool.call.id': 'pyd_ai_tool_call_id__noop', + 'tool_arguments': '{}', + 'gen_ai.agent.name': 'agent', + 'logfire.msg': 'running tool: noop', + 'logfire.json_schema': '{"type":"object","properties":{"tool_arguments":{"type":"object"},"tool_response":{"type":"object"},"gen_ai.tool.name":{},"gen_ai.tool.call.id":{}}}', + 'logfire.span_type': 'span', + 'logfire.variables.prompt__baggage_slug': '', + 'tool_response': 'ok', + }, + }, + { + 'name': 'chat test', + 'attributes': { + 'gen_ai.operation.name': 'chat', + 'gen_ai.provider.name': 'test', + 'gen_ai.system': 'test', + 'gen_ai.request.model': 'test', + 'model_request_parameters': 
'{"function_tools":[{"name":"noop","parameters_json_schema":{"additionalProperties":false,"properties":{},"type":"object"},"description":null,"outer_typed_dict_key":null,"strict":null,"sequential":false,"kind":"function","metadata":null,"timeout":null,"defer_loading":false,"unless_native":null,"with_native":null,"tool_kind":null,"return_schema":null,"include_return_schema":null}],"native_tools":[],"output_mode":"text","output_object":null,"output_tools":[],"prompted_output_template":null,"allow_text_output":true,"allow_image_output":false,"instruction_parts":[{"content":"You are a helpful assistant.","dynamic":true,"part_kind":"instruction"}],"thinking":null}', + 'gen_ai.agent.name': 'agent', + 'gen_ai.tool.definitions': '[{"type":"function","name":"noop","parameters":{"additionalProperties":false,"properties":{},"type":"object"}}]', + 'logfire.span_type': 'span', + 'logfire.msg': 'chat test', + 'logfire.variables.prompt__baggage_slug': '', + 'gen_ai.input.messages': '[{"role": "user", "parts": [{"type": "text", "content": "hello"}]}, {"role": "assistant", "parts": [{"type": "tool_call", "id": "pyd_ai_tool_call_id__noop", "name": "noop", "arguments": {}}]}, {"role": "user", "parts": [{"type": "tool_call_response", "id": "pyd_ai_tool_call_id__noop", "name": "noop", "result": "ok"}]}]', + 'gen_ai.output.messages': '[{"role": "assistant", "parts": [{"type": "text", "content": "{\\"noop\\":\\"ok\\"}"}]}]', + 'gen_ai.system_instructions': '[{"type": "text", "content": "You are a helpful assistant."}]', + 'logfire.json_schema': '{"type": "object", "properties": {"gen_ai.input.messages": {"type": "array"}, "gen_ai.output.messages": {"type": "array"}, "gen_ai.system_instructions": {"type": "array"}, "model_request_parameters": {"type": "object"}}}', + 'gen_ai.usage.input_tokens': 52, + 'gen_ai.usage.output_tokens': 6, + 'gen_ai.response.model': 'test', + }, + }, + { + 'name': 'agent run', + 'attributes': { + 'model_name': 'test', + 'agent_name': 'agent', + 
'gen_ai.agent.name': 'agent', + 'gen_ai.operation.name': 'invoke_agent', + 'logfire.msg': 'agent run', + 'logfire.span_type': 'span', + 'logfire.variables.prompt__baggage_slug': '', + 'final_result': '{"noop":"ok"}', + 'gen_ai.usage.input_tokens': 103, + 'gen_ai.usage.output_tokens': 8, + 'pydantic_ai.all_messages': '[{"role":"user","parts":[{"type":"text","content":"hello"}]},{"role":"assistant","parts":[{"type":"tool_call","id":"pyd_ai_tool_call_id__noop","name":"noop","arguments":{}}]},{"role":"user","parts":[{"type":"tool_call_response","id":"pyd_ai_tool_call_id__noop","name":"noop","result":"ok"}]},{"role":"assistant","parts":[{"type":"text","content":"{\\"noop\\":\\"ok\\"}"}]}]', + 'gen_ai.system_instructions': '[{"type": "text", "content": "You are a helpful assistant."}]', + 'logfire.json_schema': '{"type":"object","properties":{"pydantic_ai.all_messages":{"type":"array"},"gen_ai.system_instructions":{"type":"array"},"final_result":{"type":"object"}}}', + 'logfire.metrics': '{"gen_ai.client.token.usage": {"details": [{"attributes": {"gen_ai.operation.name": "chat", "gen_ai.provider.name": "test", "gen_ai.request.model": "test", "gen_ai.response.model": "test", "gen_ai.system": "test", "gen_ai.token.type": "input"}, "total": 103}, {"attributes": {"gen_ai.operation.name": "chat", "gen_ai.provider.name": "test", "gen_ai.request.model": "test", "gen_ai.response.model": "test", "gen_ai.system": "test", "gen_ai.token.type": "output"}, "total": 8}], "total": 111}}', + }, + }, + ] + ) + + +async def test_resolved_once_per_run_across_multiple_model_requests() -> None: + capability = ManagedPrompt('once_slug', default=DEFAULT) + agent = Agent(TestModel(), capabilities=[capability]) + + @agent.tool_plain + def noop() -> str: + return 'ok' + + with patch.object(capability._variable, 'get', wraps=capability._variable.get) as spy: + result = await agent.run('hello') + + # TestModel issues one request to call the tool and another for the final output, + # so instructions 
render twice, but the variable is resolved exactly once. + assert len(instructions_seen(result.all_messages())) == 2 + assert spy.call_count == 1 + + +async def test_label_and_callable_targeting_and_attributes() -> None: + capability = ManagedPrompt( + 'targeting_slug', + default=DEFAULT, + label='production', + targeting_key=lambda ctx: f'run:{ctx.run_step}', + attributes=lambda ctx: {'tier': 'enterprise'}, + ) + agent = Agent(TestModel(), capabilities=[capability]) + + with patch.object(capability._variable, 'get', wraps=capability._variable.get) as spy: + await agent.run('hello') + + spy.assert_called_once_with( + targeting_key='run:0', + attributes={'tier': 'enterprise'}, + label='production', + ) + + +async def test_static_targeting_and_attributes() -> None: + capability = ManagedPrompt( + 'static_slug', + default=DEFAULT, + targeting_key='tenant-123', + attributes={'tier': 'free'}, + ) + agent = Agent(TestModel(), capabilities=[capability]) + + with patch.object(capability._variable, 'get', wraps=capability._variable.get) as spy: + await agent.run('hello') + + spy.assert_called_once_with( + targeting_key='tenant-123', + attributes={'tier': 'free'}, + label=None, + ) + + +def test_instructions_none_outside_run() -> None: + capability: ManagedPrompt[None] = ManagedPrompt('outside_slug', default=DEFAULT) + instructions = capability.get_instructions() + ctx = RunContext[None]( + deps=None, + model=TestModel(), + usage=RunUsage(), + prompt=None, + messages=[], + run_step=0, + ) + + # Outside of `wrap_run` nothing has been resolved, so no instructions are contributed. 
+ assert capability.resolved is None + assert instructions(ctx) is None + + +async def test_render_template_fills_from_deps() -> None: + @dataclass + class Deps: + name: str + + capability: ManagedPrompt[Deps] = ManagedPrompt('render_slug', default='Hello {{name}}!', render_template=True) + agent = Agent(TestModel(), deps_type=Deps, capabilities=[capability]) + + result = await agent.run('hi', deps=Deps(name='Alice')) + + assert instructions_seen(result.all_messages()) == ['Hello Alice!'] + + +async def test_resolved_property_exposes_active_resolution() -> None: + capability = ManagedPrompt('exposed_slug', default=DEFAULT) + agent = Agent(TestModel(), capabilities=[capability]) + captured: list[str | None] = [] + + @agent.tool_plain + def grab() -> str: + # `resolved` exposes the full ResolvedVariable for the active run. + resolved = capability.resolved + captured.append(resolved.value if resolved is not None else None) + return 'ok' + + await agent.run('hello') + + assert captured == [DEFAULT] + # The resolution is cleared once the run completes. + assert capability.resolved is None + + +async def test_provider_backed_resolution_uses_remote_value_and_label(capfire: CaptureLogfire) -> None: + config = VariablesConfig( + variables={ + 'prompt__remote_slug': VariableConfig( + name='prompt__remote_slug', + labels={'production': LabeledValue(version=2, serialized_value='"You are the PRODUCTION prompt."')}, + rollout=Rollout(labels={'production': 1.0}), + overrides=[], + ) + } + ) + with _variables_provider_configured(capfire, config): + agent = Agent( + TestModel(), + capabilities=[ManagedPrompt('remote_slug', default='fallback', label='production'), Instrumentation()], + ) + + result = await agent.run('hello') + + # The remote value -- not the code default -- backs the instructions. 
+ assert instructions_seen(result.all_messages()) == ['You are the PRODUCTION prompt.'] + + spans = capfire.exporter.exported_spans_as_dict() + resolution = next(s for s in spans if s['attributes'].get('logfire.msg') == 'Resolve variable prompt__remote_slug') + assert resolution['attributes']['reason'] == 'resolved' + assert resolution['attributes']['value'] == '"You are the PRODUCTION prompt."' + assert resolution['attributes']['label'] == 'production' + # Child spans are tagged with the resolved label via baggage. + tagged = {s['name'] for s in spans if s['attributes'].get('logfire.variables.prompt__remote_slug') == 'production'} + assert {'agent run', 'chat test'} <= tagged + + +def test_logfire_instance_with_prebuilt_variable_warns() -> None: + var = logfire.var(name='prompt__instance_conflict', type=str, default=DEFAULT) + with pytest.warns(UserWarning, match='is ignored when `name` is a `Variable`'): + ManagedPrompt(var, logfire_instance=logfire.DEFAULT_LOGFIRE_INSTANCE) diff --git a/uv.lock b/uv.lock index b979f7b..9a847a2 100644 --- a/uv.lock +++ b/uv.lock @@ -7,14 +7,6 @@ resolution-markers = [ "python_full_version < '3.13'", ] -[options] - -[options.exclude-newer-package] -pydantic-ai-slim = false -pydantic-graph = false -pydantic-ai = false -pydantic-evals = false - [[package]] name = "annotated-doc" version = "0.0.4" @@ -999,6 +991,10 @@ codemode = [ dbos = [ { name = "pydantic-ai-slim", extra = ["dbos"] }, ] +logfire = [ + { name = "logfire" }, + { name = "pydantic-ai-slim", extra = ["spec"] }, +] temporal = [ { name = "pydantic-ai-slim", extra = ["temporal"] }, ] @@ -1011,6 +1007,7 @@ dev = [ { name = "inline-snapshot" }, { name = "logfire", extra = ["httpx"] }, { name = "pydantic-ai-harness", extra = ["code-mode"] }, + { name = "pydantic-ai-slim", extra = ["spec"] }, { name = "pytest" }, { name = "pytest-anyio" }, { name = "pytest-examples" }, @@ -1022,13 +1019,15 @@ lint = [ [package.metadata] requires-dist = [ + { name = "logfire", marker = "extra 
== 'logfire'", specifier = ">=4.31.0" }, { name = "pydantic-ai-slim", specifier = ">=1.95.1" }, { name = "pydantic-ai-slim", extras = ["dbos"], marker = "extra == 'dbos'" }, + { name = "pydantic-ai-slim", extras = ["spec"], marker = "extra == 'logfire'", specifier = ">=1.95.1" }, { name = "pydantic-ai-slim", extras = ["temporal"], marker = "extra == 'temporal'" }, { name = "pydantic-monty", marker = "extra == 'code-mode'", specifier = ">=0.0.16" }, { name = "pydantic-monty", marker = "extra == 'codemode'", specifier = ">=0.0.16" }, ] -provides-extras = ["code-mode", "codemode", "dbos", "temporal"] +provides-extras = ["code-mode", "codemode", "dbos", "logfire", "temporal"] [package.metadata.requires-dev] dev = [ @@ -1038,6 +1037,7 @@ dev = [ { name = "inline-snapshot", specifier = ">=0.32.5" }, { name = "logfire", extras = ["httpx"], specifier = ">=4.31.0" }, { name = "pydantic-ai-harness", extras = ["code-mode"] }, + { name = "pydantic-ai-slim", extras = ["spec"], specifier = ">=1.95.1" }, { name = "pytest", specifier = ">=9.0.0" }, { name = "pytest-anyio" }, { name = "pytest-examples", specifier = ">=0.0.18" }, @@ -1070,6 +1070,10 @@ wheels = [ dbos = [ { name = "dbos" }, ] +spec = [ + { name = "pydantic-handlebars" }, + { name = "pyyaml" }, +] temporal = [ { name = "temporalio" }, ] @@ -1205,6 +1209,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d6/61/1f91e2797b7667c2ef70657fcb8b8a517890269a413a5cdc2d9a06dce4c7/pydantic_graph-1.95.1-py3-none-any.whl", hash = "sha256:612efc7e3458f12fbc44f7d484e166419883b3567e3005e48283899519423938", size = 73049, upload-time = "2026-05-13T18:58:11.728Z" }, ] +[[package]] +name = "pydantic-handlebars" +version = "0.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/73/e55a1fe1a8788a5fa82d9209e796f4111e28f2d2fecab7173aa6d80516ad/pydantic_handlebars-0.2.1.tar.gz", hash = 
"sha256:d4124cfbf7d6e3bded9331a08ccccf6f29f3e3a93665b35b5d6061650aeeb49f", size = 176949, upload-time = "2026-05-25T01:24:38.354Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/11/364bc401f1d8fdb3947079fc43ffdfbfc9132d065981a03a95d2e87440c4/pydantic_handlebars-0.2.1-py3-none-any.whl", hash = "sha256:c713427d6498cf4b66814447d54753a2748f8a8d3a9f00c194192ddb3df61e52", size = 50476, upload-time = "2026-05-25T01:24:37.104Z" }, +] + [[package]] name = "pydantic-monty" version = "0.0.17"