Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions docs/models/google.md
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,49 @@ avg_logprobs = result.response.provider_details.get('avg_logprobs')

See the [Google Dev Blog](https://developers.googleblog.com/unlock-gemini-reasoning-with-logprobs-on-vertex-ai/) for more information.

### Context caching (`google_cached_content`)

When you've created a Gemini [cached content resource](https://ai.google.dev/gemini-api/docs/caching), pass its resource name through [`google_cached_content`][pydantic_ai.models.google.GoogleModelSettings.google_cached_content] to reuse it across requests:

```python
from pydantic_ai import Agent
from pydantic_ai.models.google import GoogleModel, GoogleModelSettings

model_settings = GoogleModelSettings(
google_cached_content='projects/p/locations/global/cachedContents/your-cache-id',
)

agent = Agent(GoogleModel('gemini-2.5-pro'), model_settings=model_settings)
...
```

!!! warning "Cached fields are owned by the cache resource"
    The cache resource owns `system_instruction`, `tools`, and `tool_config` — Pydantic AI strips them from outgoing requests when `google_cached_content` is set, so agent instructions and registered tools are ignored on cached requests. A `UserWarning` is emitted whenever stripping drops a field, so the mismatch is discoverable.

??? example "Create a cached content resource"
Pydantic AI doesn't wrap the cache-management API — create the resource with the underlying [google-genai](https://googleapis.github.io/python-genai/) SDK, then pass its name through `google_cached_content`:

```python {test="skip"}
from google.genai.types import Content, CreateCachedContentConfig, Part

from pydantic_ai.providers.google import GoogleProvider

provider = GoogleProvider(api_key='your-api-key')

cache = provider.client.caches.create(
model='gemini-2.5-flash',
config=CreateCachedContentConfig(
system_instruction='You are a geography expert. Be concise.',
contents=[Content(role='user', parts=[Part(text='...long context to cache...')])],
ttl='3600s',
),
)
print(cache.name)
#> cachedContents/abc123...
```

Caches have a minimum size (≈1024 tokens for `gemini-2.5-flash`, ≈4096 for `gemini-2.5-pro`) and a TTL — see the [Gemini caching docs](https://ai.google.dev/gemini-api/docs/caching) for the current thresholds, pricing, and `list` / `update` / `delete` operations.

## Streaming cancellation

!!! warning "Cancellation limitations"
Expand Down
54 changes: 49 additions & 5 deletions pydantic_ai_slim/pydantic_ai/models/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,16 @@ class GoogleModelSettings(ModelSettings, total=False):
google_cached_content: str
"""The name of the cached content to use for the model.

When set, `system_instruction`, `tools`, and `tool_config` are omitted from
the outgoing request — the cached content resource owns those fields, and
both the Gemini API and Vertex AI reject requests that supply them
alongside `cached_content` (`400 INVALID_ARGUMENT`: "Tool config, tools and
system instruction should not be set in the request when using cached
content."). Any tools registered on the agent and any system prompt are
therefore ignored on requests that go through the cache; a `UserWarning`
is emitted whenever stripping actually drops a field so the mismatch is
discoverable.

See <https://ai.google.dev/gemini-api/docs/caching> for more information.
"""

Expand Down Expand Up @@ -325,6 +335,31 @@ def _get_deprecated_google_service_tier(model_settings: GoogleModelSettings) ->
return None


def _warn_on_cached_content_strips(
    cached_content: str | None,
    system_instruction: ContentDict | None,
    tools: list[ToolDict] | None,
) -> None:
    """Emit a `UserWarning` when `google_cached_content` would strip a field that the caller populated.

    Nothing is emitted when `cached_content` is falsy (`None` or an empty string),
    or when neither `system_instruction` nor `tools` was populated.

    Args:
        cached_content: The cached content resource name, if one is configured.
        system_instruction: The system instruction built for the request, or `None`.
        tools: The tools built for the request, or `None`.
    """
    if not cached_content:
        return

    dropped: list[str] = []
    if system_instruction is not None:
        dropped.append('system_instruction')
    if tools is not None:
        # `tool_config` is reported together with `tools`: it is only meaningful when
        # tools are present, and both are stripped from the request as a pair.
        dropped.extend(('tools', 'tool_config'))
    if not dropped:
        return

    names = ', '.join(f'`{n}`' for n in dropped)
    warnings.warn(
        '`google_cached_content` is set; the cached content resource owns '
        f'{names}, so these fields are stripped from the outgoing request '
        'and any agent instructions or registered tools are ignored. '
        'See https://ai.google.dev/gemini-api/docs/caching.',
        UserWarning,
        # Frame 1 is this helper and frame 2 is the function that calls it, so the
        # warning is attributed to that function's caller.
        stacklevel=3,
    )


def _get_deprecated_google_vertex_service_tier(model_settings: GoogleModelSettings) -> GoogleCloudServiceTier | None:
"""Return `google_vertex_service_tier`, emitting a `PydanticAIDeprecationWarning` when it is set.

Expand Down Expand Up @@ -844,6 +879,12 @@ async def _build_content_and_config(
if model_request_parameters.function_tools and not self.profile.supports_tools:
raise UserError('Tools are not supported by this model.')

# `google_cached_content` will strip `tools` (and `tool_config` / `system_instruction`)
# below — resolve it up front so `prompted` output-mode sees the post-strip tool set
# and still enables JSON mode when the cache effectively leaves the request tool-less.
cached_content = model_settings.get('google_cached_content')
effective_tools = None if cached_content else tools

response_mime_type = None
response_schema = None
if model_request_parameters.output_mode == 'native':
Expand All @@ -855,7 +896,7 @@ async def _build_content_and_config(
output_object = model_request_parameters.output_object
assert output_object is not None
response_schema = self._map_response_schema(output_object)
elif model_request_parameters.output_mode == 'prompted' and not tools:
elif model_request_parameters.output_mode == 'prompted' and not effective_tools:
if not self.profile.supports_json_object_output:
raise UserError('JSON output is not supported by this model.')
response_mime_type = 'application/json'
Expand Down Expand Up @@ -884,9 +925,12 @@ async def _build_content_and_config(
else:
raise UserError('Google does not support setting ModelSettings.timeout to a httpx.Timeout')

# See `GoogleModelSettings.google_cached_content` for why these three fields are stripped.
_warn_on_cached_content_strips(cached_content, system_instruction, tools)

config = GenerateContentConfigDict(
http_options=http_options,
system_instruction=system_instruction,
system_instruction=None if cached_content else system_instruction,
temperature=model_settings.get('temperature'),
top_p=model_settings.get('top_p'),
top_k=model_settings.get('top_k'),
Expand All @@ -899,9 +943,9 @@ async def _build_content_and_config(
thinking_config=self._translate_thinking(model_settings, model_request_parameters),
labels=model_settings.get('google_labels'),
media_resolution=model_settings.get('google_video_resolution'),
cached_content=model_settings.get('google_cached_content'),
tools=cast(ToolListUnionDict, tools),
tool_config=tool_config,
cached_content=cached_content,
tools=cast(ToolListUnionDict, effective_tools) if effective_tools is not None else None,
tool_config=None if cached_content else tool_config,
Comment thread
dsfaccini marked this conversation as resolved.
response_mime_type=response_mime_type,
response_json_schema=response_schema,
response_modalities=modalities,
Expand Down
Loading
Loading