pydantic · dsfaccini · Jun 2, 2026 · May 27, 2026 · May 28, 2026 · May 28, 2026
diff --git a/docs/models/google.md b/docs/models/google.md
@@ -411,6 +411,49 @@ avg_logprobs = result.response.provider_details.get('avg_logprobs')
 
 See the [Google Dev Blog](https://developers.googleblog.com/unlock-gemini-reasoning-with-logprobs-on-vertex-ai/) for more information.
 
+### Context caching (`google_cached_content`)
+
+When you've created a Gemini [cached content resource](https://ai.google.dev/gemini-api/docs/caching), pass its resource name through [`google_cached_content`][pydantic_ai.models.google.GoogleModelSettings.google_cached_content] to reuse it across requests:
+
+```python
+from pydantic_ai import Agent
+from pydantic_ai.models.google import GoogleModel, GoogleModelSettings
+
+model_settings = GoogleModelSettings(
+    google_cached_content='projects/p/locations/global/cachedContents/your-cache-id',
+)
+
+agent = Agent(GoogleModel('gemini-2.5-pro'), model_settings=model_settings)
+...
+```
+
+!!! warning "Cached fields are owned by the cache resource"
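+    The cache resource owns `system_instruction`, `tools`, and `tool_config`, and both the Gemini API and Vertex AI reject requests that supply them alongside cached content. Pydantic AI therefore strips them from outgoing requests when `google_cached_content` is set, so agent instructions and registered tools are ignored on cached requests — anything the model needs from those fields must be configured on the cache resource itself. A `UserWarning` is emitted whenever stripping drops a field, so the mismatch is discoverable.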
+
+??? example "Create a cached content resource"
+    Pydantic AI doesn't wrap the cache-management API — create the resource with the underlying [google-genai](https://googleapis.github.io/python-genai/) SDK, then pass its name through `google_cached_content`:
+
+    ```python {test="skip"}
+    from google.genai.types import Content, CreateCachedContentConfig, Part
+
+    from pydantic_ai.providers.google import GoogleProvider
+
+    provider = GoogleProvider(api_key='your-api-key')
+
+    cache = provider.client.caches.create(
+        model='gemini-2.5-flash',
+        config=CreateCachedContentConfig(
+            system_instruction='You are a geography expert. Be concise.',
+            contents=[Content(role='user', parts=[Part(text='...long context to cache...')])],
+            ttl='3600s',
+        ),
+    )
+    print(cache.name)
+    #> cachedContents/abc123...
+    ```
+
+    Caches have a minimum input size (≈1024 tokens for `gemini-2.5-flash`, ≈4096 for `gemini-2.5-pro`) and a TTL, and each cache is tied to the model it was created with, so the `GoogleModel` that uses it must be the same model. See the [Gemini caching docs](https://ai.google.dev/gemini-api/docs/caching) for the current thresholds, pricing, and `list` / `update` / `delete` operations.
+
 ## Streaming cancellation
 
 !!! warning "Cancellation limitations"

diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py
@@ -270,6 +270,16 @@ class GoogleModelSettings(ModelSettings, total=False):
     google_cached_content: str
     """The name of the cached content to use for the model.
 
+    When set, `system_instruction`, `tools`, and `tool_config` are omitted from
+    the outgoing request — the cached content resource owns those fields, and
+    both the Gemini API and Vertex AI reject requests that supply them
+    alongside `cached_content` (`400 INVALID_ARGUMENT`: "Tool config, tools and
+    system instruction should not be set in the request when using cached
+    content."). Any tools registered on the agent and any system prompt are
+    therefore ignored on requests that go through the cache; a `UserWarning`
+    is emitted whenever stripping actually drops a field so the mismatch is
+    discoverable.
+
     See <https://ai.google.dev/gemini-api/docs/caching> for more information.
     """
 
@@ -325,6 +335,31 @@ def _get_deprecated_google_service_tier(model_settings: GoogleModelSettings) ->
     return None
 
 
+def _warn_on_cached_content_strips(
+    cached_content: str | None,
+    system_instruction: ContentDict | None,
+    tools: list[ToolDict] | None,
+) -> None:
+    """Emit a `UserWarning` when `google_cached_content` would strip a field that the caller populated."""
+    if not cached_content:
+        return
+    dropped: list[str] = []
+    if system_instruction is not None:
+        dropped.append('system_instruction')
+    if tools is not None:
+        dropped.extend(('tools', 'tool_config'))
+    if dropped:
+        names = ', '.join(f'`{n}`' for n in dropped)
+        warnings.warn(
+            f'`google_cached_content` is set; the cached content resource owns '
+            f'{names}, so these fields are stripped from the outgoing request '
+            f'and any agent instructions or registered tools are ignored. '
+            f'See https://ai.google.dev/gemini-api/docs/caching.',
+            UserWarning,
+            stacklevel=3,
+        )
+
+
 def _get_deprecated_google_vertex_service_tier(model_settings: GoogleModelSettings) -> GoogleCloudServiceTier | None:
     """Return `google_vertex_service_tier`, emitting a `PydanticAIDeprecationWarning` when it is set.
 
@@ -844,6 +879,12 @@ async def _build_content_and_config(
         if model_request_parameters.function_tools and not self.profile.supports_tools:
             raise UserError('Tools are not supported by this model.')
 
+        # `google_cached_content` will strip `tools` (and `tool_config` / `system_instruction`)
+        # below — resolve it up front so `prompted` output-mode sees the post-strip tool set
+        # and still enables JSON mode when the cache effectively leaves the request tool-less.
+        cached_content = model_settings.get('google_cached_content')
+        effective_tools = None if cached_content else tools
+
         response_mime_type = None
         response_schema = None
         if model_request_parameters.output_mode == 'native':
@@ -855,7 +896,7 @@ async def _build_content_and_config(
             output_object = model_request_parameters.output_object
             assert output_object is not None
             response_schema = self._map_response_schema(output_object)
-        elif model_request_parameters.output_mode == 'prompted' and not tools:
+        elif model_request_parameters.output_mode == 'prompted' and not effective_tools:
             if not self.profile.supports_json_object_output:
                 raise UserError('JSON output is not supported by this model.')
             response_mime_type = 'application/json'
@@ -884,9 +925,12 @@ async def _build_content_and_config(
             else:
                 raise UserError('Google does not support setting ModelSettings.timeout to a httpx.Timeout')
 
+        # See `GoogleModelSettings.google_cached_content` for why these three fields are stripped.
+        _warn_on_cached_content_strips(cached_content, system_instruction, tools)
+
         config = GenerateContentConfigDict(
             http_options=http_options,
-            system_instruction=system_instruction,
+            system_instruction=None if cached_content else system_instruction,
             temperature=model_settings.get('temperature'),
             top_p=model_settings.get('top_p'),
             top_k=model_settings.get('top_k'),
@@ -899,9 +943,9 @@ async def _build_content_and_config(
             thinking_config=self._translate_thinking(model_settings, model_request_parameters),
             labels=model_settings.get('google_labels'),
             media_resolution=model_settings.get('google_video_resolution'),
-            cached_content=model_settings.get('google_cached_content'),
-            tools=cast(ToolListUnionDict, tools),
-            tool_config=tool_config,
+            cached_content=cached_content,
+            tools=cast(ToolListUnionDict, effective_tools) if effective_tools is not None else None,
+            tool_config=None if cached_content else tool_config,
             response_mime_type=response_mime_type,
             response_json_schema=response_schema,
             response_modalities=modalities,