Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Release Notes

## [unreleased]

Integrations:

* Capture Gemini `cache_read`, `thoughts` and `tool_use_prompt` tokens in `instrument_google_genai`; compute `operation.cost` via `genai-prices` when available.

## [v4.33.0] (2026-05-13)

CLI:
Expand Down
15 changes: 15 additions & 0 deletions docs/integrations/llms/google-genai.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,18 @@ This creates a span which shows the conversation in the Logfire UI:
to `true`, the spans will simply contain `<elided>` where the prompts and completions would be.

[`logfire.instrument_google_genai()`][logfire.Logfire.instrument_google_genai] uses the `GoogleGenAiSdkInstrumentor().instrument()` method of the [`opentelemetry-instrumentation-google-genai`](https://pypi.org/project/opentelemetry-instrumentation-google-genai/) package.

## Token usage details

When a span captures a Gemini call via `logfire.instrument_google_genai()`, the
following attributes may appear depending on the response:

- `gen_ai.usage.input_tokens` — total prompt tokens (already includes cached, see below)
- `gen_ai.usage.output_tokens` — completion tokens
- `gen_ai.usage.cache_read.input_tokens` — tokens served from context cache (cache hit)
- `gen_ai.usage.details.thoughts_tokens` — reasoning tokens (Gemini 2.5 / 3.x)
- `gen_ai.usage.details.tool_use_prompt_tokens` — tokens present in tool-use prompts (Gemini's `tool_use_prompt_token_count`)
- `operation.cost` — calculated price in USD (requires [`genai-prices`](https://pypi.org/project/genai-prices/))

Note that, unlike Anthropic, the Gemini API's `prompt_token_count` already includes
the cached tokens; Logfire does not sum them again.
62 changes: 62 additions & 0 deletions logfire/_internal/integrations/google_genai.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,68 @@ def wrapped_to_dict(obj: object) -> object:
pass


try:
    from opentelemetry.instrumentation.google_genai import generate_content as _gc_module

    # Best-effort monkeypatch of the upstream instrumentation helper so that spans also carry
    # Gemini's cache / thoughts / tool-use token counts and, when `genai_prices` is importable,
    # a computed `operation.cost`. The helper class and `_maybe_update_token_counts` are private
    # upstream API, so everything here is wrapped in `try`/`except`: if upstream changes shape,
    # the extra attributes are dropped rather than breaking instrumentation.
    _Helper = _gc_module._GenerateContentInstrumentationHelper  # pyright: ignore[reportPrivateUsage]
    _original_maybe_update = _Helper._maybe_update_token_counts  # pyright: ignore[reportPrivateUsage]
    _original_create_final = _Helper.create_final_attributes

    def _wrapped_maybe_update_token_counts(self: Any, response: Any) -> None:
        """Run the upstream token accounting, then remember the extra Gemini usage counters.

        The counters and the response are stashed on the helper instance (`_lf_*` attributes)
        so that `_wrapped_create_final_attributes` can emit them later.
        """
        _original_maybe_update(self, response)
        try:
            metadata = getattr(response, 'usage_metadata', None)
            if metadata is None:
                return
            # "keep last non-zero" — streaming sends partial chunks; cached/thoughts/tool_use
            # counts typically only appear in the final chunk.
            if cached := getattr(metadata, 'cached_content_token_count', None):
                self._lf_cache_read = cached
            if thoughts := getattr(metadata, 'thoughts_token_count', None):
                self._lf_thoughts = thoughts
            if tool_use := getattr(metadata, 'tool_use_prompt_token_count', None):
                self._lf_tool_use_prompt = tool_use
            # Only responses that actually carry usage metadata are kept for cost calculation.
            self._lf_response = response
        except Exception:  # pragma: no cover
            pass

    def _wrapped_create_final_attributes(self: Any) -> dict[str, Any]:
        """Return the upstream final span attributes plus the extra usage counters and cost.

        Counters that were never seen (or were zero) are omitted. `operation.cost` is only set
        when `genai_prices` is importable and resolves the model; any failure there is ignored.
        """
        attrs = _original_create_final(self)
        try:
            if cached := getattr(self, '_lf_cache_read', None):
                attrs['gen_ai.usage.cache_read.input_tokens'] = cached
            if thoughts := getattr(self, '_lf_thoughts', None):
                attrs['gen_ai.usage.details.thoughts_tokens'] = thoughts
            if tool_use := getattr(self, '_lf_tool_use_prompt', None):
                attrs['gen_ai.usage.details.tool_use_prompt_tokens'] = tool_use
            response = getattr(self, '_lf_response', None)
            if response is not None:
                try:
                    from genai_prices import calc_price, extract_usage

                    # genai_prices expects the camelCase JSON keys ('usageMetadata', 'modelVersion');
                    # google-genai pydantic models use snake_case fields with camelCase aliases.
                    # Only those two fields are dumped: serializing the whole response would copy
                    # every candidate and part just to read the usage block.
                    usage_payload = response.model_dump(
                        include={'usage_metadata', 'model_version'},
                        by_alias=True,
                    )
                    usage_data = extract_usage(usage_payload, provider_id='google')
                    if usage_data.model is not None:
                        attrs['operation.cost'] = float(
                            calc_price(
                                usage_data.usage,
                                model_ref=usage_data.model.id,
                                provider_id='google',
                            ).total_price
                        )
                except Exception:
                    # Deliberately best-effort: genai_prices may be missing or may not know the
                    # model; the span is then exported without `operation.cost`.
                    pass
        except Exception:  # pragma: no cover
            pass
        return attrs

    _Helper._maybe_update_token_counts = _wrapped_maybe_update_token_counts  # pyright: ignore[reportPrivateUsage]
    _Helper.create_final_attributes = _wrapped_create_final_attributes
except Exception:  # pragma: no cover
    pass


# A `Part` value is either a string-keyed dict or a plain string.
Part: TypeAlias = 'dict[str, Any] | str'


Expand Down
155 changes: 155 additions & 0 deletions tests/otel_integrations/test_google_genai.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import warnings
from typing import Any
from unittest import mock
from unittest.mock import patch

Expand Down Expand Up @@ -108,6 +109,7 @@ def get_current_weather(location: str) -> str:
'gen_ai.usage.input_tokens': 58,
'gen_ai.usage.output_tokens': 9,
'gen_ai.response.finish_reasons': ('stop',),
'operation.cost': 9.4e-06,
'logfire.metrics': IsPartialDict(),
'events': [
{'content': 'help', 'role': 'system'},
Expand Down Expand Up @@ -200,6 +202,7 @@ def get_current_weather(location: str) -> str:
'gen_ai.usage.input_tokens': 39,
'gen_ai.usage.output_tokens': 7,
'gen_ai.response.finish_reasons': ('stop',),
'operation.cost': 6.7e-06,
'logfire.metrics': IsPartialDict(),
'events': [
{'content': '<elided>', 'role': 'user'},
Expand Down Expand Up @@ -258,6 +261,8 @@ class ResponseData(pydantic.BaseModel):
'gen_ai.usage.input_tokens': 2,
'gen_ai.usage.output_tokens': 13,
'gen_ai.response.finish_reasons': ('stop',),
'gen_ai.usage.details.thoughts_tokens': 58,
'operation.cost': 0.0001781,
'logfire.metrics': IsPartialDict(),
'events': [
{'content': 'Hi', 'role': 'user'},
Expand All @@ -278,6 +283,156 @@ class ResponseData(pydantic.BaseModel):
)


def _stub_generate_content(response: Any) -> Any:
def _generate(self: Any, **kwargs: Any) -> Any:
return response

return _generate


def _build_fake_genai_response(
    *,
    model_version: str = 'gemini-2.5-flash',
    prompt_token_count: int = 1000,
    candidates_token_count: int = 200,
    cached_content_token_count: int | None = None,
    thoughts_token_count: int | None = None,
    tool_use_prompt_token_count: int | None = None,
) -> Any:
    """Build a minimal `GenerateContentResponse` with the given usage metadata.

    The response has a single candidate with the text 'hi back' and finish reason STOP.
    Optional counters left as `None` are passed through unchanged to the usage metadata.

    Returns:
        A `google.genai.types.GenerateContentResponse` instance.
    """
    from google.genai.types import (
        Candidate,
        Content,
        FinishReason,
        GenerateContentResponse,
        GenerateContentResponseUsageMetadata,
        Part,
    )

    # Keep the fake internally consistent: the total covers prompt, candidates, thoughts and
    # tool-use prompt tokens. Cached tokens are not added because the prompt count already
    # includes them. `or 0` tolerates callers passing None for any counter.
    total_token_count = (
        (prompt_token_count or 0)
        + (candidates_token_count or 0)
        + (thoughts_token_count or 0)
        + (tool_use_prompt_token_count or 0)
    )

    return GenerateContentResponse(
        model_version=model_version,
        usage_metadata=GenerateContentResponseUsageMetadata(
            prompt_token_count=prompt_token_count,
            candidates_token_count=candidates_token_count,
            cached_content_token_count=cached_content_token_count,
            thoughts_token_count=thoughts_token_count,
            tool_use_prompt_token_count=tool_use_prompt_token_count,
            total_token_count=total_token_count,
        ),
        candidates=[
            Candidate(
                content=Content(parts=[Part.from_text(text='hi back')], role='model'),
                finish_reason=FinishReason.STOP,
            )
        ],
    )


@pytest.fixture
def reset_google_genai_instrumentation():
    """Force re-instrumentation so monkeypatched `Models.generate_content` is captured.

    The upstream `_MethodsSnapshot` captures `Models.generate_content` at instrument
    time. The instrumentor is a process-wide singleton with an
    `is_instrumented_by_opentelemetry` flag that gates re-instrumentation. We clear
    the flag (the proper `uninstrument()` call asserts on a snapshot that the
    upstream `__init__` resets to None on every `GoogleGenAiSdkInstrumentor()` call,
    which makes it unreliable in a test suite) so the next `instrument()` call
    re-creates the snapshot and picks up the mock.

    On teardown the flag is put back to the value it had before the test rather than
    forced to False. If an earlier test had already instrumented, the wrapper that
    `monkeypatch` restores is still installed; leaving the flag False would let a
    later `instrument()` call wrap that wrapper a second time.
    """
    from opentelemetry.instrumentation.google_genai import GoogleGenAiSdkInstrumentor

    instrumentor = GoogleGenAiSdkInstrumentor()
    was_instrumented = instrumentor._is_instrumented_by_opentelemetry  # pyright: ignore[reportPrivateUsage]
    instrumentor._is_instrumented_by_opentelemetry = False  # pyright: ignore[reportPrivateUsage]
    yield
    # pytest resumes the generator for teardown even when the test body failed.
    instrumentor._is_instrumented_by_opentelemetry = was_instrumented  # pyright: ignore[reportPrivateUsage]


def test_instrument_google_genai_cache_and_thinking_tokens(
    exporter: TestExporter, monkeypatch: pytest.MonkeyPatch, reset_google_genai_instrumentation: None
) -> None:
    """Cache-read, thoughts and tool-use counters plus a positive cost land on the span."""
    from google.genai import Client
    from google.genai.models import Models

    canned = _build_fake_genai_response(
        prompt_token_count=1000,
        candidates_token_count=200,
        cached_content_token_count=750,
        thoughts_token_count=80,
        tool_use_prompt_token_count=30,
    )
    # Install the stub before instrumenting so the instrumentor wraps the stub.
    monkeypatch.setattr(Models, 'generate_content', _stub_generate_content(canned))
    logfire.instrument_google_genai()

    gemini = Client(api_key='fake')
    gemini.models.generate_content(model='gemini-2.5-flash', contents='hi')  # type: ignore

    (only_span,) = exporter.exported_spans_as_dict(parse_json_attributes=True)
    attributes = only_span['attributes']

    expected_counts = {
        'gen_ai.usage.input_tokens': 1000,
        'gen_ai.usage.output_tokens': 200,
        'gen_ai.usage.cache_read.input_tokens': 750,
        'gen_ai.usage.details.thoughts_tokens': 80,
        'gen_ai.usage.details.tool_use_prompt_tokens': 30,
    }
    for key, value in expected_counts.items():
        assert attributes[key] == value

    # operation.cost depends on the current Gemini 2.5 Flash pricing table in
    # genai_prices; just confirm it was computed and is a sensible positive value.
    cost = attributes['operation.cost']
    assert isinstance(cost, float)
    assert cost > 0


def test_instrument_google_genai_no_cache_metadata(
    exporter: TestExporter, monkeypatch: pytest.MonkeyPatch, reset_google_genai_instrumentation: None
) -> None:
    """Without optional usage counters, only the plain input/output token attributes appear."""
    from google.genai import Client
    from google.genai.models import Models

    canned = _build_fake_genai_response(prompt_token_count=58, candidates_token_count=9)
    # Install the stub before instrumenting so the instrumentor wraps the stub.
    monkeypatch.setattr(Models, 'generate_content', _stub_generate_content(canned))
    logfire.instrument_google_genai()

    gemini = Client(api_key='fake')
    gemini.models.generate_content(model='gemini-2.5-flash', contents='hi')  # type: ignore

    (only_span,) = exporter.exported_spans_as_dict(parse_json_attributes=True)
    attributes = only_span['attributes']

    # None of the optional counters were supplied, so none may be emitted.
    for absent_key in (
        'gen_ai.usage.cache_read.input_tokens',
        'gen_ai.usage.details.thoughts_tokens',
        'gen_ai.usage.details.tool_use_prompt_tokens',
    ):
        assert absent_key not in attributes

    assert attributes['gen_ai.usage.input_tokens'] == 58
    assert attributes['gen_ai.usage.output_tokens'] == 9


def test_instrument_google_genai_cost_silent_failure(
    exporter: TestExporter, monkeypatch: pytest.MonkeyPatch, reset_google_genai_instrumentation: None
) -> None:
    """An unknown model produces no `operation.cost`, but token counters are still recorded."""
    from google.genai import Client
    from google.genai.models import Models

    unknown_model = 'gemini-unknown-999'
    canned = _build_fake_genai_response(
        model_version=unknown_model,
        prompt_token_count=1000,
        candidates_token_count=200,
        cached_content_token_count=750,
        thoughts_token_count=80,
    )
    # Install the stub before instrumenting so the instrumentor wraps the stub.
    monkeypatch.setattr(Models, 'generate_content', _stub_generate_content(canned))
    logfire.instrument_google_genai()

    gemini = Client(api_key='fake')
    gemini.models.generate_content(model=unknown_model, contents='hi')  # type: ignore

    (only_span,) = exporter.exported_spans_as_dict(parse_json_attributes=True)
    attributes = only_span['attributes']

    # Pricing could not be resolved, and that must not leak out as an attribute or an error.
    assert 'operation.cost' not in attributes
    assert attributes['gen_ai.usage.cache_read.input_tokens'] == 750
    assert attributes['gen_ai.usage.details.thoughts_tokens'] == 80


def test_span_event_logger_with_none_parts(exporter: TestExporter) -> None:
"""Test that SpanEventLogger handles parts=None gracefully.

Expand Down
Loading