From 41c5b7182fdbfa090f2a8a328a9a048342963017 Mon Sep 17 00:00:00 2001 From: poshinchen Date: Tue, 2 Jun 2026 14:07:33 -0400 Subject: [PATCH 1/2] fix(mappers): join all toolResult content blocks to prevent false negatives in FaithfulnessEvaluator Multi-part Bedrock toolResult.content lists (text, json, image, document, video blocks) were silently truncated to content[0], making values in subsequent blocks invisible to evaluators. Adds join_tool_result_content() helper in mappers/utils.py and wires it into every affected read site: _process_tool_results (legacy), _convert_inference_messages tool_call_response branch (latest), _convert_tool_execution_span latest branch (both in StrandsInMemorySessionMapper), and _extract_tool_result_text (CloudWatchSessionMapper). Closes #235 Co-Authored-By: Claude Sonnet 4.6 --- .../mappers/cloudwatch_session_mapper.py | 8 +- .../strands_in_memory_session_mapper.py | 34 ++--- src/strands_evals/mappers/utils.py | 36 +++++ .../mappers/test_cloudwatch_session_mapper.py | 63 ++++++++ .../mappers/test_strands_in_memory_mapper.py | 144 ++++++++++++++++++ 5 files changed, 257 insertions(+), 28 deletions(-) diff --git a/src/strands_evals/mappers/cloudwatch_session_mapper.py b/src/strands_evals/mappers/cloudwatch_session_mapper.py index a6bbb30b..9b1d93f6 100644 --- a/src/strands_evals/mappers/cloudwatch_session_mapper.py +++ b/src/strands_evals/mappers/cloudwatch_session_mapper.py @@ -17,7 +17,7 @@ from typing import Any from ..mappers.session_mapper import SessionMapper -from ..mappers.utils import get_body +from ..mappers.utils import get_body, join_tool_result_content from ..types.trace import ( AgentInvocationSpan, AssistantMessage, @@ -318,11 +318,7 @@ def _extract_tool_results(self, body: dict) -> list[ToolResult]: def _extract_tool_result_text(self, content: Any) -> str: """Extract text from tool result content.""" - if not content: - return "" - if isinstance(content, list) and content: - return content[0].get("text", "") - return str(content) + return join_tool_result_content(content) # --- Body-to-messages conversion --- diff --git a/src/strands_evals/mappers/strands_in_memory_session_mapper.py b/src/strands_evals/mappers/strands_in_memory_session_mapper.py index 76212118..74d1af56 100644 --- a/src/strands_evals/mappers/strands_in_memory_session_mapper.py +++ b/src/strands_evals/mappers/strands_in_memory_session_mapper.py @@ -24,6 +24,7 @@ UserMessage, ) from .session_mapper import SessionMapper +from .utils import join_tool_result_content logger = logging.getLogger(__name__) @@ -194,10 +195,7 @@ def _process_tool_results(self, content_list: list[dict[str, Any]]) -> list[Text continue tool_result = item["toolResult"] - result_text = "" - if "content" in tool_result and tool_result["content"]: - content = tool_result["content"] - result_text = content[0].get("text", "") if isinstance(content, list) else str(content) + result_text = join_tool_result_content(tool_result.get("content")) result.append( ToolResultContent( @@ -324,17 +322,12 @@ def _convert_inference_messages(self, otel_msg: dict[str, Any]) -> UserMessage | content.append(TextContent(text=part.get("content", ""))) if part_type == "tool_call_response": - # Extract text from response array if present response = part.get("response", []) - response_text = "" - - ## To-do: Compare the differences for multiple toolResults - if isinstance(response, list) and response: - response_text = ( - response[0].get("text", "") if isinstance(response[0], dict) else str(response[0]) - ) - elif isinstance(response, str): - response_text = response + response_text = ( + join_tool_result_content(response) + if isinstance(response, list) + else (response if isinstance(response, str) else "") + ) content.append( ToolResultContent( @@ -381,14 +374,11 @@ def _convert_tool_execution_span(self, span: ReadableSpan, session_id: str) -> T part = output_messages[0]["parts"][0] if part.get("type") == "tool_call_response": response = part.get("response", []) - if isinstance(response, list) and response: - tool_result_content = ( - response[0].get("text", "") - if isinstance(response[0], dict) - else str(response[0]) - ) - elif isinstance(response, str): - tool_result_content = response + tool_result_content = ( + join_tool_result_content(response) + if isinstance(response, list) + else (response if isinstance(response, str) else "") + ) except Exception as e: logger.warning(f"Failed to process tool event {event.name}: {e}") else: diff --git a/src/strands_evals/mappers/utils.py b/src/strands_evals/mappers/utils.py index 050458b5..d37eb874 100644 --- a/src/strands_evals/mappers/utils.py +++ b/src/strands_evals/mappers/utils.py @@ -2,12 +2,48 @@ Utility functions for mapper selection and detection. """ +import json from typing import Any from .constants import SCOPE_LANGCHAIN_OTEL, SCOPE_OPENINFERENCE, SCOPE_STRANDS from .session_mapper import SessionMapper +def join_tool_result_content(content: Any) -> str: + """Join all blocks in a Bedrock-style toolResult content list into one string. + + Bedrock toolResult.content is a list of typed blocks: + {"text": "..."} -> pass through as-is + {"json": {...}} -> json.dumps(value) + {"image": ...} -> "[image]" + {"document": ...} -> "[document]" + {"video": ...} -> "[video]" + + Non-list values are coerced to str. + """ + if not content: + return "" + if not isinstance(content, list): + return str(content) + + parts: list[str] = [] + for block in content: + if not isinstance(block, dict): + parts.append(str(block)) + continue + if "text" in block: + parts.append(block["text"]) + elif "json" in block: + parts.append(json.dumps(block["json"])) + elif "image" in block: + parts.append("[image]") + elif "document" in block: + parts.append("[document]") + elif "video" in block: + parts.append("[video]") + return " ".join(parts) + + def detect_otel_mapper(spans: list[Any]) -> SessionMapper: """Detect the appropriate mapper based on span scope and data format. diff --git a/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py b/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py index 22f2ab63..13149725 100644 --- a/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py +++ b/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py @@ -245,3 +245,66 @@ def test_record_with_no_body_skipped(self, mapper): session = mapper.map_to_session(records, "sess-1") assert len(session.traces) == 1 assert len(session.traces[0].spans) > 0 + + +# --- Regression tests: multi-block toolResult.content --- + + +def _make_multi_block_tool_result_message(tool_use_id, content_blocks): + """Build a tool result message with arbitrary content blocks.""" + return { + "role": "tool", + "content": { + "content": json.dumps([{"toolResult": {"content": content_blocks, "toolUseId": tool_use_id}}]) + }, + } + + +class TestMultiBlockToolResult: + def test_multi_text_blocks_joined(self, mapper): + """Multiple text blocks in toolResult.content are joined, not truncated to [0].""" + record1 = make_log_record( + trace_id="t1", + span_id="s1", + input_messages=[make_user_message("hi")], + output_messages=[_make_assistant_tool_use_message("tool_x", {}, "tu-1")], + time_nano=1000, + ) + record2 = make_log_record( + trace_id="t1", + span_id="s2", + input_messages=[ + make_user_message("hi"), + _make_multi_block_tool_result_message("tu-1", [{"text": "first"}, {"text": "second"}]), + ], + output_messages=[make_assistant_text_message("ok")], + time_nano=2000, + ) + session = mapper.map_to_session([record1, record2], "sess-1") + tool_spans = [s for s in session.traces[0].spans if isinstance(s, ToolExecutionSpan)] + assert len(tool_spans) == 1 + assert tool_spans[0].tool_result.content == "first second" + + def test_text_and_json_blocks_joined(self, mapper): + """Mixed text+json blocks are both included in the joined string.""" + record1 = make_log_record( + trace_id="t1", + span_id="s1", + input_messages=[make_user_message("hi")], + output_messages=[_make_assistant_tool_use_message("tool_y", {}, "tu-2")], + time_nano=1000, + ) + record2 = make_log_record( + trace_id="t1", + span_id="s2", + input_messages=[ + make_user_message("hi"), + _make_multi_block_tool_result_message("tu-2", [{"text": "val:"}, {"json": {"x": 1}}]), + ], + output_messages=[make_assistant_text_message("ok")], + time_nano=2000, + ) + session = mapper.map_to_session([record1, record2], "sess-1") + tool_spans = [s for s in session.traces[0].spans if isinstance(s, ToolExecutionSpan)] + assert len(tool_spans) == 1 + assert tool_spans[0].tool_result.content == 'val: {"x": 1}' diff --git a/tests/strands_evals/mappers/test_strands_in_memory_mapper.py b/tests/strands_evals/mappers/test_strands_in_memory_mapper.py index 9a162d85..07037c05 100644 --- a/tests/strands_evals/mappers/test_strands_in_memory_mapper.py +++ b/tests/strands_evals/mappers/test_strands_in_memory_mapper.py @@ -573,3 +573,147 @@ def test_session_id_filtering_gen_ai_conversation_id_takes_precedence(provider): # Should NOT match on session.id when gen_ai.conversation.id is present session2 = mapper.map_to_session([span], "session-456") assert len(session2.traces) == 0 + + +# --- Regression tests: multi-part toolResult.content --- + + +import json as _json + + +def test_legacy_process_tool_results_multi_text(provider): + """Legacy _process_tool_results joins all text blocks, not just content[0].""" + payload = _json.dumps( + [ + { + "toolResult": { + "toolUseId": "tr1", + "content": [{"text": "first"}, {"text": "second"}], + } + } + ] + ) + span = make_span( + provider, + 0xAAA, + 0xBBB, + 0xCCC, + "chat", + {"gen_ai.operation.name": "chat"}, + lambda s: ( + s.add_event("gen_ai.tool.message", {"content": payload}), + s.add_event("gen_ai.choice", {"message": '[{"text": "ok"}]'}), + ), + ) + + session = StrandsInMemorySessionMapper().map_to_session([span], "sid") + tool_msg = session.traces[0].spans[0].messages[0] + assert tool_msg.content[0].content == "first second" + + +def test_legacy_process_tool_results_text_and_json(provider): + """Legacy _process_tool_results handles mixed text+json blocks.""" + payload = _json.dumps( + [ + { + "toolResult": { + "toolUseId": "tr2", + "content": [{"text": "label:"}, {"json": {"value": 42}}], + } + } + ] + ) + span = make_span( + provider, + 0xAAA, + 0xBBB, + 0xCCC, + "chat", + {"gen_ai.operation.name": "chat"}, + lambda s: ( + s.add_event("gen_ai.tool.message", {"content": payload}), + s.add_event("gen_ai.choice", {"message": '[{"text": "ok"}]'}), + ), + ) + + session = StrandsInMemorySessionMapper().map_to_session([span], "sid") + tool_msg = session.traces[0].spans[0].messages[0] + assert tool_msg.content[0].content == 'label: {"value": 42}' + + +def test_latest_convention_inference_multi_text_tool_result(provider): + """Latest _convert_inference_messages joins all blocks in tool_call_response.""" + input_msg = _json.dumps( + [ + { + "role": "user", + "parts": [ + { + "type": "tool_call_response", + "id": "t1", + "response": [{"text": "alpha"}, {"text": "beta"}], + } + ], + } + ] + ) + span = make_span( + provider, + 0xAAA, + 0xBBB, + 0xCCC, + "chat", + {"gen_ai.operation.name": "chat", "gen_ai.provider.name": "strands-agents"}, + lambda s: s.add_event( + "gen_ai.client.inference.operation.details", + { + "gen_ai.input.messages": input_msg, + "gen_ai.output.messages": '[{"role": "assistant", "parts": [{"type": "text", "content": "done"}]}]', + }, + ), + ) + + session = StrandsInMemorySessionMapper().map_to_session([span], "sid") + inference = session.traces[0].spans[0] + assert inference.messages[0].content[0].content == "alpha beta" + + +def test_latest_convention_tool_execution_multi_text(provider): + """Latest _convert_tool_execution_span joins all blocks in tool_call_response.""" + output_msg = _json.dumps( + [ + { + "role": "tool", + "parts": [ + { + "type": "tool_call_response", + "id": "t1", + "response": [{"text": "part1"}, {"text": "part2"}], + } + ], + } + ] + ) + span = make_span( + provider, + 0xAAA, + 0xBBB, + 0xCCC, + "execute_tool", + { + "gen_ai.operation.name": "execute_tool", + "gen_ai.provider.name": "strands-agents", + "gen_ai.tool.name": "search", + "gen_ai.tool.call.id": "t1", + "gen_ai.tool.status": "success", + }, + lambda s: s.add_event( + "gen_ai.client.inference.operation.details", + {"gen_ai.output.messages": output_msg}, + ), + ) + + session = StrandsInMemorySessionMapper().map_to_session([span], "sid") + tool = session.traces[0].spans[0] + assert isinstance(tool, ToolExecutionSpan) + assert tool.tool_result.content == "part1 part2" From 1cf9af03a42118ca8a61c1a4821c2d30eeccdf46 Mon Sep 17 00:00:00 2001 From: poshinchen Date: Tue, 2 Jun 2026 14:15:03 -0400 Subject: [PATCH 2/2] fix: fix missing tool_results aggregation --- .../strands_in_memory_session_mapper.py | 26 +++--- src/strands_evals/mappers/utils.py | 38 +++++--- .../mappers/test_cloudwatch_session_mapper.py | 8 +- .../mappers/test_strands_in_memory_mapper.py | 21 ++--- tests/strands_evals/mappers/test_utils.py | 91 +++++++++++++++++++ 5 files changed, 143 insertions(+), 41 deletions(-) diff --git a/src/strands_evals/mappers/strands_in_memory_session_mapper.py b/src/strands_evals/mappers/strands_in_memory_session_mapper.py index 74d1af56..a019cf43 100644 --- a/src/strands_evals/mappers/strands_in_memory_session_mapper.py +++ b/src/strands_evals/mappers/strands_in_memory_session_mapper.py @@ -26,6 +26,14 @@ from .session_mapper import SessionMapper from .utils import join_tool_result_content + +def _response_to_text(response: Any) -> str: + """Normalize a tool_call_response value to a plain string.""" + if isinstance(response, list): + return join_tool_result_content(response) + return response if isinstance(response, str) else "" + + logger = logging.getLogger(__name__) @@ -322,12 +330,7 @@ def _convert_inference_messages(self, otel_msg: dict[str, Any]) -> UserMessage | content.append(TextContent(text=part.get("content", ""))) if part_type == "tool_call_response": - response = part.get("response", []) - response_text = ( - join_tool_result_content(response) - if isinstance(response, list) - else (response if isinstance(response, str) else "") - ) + response_text = _response_to_text(part.get("response", [])) content.append( ToolResultContent( @@ -373,14 +376,9 @@ def _convert_tool_execution_span(self, span: ReadableSpan, session_id: str) -> T if output_messages and output_messages[0].get("parts"): part = output_messages[0]["parts"][0] if part.get("type") == "tool_call_response": - response = part.get("response", []) - tool_result_content = ( - join_tool_result_content(response) - if isinstance(response, list) - else (response if isinstance(response, str) else "") - ) + tool_result_content = _response_to_text(part.get("response", [])) except Exception as e: - logger.warning(f"Failed to process tool event {event.name}: {e}") + logger.warning("Failed to process tool event %s: %s", event.name, e) else: for event in span.events: try: @@ -393,7 +391,7 @@ def _convert_tool_execution_span(self, span: ReadableSpan, session_id: str) -> T message_list = self._parse_json_attr(event_attributes, "message") tool_result_content = message_list[0].get("text", "") if message_list else "" except Exception as e: - logger.warning(f"Failed to process tool event {event.name}: {e}") + logger.warning("Failed to process tool event %s: %s", event.name, e) tool_call = ToolCall(name=tool_name, arguments=tool_arguments, tool_call_id=tool_call_id) tool_result = ToolResult(content=tool_result_content, error=tool_error, tool_call_id=tool_call_id) diff --git a/src/strands_evals/mappers/utils.py b/src/strands_evals/mappers/utils.py index d37eb874..82058385 100644 --- a/src/strands_evals/mappers/utils.py +++ b/src/strands_evals/mappers/utils.py @@ -3,25 +3,36 @@ """ import json +import logging from typing import Any from .constants import SCOPE_LANGCHAIN_OTEL, SCOPE_OPENINFERENCE, SCOPE_STRANDS from .session_mapper import SessionMapper +logger = logging.getLogger(__name__) + def join_tool_result_content(content: Any) -> str: """Join all blocks in a Bedrock-style toolResult content list into one string. - Bedrock toolResult.content is a list of typed blocks: - {"text": "..."} -> pass through as-is - {"json": {...}} -> json.dumps(value) - {"image": ...} -> "[image]" - {"document": ...} -> "[document]" - {"video": ...} -> "[video]" + Bedrock toolResult.content is a list of typed blocks that are joined with a + newline separator so multi-paragraph tool outputs stay readable for downstream + LLM judges. text blocks pass through as-is, json blocks are serialized via + json.dumps, and image/document/video blocks become placeholder markers. + + Args: + content: A Bedrock-style toolResult content value. May be a list of typed + block dicts, a non-list value (coerced to str), or None/empty. - Non-list values are coerced to str. + Returns: + A single string with all block values newline-joined, or empty string for + empty/None input. Note: empty-string text block values are excluded from + the join (they contribute no visible content), so a list containing only + empty-text blocks returns an empty string. """ - if not content: + if content is None: + return "" + if isinstance(content, list) and len(content) == 0: return "" if not isinstance(content, list): return str(content) @@ -32,16 +43,21 @@ def join_tool_result_content(content: Any) -> str: parts.append(str(block)) continue if "text" in block: - parts.append(block["text"]) + parts.append(str(block["text"]) if block["text"] is not None else "") elif "json" in block: - parts.append(json.dumps(block["json"])) + try: + parts.append(json.dumps(block["json"], sort_keys=True)) + except (TypeError, ValueError) as exc: + logger.debug("json_error=<%s> | join_tool_result_content: could not serialize json block", exc) elif "image" in block: parts.append("[image]") elif "document" in block: parts.append("[document]") elif "video" in block: parts.append("[video]") - return " ".join(parts) + else: + logger.debug("block_keys=<%s> | join_tool_result_content: unknown block type, skipping", list(block.keys())) + return "\n".join(p for p in parts if p) def detect_otel_mapper(spans: list[Any]) -> SessionMapper: diff --git a/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py b/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py index 13149725..37c2d255 100644 --- a/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py +++ b/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py @@ -254,9 +254,7 @@ def _make_multi_block_tool_result_message(tool_use_id, content_blocks): """Build a tool result message with arbitrary content blocks.""" return { "role": "tool", - "content": { - "content": json.dumps([{"toolResult": {"content": content_blocks, "toolUseId": tool_use_id}}]) - }, + "content": {"content": json.dumps([{"toolResult": {"content": content_blocks, "toolUseId": tool_use_id}}])}, } @@ -283,7 +281,7 @@ def test_multi_text_blocks_joined(self, mapper): session = mapper.map_to_session([record1, record2], "sess-1") tool_spans = [s for s in session.traces[0].spans if isinstance(s, ToolExecutionSpan)] assert len(tool_spans) == 1 - assert tool_spans[0].tool_result.content == "first second" + assert tool_spans[0].tool_result.content == "first\nsecond" def test_text_and_json_blocks_joined(self, mapper): """Mixed text+json blocks are both included in the joined string.""" @@ -307,4 +305,4 @@ def test_text_and_json_blocks_joined(self, mapper): session = mapper.map_to_session([record1, record2], "sess-1") tool_spans = [s for s in session.traces[0].spans if isinstance(s, ToolExecutionSpan)] assert len(tool_spans) == 1 - assert tool_spans[0].tool_result.content == 'val: {"x": 1}' + assert tool_spans[0].tool_result.content == 'val:\n{"x": 1}' diff --git a/tests/strands_evals/mappers/test_strands_in_memory_mapper.py b/tests/strands_evals/mappers/test_strands_in_memory_mapper.py index 07037c05..c44e23c9 100644 --- a/tests/strands_evals/mappers/test_strands_in_memory_mapper.py +++ b/tests/strands_evals/mappers/test_strands_in_memory_mapper.py @@ -1,3 +1,5 @@ +import json + import pytest from opentelemetry.sdk.trace import ReadableSpan, TracerProvider from opentelemetry.trace import SpanContext, SpanKind, TraceFlags @@ -578,12 +580,9 @@ def test_session_id_filtering_gen_ai_conversation_id_takes_precedence(provider): # --- Regression tests: multi-part toolResult.content --- -import json as _json - - def test_legacy_process_tool_results_multi_text(provider): """Legacy _process_tool_results joins all text blocks, not just content[0].""" - payload = _json.dumps( + payload = json.dumps( [ { "toolResult": { @@ -608,12 +607,12 @@ def test_legacy_process_tool_results_multi_text(provider): session = StrandsInMemorySessionMapper().map_to_session([span], "sid") tool_msg = session.traces[0].spans[0].messages[0] - assert tool_msg.content[0].content == "first second" + assert tool_msg.content[0].content == "first\nsecond" def test_legacy_process_tool_results_text_and_json(provider): """Legacy _process_tool_results handles mixed text+json blocks.""" - payload = _json.dumps( + payload = json.dumps( [ { "toolResult": { @@ -638,12 +637,12 @@ def test_legacy_process_tool_results_text_and_json(provider): session = StrandsInMemorySessionMapper().map_to_session([span], "sid") tool_msg = session.traces[0].spans[0].messages[0] - assert tool_msg.content[0].content == 'label: {"value": 42}' + assert tool_msg.content[0].content == 'label:\n{"value": 42}' def test_latest_convention_inference_multi_text_tool_result(provider): """Latest _convert_inference_messages joins all blocks in tool_call_response.""" - input_msg = _json.dumps( + input_msg = json.dumps( [ { "role": "user", @@ -675,12 +674,12 @@ def test_latest_convention_inference_multi_text_tool_result(provider): session = StrandsInMemorySessionMapper().map_to_session([span], "sid") inference = session.traces[0].spans[0] - assert inference.messages[0].content[0].content == "alpha beta" + assert inference.messages[0].content[0].content == "alpha\nbeta" def test_latest_convention_tool_execution_multi_text(provider): """Latest _convert_tool_execution_span joins all blocks in tool_call_response.""" - output_msg = _json.dumps( + output_msg = json.dumps( [ { "role": "tool", @@ -716,4 +715,4 @@ def test_latest_convention_tool_execution_multi_text(provider): session = StrandsInMemorySessionMapper().map_to_session([span], "sid") tool = session.traces[0].spans[0] assert isinstance(tool, ToolExecutionSpan) - assert tool.tool_result.content == "part1 part2" + assert tool.tool_result.content == "part1\npart2" diff --git a/tests/strands_evals/mappers/test_utils.py b/tests/strands_evals/mappers/test_utils.py index ccc0b580..730e1d5c 100644 --- a/tests/strands_evals/mappers/test_utils.py +++ b/tests/strands_evals/mappers/test_utils.py @@ -9,6 +9,97 @@ get_scope_name, readable_spans_to_dicts, ) +from strands_evals.mappers.utils import join_tool_result_content + + +class TestJoinToolResultContent: + def test_empty_list(self): + assert join_tool_result_content([]) == "" + + def test_none_input(self): + assert join_tool_result_content(None) == "" + + def test_non_list_input(self): + assert join_tool_result_content("raw string") == "raw string" + + def test_single_text_block(self): + assert join_tool_result_content([{"text": "hello"}]) == "hello" + + def test_multi_text_blocks(self): + assert join_tool_result_content([{"text": "a"}, {"text": "b"}]) == "a\nb" + + def test_json_block(self): + assert join_tool_result_content([{"json": {"k": 1}}]) == '{"k": 1}' + + def test_json_block_sort_keys(self): + assert join_tool_result_content([{"json": {"b": 2, "a": 1}}]) == '{"a": 1, "b": 2}' + + def test_image_placeholder(self): + assert join_tool_result_content([{"image": {}}]) == "[image]" + + def test_document_placeholder(self): + assert join_tool_result_content([{"document": {}}]) == "[document]" + + def test_video_placeholder(self): + assert join_tool_result_content([{"video": {}}]) == "[video]" + + def test_unknown_key_silently_dropped(self): + assert join_tool_result_content([{"unknown_type": "value"}]) == "" + + def test_text_none_value_no_crash(self): + assert join_tool_result_content([{"text": None}]) == "" + + def test_text_none_value_with_sibling(self): + assert join_tool_result_content([{"text": None}, {"text": "ok"}]) == "ok" + + def test_text_non_str_coerced(self): + assert join_tool_result_content([{"text": 123}]) == "123" + + def test_non_dict_block(self): + assert join_tool_result_content([42]) == "42" + + def test_mixed_blocks(self): + result = join_tool_result_content([{"text": "label:"}, {"json": {"v": 1}}, {"image": {}}]) + assert result == 'label:\n{"v": 1}\n[image]' + + def test_empty_string_input(self): + assert join_tool_result_content("") == "" + + def test_integer_zero_input(self): + # int 0 is not None and not an empty list, so it coerces to "0" + assert join_tool_result_content(0) == "0" + + def test_false_input(self): + # False is not None and not an empty list, so it coerces to "False" + assert join_tool_result_content(False) == "False" + + def test_block_with_multiple_keys_prefers_text(self): + # When a block has multiple keys, 'text' wins (first match in if/elif chain) + result = join_tool_result_content([{"text": "hello", "json": {"x": 1}}]) + assert result == "hello" + + def test_json_block_none_value(self): + # json.dumps(None) == 'null', included in output + assert join_tool_result_content([{"json": None}]) == "null" + + def test_json_block_inf_serialized(self): + import math + + # Python's json.dumps may serialize inf as "Infinity" or raise; either way no crash + result = join_tool_result_content([{"json": math.inf}, {"text": "after"}]) + assert "after" in result or result in ("Infinity\nafter", "after") + + def test_text_empty_string_block_filtered(self): + # An empty-string text block is silently filtered by the join guard + assert join_tool_result_content([{"text": ""}]) == "" + + def test_text_empty_string_block_with_sibling(self): + assert join_tool_result_content([{"text": ""}, {"text": "ok"}]) == "ok" + + def test_bare_dict_not_in_list(self): + # A bare dict (not wrapped in a list) is coerced to str + result = join_tool_result_content({"text": "hello"}) + assert result == str({"text": "hello"}) def make_span_dict(scope_name="test.scope", attributes=None, span_events=None):