strands-agents · poshinchen · Jun 6, 2026 · Jun 2, 2026 · Jun 2, 2026
diff --git a/src/strands_evals/mappers/cloudwatch_session_mapper.py b/src/strands_evals/mappers/cloudwatch_session_mapper.py
@@ -17,7 +17,7 @@
 from typing import Any
 
 from ..mappers.session_mapper import SessionMapper
-from ..mappers.utils import get_body
+from ..mappers.utils import get_body, join_tool_result_content
 from ..types.trace import (
     AgentInvocationSpan,
     AssistantMessage,
@@ -318,11 +318,7 @@ def _extract_tool_results(self, body: dict) -> list[ToolResult]:
 
     def _extract_tool_result_text(self, content: Any) -> str:
         """Extract text from tool result content."""
-        if not content:
-            return ""
-        if isinstance(content, list) and content:
-            return content[0].get("text", "")
-        return str(content)
+        return join_tool_result_content(content)
 
     # --- Body-to-messages conversion ---
 

diff --git a/src/strands_evals/mappers/strands_in_memory_session_mapper.py b/src/strands_evals/mappers/strands_in_memory_session_mapper.py
@@ -24,6 +24,15 @@
     UserMessage,
 )
 from .session_mapper import SessionMapper
+from .utils import join_tool_result_content
+
+
+def _response_to_text(response: Any) -> str:
+    """Return a tool_call_response value as text; anything but a list or str becomes empty."""
+    if isinstance(response, str):
+        return response
+    return join_tool_result_content(response) if isinstance(response, list) else ""
+
 
 logger = logging.getLogger(__name__)
 
@@ -194,10 +203,7 @@ def _process_tool_results(self, content_list: list[dict[str, Any]]) -> list[Text
                 continue
 
             tool_result = item["toolResult"]
-            result_text = ""
-            if "content" in tool_result and tool_result["content"]:
-                content = tool_result["content"]
-                result_text = content[0].get("text", "") if isinstance(content, list) else str(content)
+            result_text = join_tool_result_content(tool_result.get("content"))
 
             result.append(
                 ToolResultContent(
@@ -324,17 +330,7 @@ def _convert_inference_messages(self, otel_msg: dict[str, Any]) -> UserMessage |
                     content.append(TextContent(text=part.get("content", "")))
 
                 if part_type == "tool_call_response":
-                    # Extract text from response array if present
-                    response = part.get("response", [])
-                    response_text = ""
-
-                    ## To-do: Compare the differences for multiple toolResults
-                    if isinstance(response, list) and response:
-                        response_text = (
-                            response[0].get("text", "") if isinstance(response[0], dict) else str(response[0])
-                        )
-                    elif isinstance(response, str):
-                        response_text = response
+                    response_text = _response_to_text(part.get("response", []))
 
                     content.append(
                         ToolResultContent(
@@ -380,17 +376,9 @@ def _convert_tool_execution_span(self, span: ReadableSpan, session_id: str) -> T
                             if output_messages and output_messages[0].get("parts"):
                                 part = output_messages[0]["parts"][0]
                                 if part.get("type") == "tool_call_response":
-                                    response = part.get("response", [])
-                                    if isinstance(response, list) and response:
-                                        tool_result_content = (
-                                            response[0].get("text", "")
-                                            if isinstance(response[0], dict)
-                                            else str(response[0])
-                                        )
-                                    elif isinstance(response, str):
-                                        tool_result_content = response
+                                    tool_result_content = _response_to_text(part.get("response", []))
                 except Exception as e:
-                    logger.warning(f"Failed to process tool event {event.name}: {e}")
+                    logger.warning("Failed to process tool event %s: %s", event.name, e)
         else:
             for event in span.events:
                 try:
@@ -403,7 +391,7 @@ def _convert_tool_execution_span(self, span: ReadableSpan, session_id: str) -> T
                         message_list = self._parse_json_attr(event_attributes, "message")
                         tool_result_content = message_list[0].get("text", "") if message_list else ""
                 except Exception as e:
-                    logger.warning(f"Failed to process tool event {event.name}: {e}")
+                    logger.warning("Failed to process tool event %s: %s", event.name, e)
 
         tool_call = ToolCall(name=tool_name, arguments=tool_arguments, tool_call_id=tool_call_id)
         tool_result = ToolResult(content=tool_result_content, error=tool_error, tool_call_id=tool_call_id)

diff --git a/src/strands_evals/mappers/utils.py b/src/strands_evals/mappers/utils.py
@@ -2,11 +2,63 @@
 Utility functions for mapper selection and detection.
 """
 
+import json
+import logging
 from typing import Any
 
 from .constants import SCOPE_LANGCHAIN_OTEL, SCOPE_OPENINFERENCE, SCOPE_STRANDS
 from .session_mapper import SessionMapper
 
+logger = logging.getLogger(__name__)
+
+
+def join_tool_result_content(content: Any) -> str:
+    """Join all blocks in a Bedrock-style toolResult content list into one string.
+
+    Blocks are rendered in order and joined with a newline separator so
+    multi-paragraph tool outputs stay readable for downstream LLM judges. text
+    blocks are stringified; json blocks go through json.dumps (sorted keys,
+    non-ASCII kept verbatim, str() fallback if not serializable); image/document/
+    video blocks become placeholder markers; other dict blocks are skipped.
+
+    Args:
+        content: A Bedrock-style toolResult content value. May be a list of typed
+            block dicts, a non-list value (coerced to str), or None/empty.
+
+    Returns:
+        A single string with all rendered blocks newline-joined. Any falsy input
+        (None, "", [], {}, 0, False) returns an empty string. Blocks that render
+        to an empty string are left out of the join, so a list containing only
+        empty-text blocks returns an empty string.
+    """
+    if not content:
+        return ""
+    if not isinstance(content, list):
+        return str(content)
+
+    parts: list[str] = []
+    for block in content:
+        if not isinstance(block, dict):
+            parts.append(str(block))
+        elif "text" in block:
+            parts.append("" if block["text"] is None else str(block["text"]))
+        elif "json" in block:
+            try:
+                parts.append(json.dumps(block["json"], sort_keys=True, ensure_ascii=False))
+            except (TypeError, ValueError) as exc:
+                # Keep the payload visible to judges rather than dropping the block.
+                logger.debug("json_error=<%s> | join_tool_result_content: could not serialize json block", exc)
+                parts.append(str(block["json"]))
+        elif "image" in block:
+            parts.append("[image]")
+        elif "document" in block:
+            parts.append("[document]")
+        elif "video" in block:
+            parts.append("[video]")
+        else:
+            logger.debug("block_keys=<%s> | join_tool_result_content: unknown block type, skipping", list(block.keys()))
+    return "\n".join(p for p in parts if p)
+
 
 def detect_otel_mapper(spans: list[Any]) -> SessionMapper:
     """Detect the appropriate mapper based on span scope and data format.

diff --git a/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py b/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py
@@ -245,3 +245,64 @@ def test_record_with_no_body_skipped(self, mapper):
         session = mapper.map_to_session(records, "sess-1")
         assert len(session.traces) == 1
         assert len(session.traces[0].spans) > 0
+
+
+# --- Regression tests: multi-block toolResult.content ---
+
+
+def _make_multi_block_tool_result_message(tool_use_id, content_blocks):
+    """Return a role="tool" message whose JSON body wraps *content_blocks* in one toolResult."""
+    tool_result = {"content": content_blocks, "toolUseId": tool_use_id}
+    # The body is stored as a JSON string nested under content -> content.
+    serialized = json.dumps([{"toolResult": tool_result}])
+    return {"role": "tool", "content": {"content": serialized}}
+
+
+class TestMultiBlockToolResult:
+    def test_multi_text_blocks_joined(self, mapper):
+        """Both text blocks of a single toolResult appear in the span content, newline-separated."""
+        blocks = [{"text": "first"}, {"text": "second"}]
+        tool_call = make_log_record(
+            trace_id="t1",
+            span_id="s1",
+            input_messages=[make_user_message("hi")],
+            output_messages=[_make_assistant_tool_use_message("tool_x", {}, "tu-1")],
+            time_nano=1000,
+        )
+        tool_reply = make_log_record(
+            trace_id="t1",
+            span_id="s2",
+            input_messages=[make_user_message("hi"), _make_multi_block_tool_result_message("tu-1", blocks)],
+            output_messages=[make_assistant_text_message("ok")],
+            time_nano=2000,
+        )
+
+        # Exactly one tool execution span is expected; its result holds the joined text.
+        spans = mapper.map_to_session([tool_call, tool_reply], "sess-1").traces[0].spans
+        executions = [span for span in spans if isinstance(span, ToolExecutionSpan)]
+        assert len(executions) == 1
+        assert executions[0].tool_result.content == "first\nsecond"
+
+    def test_text_and_json_blocks_joined(self, mapper):
+        """A text block followed by a json block yields the text plus the serialized JSON."""
+        blocks = [{"text": "val:"}, {"json": {"x": 1}}]
+        tool_call = make_log_record(
+            trace_id="t1",
+            span_id="s1",
+            input_messages=[make_user_message("hi")],
+            output_messages=[_make_assistant_tool_use_message("tool_y", {}, "tu-2")],
+            time_nano=1000,
+        )
+        tool_reply = make_log_record(
+            trace_id="t1",
+            span_id="s2",
+            input_messages=[make_user_message("hi"), _make_multi_block_tool_result_message("tu-2", blocks)],
+            output_messages=[make_assistant_text_message("ok")],
+            time_nano=2000,
+        )
+
+        # Exactly one tool execution span is expected; its result holds the joined text.
+        spans = mapper.map_to_session([tool_call, tool_reply], "sess-1").traces[0].spans
+        executions = [span for span in spans if isinstance(span, ToolExecutionSpan)]
+        assert len(executions) == 1
+        assert executions[0].tool_result.content == 'val:\n{"x": 1}'
diff --git a/tests/strands_evals/mappers/test_strands_in_memory_mapper.py b/tests/strands_evals/mappers/test_strands_in_memory_mapper.py
@@ -1,3 +1,5 @@
+import json
+
 import pytest
 from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
 from opentelemetry.trace import SpanContext, SpanKind, TraceFlags
@@ -573,3 +575,144 @@ def test_session_id_filtering_gen_ai_conversation_id_takes_precedence(provider):
     # Should NOT match on session.id when gen_ai.conversation.id is present
     session2 = mapper.map_to_session([span], "session-456")
     assert len(session2.traces) == 0
+
+
+# --- Regression tests: multi-block toolResult.content and tool_call_response.response ---
+
+
+def test_legacy_process_tool_results_multi_text(provider):
+    """Every text block of a legacy toolResult ends up in the mapped content, newline-joined."""
+    tool_result = {
+        "toolUseId": "tr1",
+        "content": [{"text": "first"}, {"text": "second"}],
+    }
+    payload = json.dumps([{"toolResult": tool_result}])
+
+    # Emit the tool message and the gen_ai.choice as two separate span events.
+    span = make_span(
+        provider,
+        0xAAA,
+        0xBBB,
+        0xCCC,
+        "chat",
+        {"gen_ai.operation.name": "chat"},
+        lambda recorder: (
+            recorder.add_event("gen_ai.tool.message", {"content": payload}),
+            recorder.add_event("gen_ai.choice", {"message": '[{"text": "ok"}]'}),
+        ),
+    )
+
+    # Walk trace -> span -> message -> content block to reach the mapped text.
+    mapped = StrandsInMemorySessionMapper().map_to_session([span], "sid")
+    chat_span = mapped.traces[0].spans[0]
+    first_message = chat_span.messages[0]
+    joined = first_message.content[0].content
+    assert joined == "first\nsecond"
+
+
+def test_legacy_process_tool_results_text_and_json(provider):
+    """A legacy toolResult mixing a text block and a json block maps to both, newline-joined."""
+    tool_result = {
+        "toolUseId": "tr2",
+        "content": [{"text": "label:"}, {"json": {"value": 42}}],
+    }
+    payload = json.dumps([{"toolResult": tool_result}])
+
+    # Emit the tool message and the gen_ai.choice as two separate span events.
+    span = make_span(
+        provider,
+        0xAAA,
+        0xBBB,
+        0xCCC,
+        "chat",
+        {"gen_ai.operation.name": "chat"},
+        lambda recorder: (
+            recorder.add_event("gen_ai.tool.message", {"content": payload}),
+            recorder.add_event("gen_ai.choice", {"message": '[{"text": "ok"}]'}),
+        ),
+    )
+
+    # Walk trace -> span -> message -> content block to reach the mapped text.
+    mapped = StrandsInMemorySessionMapper().map_to_session([span], "sid")
+    chat_span = mapped.traces[0].spans[0]
+    first_message = chat_span.messages[0]
+    joined = first_message.content[0].content
+    assert joined == 'label:\n{"value": 42}'
+
+
+def test_latest_convention_inference_multi_text_tool_result(provider):
+    """A multi-block tool_call_response in gen_ai.input.messages is mapped to newline-joined text."""
+    response_part = {
+        "type": "tool_call_response",
+        "id": "t1",
+        "response": [{"text": "alpha"}, {"text": "beta"}],
+    }
+    input_msg = json.dumps(
+        [{"role": "user", "parts": [response_part]}],
+    )
+    output_msg = '[{"role": "assistant", "parts": [{"type": "text", "content": "done"}]}]'
+
+    # Input and output messages travel together on one operation-details event.
+    event_attributes = {
+        "gen_ai.input.messages": input_msg,
+        "gen_ai.output.messages": output_msg,
+    }
+    span = make_span(
+        provider,
+        0xAAA,
+        0xBBB,
+        0xCCC,
+        "chat",
+        {"gen_ai.operation.name": "chat", "gen_ai.provider.name": "strands-agents"},
+        lambda recorder: recorder.add_event(
+            "gen_ai.client.inference.operation.details",
+            event_attributes,
+        ),
+    )
+
+    # The first content block of the first mapped message holds the tool result text.
+    mapped = StrandsInMemorySessionMapper().map_to_session([span], "sid")
+    chat_span = mapped.traces[0].spans[0]
+    tool_result_block = chat_span.messages[0].content[0]
+    assert tool_result_block.content == "alpha\nbeta"
+
+
+def test_latest_convention_tool_execution_multi_text(provider):
+    """An execute_tool span with a two-block tool_call_response maps to newline-joined content."""
+    # A single tool_call_response part whose response holds two text blocks.
+    response_part = {
+        "type": "tool_call_response",
+        "id": "t1",
+        "response": [{"text": "part1"}, {"text": "part2"}],
+    }
+    output_msg = json.dumps(
+        [{"role": "tool", "parts": [response_part]}],
+    )
+
+    # Attributes set on the execute_tool span itself.
+    span_attributes = {
+        "gen_ai.operation.name": "execute_tool",
+        "gen_ai.provider.name": "strands-agents",
+        "gen_ai.tool.name": "search",
+        "gen_ai.tool.call.id": "t1",
+        "gen_ai.tool.status": "success",
+    }
+
+    span = make_span(
+        provider,
+        0xAAA,
+        0xBBB,
+        0xCCC,
+        "execute_tool",
+        span_attributes,
+        lambda recorder: recorder.add_event(
+            "gen_ai.client.inference.operation.details",
+            {"gen_ai.output.messages": output_msg},
+        ),
+    )
+
+    # The only mapped span must be a tool execution carrying the joined result.
+    mapped = StrandsInMemorySessionMapper().map_to_session([span], "sid")
+    tool_span = mapped.traces[0].spans[0]
+    assert isinstance(tool_span, ToolExecutionSpan)
+    assert tool_span.tool_result.content == "part1\npart2"