From 41c5b7182fdbfa090f2a8a328a9a048342963017 Mon Sep 17 00:00:00 2001
From: poshinchen <pschen@amazon.com>
Date: Tue, 2 Jun 2026 14:07:33 -0400
Subject: [PATCH 1/2] fix(mappers): join all toolResult content blocks to
 prevent false negatives in FaithfulnessEvaluator

Multi-part Bedrock toolResult.content lists (text, json, image, document, video blocks) were
silently truncated to content[0], making values in subsequent blocks invisible to evaluators.

Adds join_tool_result_content() helper in mappers/utils.py and wires it into every affected
read site: _process_tool_results (legacy), _convert_inference_messages tool_call_response
branch (latest), _convert_tool_execution_span latest branch (both in
StrandsInMemorySessionMapper), and _extract_tool_result_text (CloudWatchSessionMapper).

Closes #235

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../mappers/cloudwatch_session_mapper.py      |   8 +-
 .../strands_in_memory_session_mapper.py       |  34 ++---
 src/strands_evals/mappers/utils.py            |  36 +++++
 .../mappers/test_cloudwatch_session_mapper.py |  63 ++++++++
 .../mappers/test_strands_in_memory_mapper.py  | 144 ++++++++++++++++++
 5 files changed, 257 insertions(+), 28 deletions(-)

diff --git a/src/strands_evals/mappers/cloudwatch_session_mapper.py b/src/strands_evals/mappers/cloudwatch_session_mapper.py
index a6bbb30b..9b1d93f6 100644
--- a/src/strands_evals/mappers/cloudwatch_session_mapper.py
+++ b/src/strands_evals/mappers/cloudwatch_session_mapper.py
@@ -17,7 +17,7 @@
 from typing import Any
 
 from ..mappers.session_mapper import SessionMapper
-from ..mappers.utils import get_body
+from ..mappers.utils import get_body, join_tool_result_content
 from ..types.trace import (
     AgentInvocationSpan,
     AssistantMessage,
@@ -318,11 +318,7 @@ def _extract_tool_results(self, body: dict) -> list[ToolResult]:
 
     def _extract_tool_result_text(self, content: Any) -> str:
         """Extract text from tool result content."""
-        if not content:
-            return ""
-        if isinstance(content, list) and content:
-            return content[0].get("text", "")
-        return str(content)
+        return join_tool_result_content(content)
 
     # --- Body-to-messages conversion ---
 
diff --git a/src/strands_evals/mappers/strands_in_memory_session_mapper.py b/src/strands_evals/mappers/strands_in_memory_session_mapper.py
index 76212118..74d1af56 100644
--- a/src/strands_evals/mappers/strands_in_memory_session_mapper.py
+++ b/src/strands_evals/mappers/strands_in_memory_session_mapper.py
@@ -24,6 +24,7 @@
     UserMessage,
 )
 from .session_mapper import SessionMapper
+from .utils import join_tool_result_content
 
 logger = logging.getLogger(__name__)
 
@@ -194,10 +195,7 @@ def _process_tool_results(self, content_list: list[dict[str, Any]]) -> list[Text
                 continue
 
             tool_result = item["toolResult"]
-            result_text = ""
-            if "content" in tool_result and tool_result["content"]:
-                content = tool_result["content"]
-                result_text = content[0].get("text", "") if isinstance(content, list) else str(content)
+            result_text = join_tool_result_content(tool_result.get("content"))
 
             result.append(
                 ToolResultContent(
@@ -324,17 +322,12 @@ def _convert_inference_messages(self, otel_msg: dict[str, Any]) -> UserMessage |
                     content.append(TextContent(text=part.get("content", "")))
 
                 if part_type == "tool_call_response":
-                    # Extract text from response array if present
                     response = part.get("response", [])
-                    response_text = ""
-
-                    ## To-do: Compare the differences for multiple toolResults
-                    if isinstance(response, list) and response:
-                        response_text = (
-                            response[0].get("text", "") if isinstance(response[0], dict) else str(response[0])
-                        )
-                    elif isinstance(response, str):
-                        response_text = response
+                    response_text = (
+                        join_tool_result_content(response)
+                        if isinstance(response, list)
+                        else (response if isinstance(response, str) else "")
+                    )
 
                     content.append(
                         ToolResultContent(
@@ -381,14 +374,11 @@ def _convert_tool_execution_span(self, span: ReadableSpan, session_id: str) -> T
                                 part = output_messages[0]["parts"][0]
                                 if part.get("type") == "tool_call_response":
                                     response = part.get("response", [])
-                                    if isinstance(response, list) and response:
-                                        tool_result_content = (
-                                            response[0].get("text", "")
-                                            if isinstance(response[0], dict)
-                                            else str(response[0])
-                                        )
-                                    elif isinstance(response, str):
-                                        tool_result_content = response
+                                    tool_result_content = (
+                                        join_tool_result_content(response)
+                                        if isinstance(response, list)
+                                        else (response if isinstance(response, str) else "")
+                                    )
                 except Exception as e:
                     logger.warning(f"Failed to process tool event {event.name}: {e}")
         else:
diff --git a/src/strands_evals/mappers/utils.py b/src/strands_evals/mappers/utils.py
index 050458b5..d37eb874 100644
--- a/src/strands_evals/mappers/utils.py
+++ b/src/strands_evals/mappers/utils.py
@@ -2,12 +2,48 @@
 Utility functions for mapper selection and detection.
 """
 
+import json
 from typing import Any
 
 from .constants import SCOPE_LANGCHAIN_OTEL, SCOPE_OPENINFERENCE, SCOPE_STRANDS
 from .session_mapper import SessionMapper
 
 
+def join_tool_result_content(content: Any) -> str:
+    """Join all blocks in a Bedrock-style toolResult content list into one string.
+
+    Bedrock toolResult.content is a list of typed blocks:
+      {"text": "..."}           -> pass through as-is
+      {"json": {...}}           -> json.dumps(value)
+      {"image": ...}            -> "[image]"
+      {"document": ...}         -> "[document]"
+      {"video": ...}            -> "[video]"
+
+    Non-list values are coerced to str.
+    """
+    if not content:
+        return ""
+    if not isinstance(content, list):
+        return str(content)
+
+    parts: list[str] = []
+    for block in content:
+        if not isinstance(block, dict):
+            parts.append(str(block))
+            continue
+        if "text" in block:
+            parts.append(block["text"])
+        elif "json" in block:
+            parts.append(json.dumps(block["json"]))
+        elif "image" in block:
+            parts.append("[image]")
+        elif "document" in block:
+            parts.append("[document]")
+        elif "video" in block:
+            parts.append("[video]")
+    return " ".join(parts)
+
+
 def detect_otel_mapper(spans: list[Any]) -> SessionMapper:
     """Detect the appropriate mapper based on span scope and data format.
 
diff --git a/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py b/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py
index 22f2ab63..13149725 100644
--- a/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py
+++ b/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py
@@ -245,3 +245,66 @@ def test_record_with_no_body_skipped(self, mapper):
         session = mapper.map_to_session(records, "sess-1")
         assert len(session.traces) == 1
         assert len(session.traces[0].spans) > 0
+
+
+# --- Regression tests: multi-block toolResult.content ---
+
+
+def _make_multi_block_tool_result_message(tool_use_id, content_blocks):
+    """Build a tool result message with arbitrary content blocks."""
+    return {
+        "role": "tool",
+        "content": {
+            "content": json.dumps([{"toolResult": {"content": content_blocks, "toolUseId": tool_use_id}}])
+        },
+    }
+
+
+class TestMultiBlockToolResult:
+    def test_multi_text_blocks_joined(self, mapper):
+        """Multiple text blocks in toolResult.content are joined, not truncated to [0]."""
+        record1 = make_log_record(
+            trace_id="t1",
+            span_id="s1",
+            input_messages=[make_user_message("hi")],
+            output_messages=[_make_assistant_tool_use_message("tool_x", {}, "tu-1")],
+            time_nano=1000,
+        )
+        record2 = make_log_record(
+            trace_id="t1",
+            span_id="s2",
+            input_messages=[
+                make_user_message("hi"),
+                _make_multi_block_tool_result_message("tu-1", [{"text": "first"}, {"text": "second"}]),
+            ],
+            output_messages=[make_assistant_text_message("ok")],
+            time_nano=2000,
+        )
+        session = mapper.map_to_session([record1, record2], "sess-1")
+        tool_spans = [s for s in session.traces[0].spans if isinstance(s, ToolExecutionSpan)]
+        assert len(tool_spans) == 1
+        assert tool_spans[0].tool_result.content == "first second"
+
+    def test_text_and_json_blocks_joined(self, mapper):
+        """Mixed text+json blocks are both included in the joined string."""
+        record1 = make_log_record(
+            trace_id="t1",
+            span_id="s1",
+            input_messages=[make_user_message("hi")],
+            output_messages=[_make_assistant_tool_use_message("tool_y", {}, "tu-2")],
+            time_nano=1000,
+        )
+        record2 = make_log_record(
+            trace_id="t1",
+            span_id="s2",
+            input_messages=[
+                make_user_message("hi"),
+                _make_multi_block_tool_result_message("tu-2", [{"text": "val:"}, {"json": {"x": 1}}]),
+            ],
+            output_messages=[make_assistant_text_message("ok")],
+            time_nano=2000,
+        )
+        session = mapper.map_to_session([record1, record2], "sess-1")
+        tool_spans = [s for s in session.traces[0].spans if isinstance(s, ToolExecutionSpan)]
+        assert len(tool_spans) == 1
+        assert tool_spans[0].tool_result.content == 'val: {"x": 1}'
diff --git a/tests/strands_evals/mappers/test_strands_in_memory_mapper.py b/tests/strands_evals/mappers/test_strands_in_memory_mapper.py
index 9a162d85..07037c05 100644
--- a/tests/strands_evals/mappers/test_strands_in_memory_mapper.py
+++ b/tests/strands_evals/mappers/test_strands_in_memory_mapper.py
@@ -573,3 +573,147 @@ def test_session_id_filtering_gen_ai_conversation_id_takes_precedence(provider):
     # Should NOT match on session.id when gen_ai.conversation.id is present
     session2 = mapper.map_to_session([span], "session-456")
     assert len(session2.traces) == 0
+
+
+# --- Regression tests: multi-part toolResult.content ---
+
+
+import json as _json
+
+
+def test_legacy_process_tool_results_multi_text(provider):
+    """Legacy _process_tool_results joins all text blocks, not just content[0]."""
+    payload = _json.dumps(
+        [
+            {
+                "toolResult": {
+                    "toolUseId": "tr1",
+                    "content": [{"text": "first"}, {"text": "second"}],
+                }
+            }
+        ]
+    )
+    span = make_span(
+        provider,
+        0xAAA,
+        0xBBB,
+        0xCCC,
+        "chat",
+        {"gen_ai.operation.name": "chat"},
+        lambda s: (
+            s.add_event("gen_ai.tool.message", {"content": payload}),
+            s.add_event("gen_ai.choice", {"message": '[{"text": "ok"}]'}),
+        ),
+    )
+
+    session = StrandsInMemorySessionMapper().map_to_session([span], "sid")
+    tool_msg = session.traces[0].spans[0].messages[0]
+    assert tool_msg.content[0].content == "first second"
+
+
+def test_legacy_process_tool_results_text_and_json(provider):
+    """Legacy _process_tool_results handles mixed text+json blocks."""
+    payload = _json.dumps(
+        [
+            {
+                "toolResult": {
+                    "toolUseId": "tr2",
+                    "content": [{"text": "label:"}, {"json": {"value": 42}}],
+                }
+            }
+        ]
+    )
+    span = make_span(
+        provider,
+        0xAAA,
+        0xBBB,
+        0xCCC,
+        "chat",
+        {"gen_ai.operation.name": "chat"},
+        lambda s: (
+            s.add_event("gen_ai.tool.message", {"content": payload}),
+            s.add_event("gen_ai.choice", {"message": '[{"text": "ok"}]'}),
+        ),
+    )
+
+    session = StrandsInMemorySessionMapper().map_to_session([span], "sid")
+    tool_msg = session.traces[0].spans[0].messages[0]
+    assert tool_msg.content[0].content == 'label: {"value": 42}'
+
+
+def test_latest_convention_inference_multi_text_tool_result(provider):
+    """Latest _convert_inference_messages joins all blocks in tool_call_response."""
+    input_msg = _json.dumps(
+        [
+            {
+                "role": "user",
+                "parts": [
+                    {
+                        "type": "tool_call_response",
+                        "id": "t1",
+                        "response": [{"text": "alpha"}, {"text": "beta"}],
+                    }
+                ],
+            }
+        ]
+    )
+    span = make_span(
+        provider,
+        0xAAA,
+        0xBBB,
+        0xCCC,
+        "chat",
+        {"gen_ai.operation.name": "chat", "gen_ai.provider.name": "strands-agents"},
+        lambda s: s.add_event(
+            "gen_ai.client.inference.operation.details",
+            {
+                "gen_ai.input.messages": input_msg,
+                "gen_ai.output.messages": '[{"role": "assistant", "parts": [{"type": "text", "content": "done"}]}]',
+            },
+        ),
+    )
+
+    session = StrandsInMemorySessionMapper().map_to_session([span], "sid")
+    inference = session.traces[0].spans[0]
+    assert inference.messages[0].content[0].content == "alpha beta"
+
+
+def test_latest_convention_tool_execution_multi_text(provider):
+    """Latest _convert_tool_execution_span joins all blocks in tool_call_response."""
+    output_msg = _json.dumps(
+        [
+            {
+                "role": "tool",
+                "parts": [
+                    {
+                        "type": "tool_call_response",
+                        "id": "t1",
+                        "response": [{"text": "part1"}, {"text": "part2"}],
+                    }
+                ],
+            }
+        ]
+    )
+    span = make_span(
+        provider,
+        0xAAA,
+        0xBBB,
+        0xCCC,
+        "execute_tool",
+        {
+            "gen_ai.operation.name": "execute_tool",
+            "gen_ai.provider.name": "strands-agents",
+            "gen_ai.tool.name": "search",
+            "gen_ai.tool.call.id": "t1",
+            "gen_ai.tool.status": "success",
+        },
+        lambda s: s.add_event(
+            "gen_ai.client.inference.operation.details",
+            {"gen_ai.output.messages": output_msg},
+        ),
+    )
+
+    session = StrandsInMemorySessionMapper().map_to_session([span], "sid")
+    tool = session.traces[0].spans[0]
+    assert isinstance(tool, ToolExecutionSpan)
+    assert tool.tool_result.content == "part1 part2"

From 1cf9af03a42118ca8a61c1a4821c2d30eeccdf46 Mon Sep 17 00:00:00 2001
From: poshinchen <pschen@amazon.com>
Date: Tue, 2 Jun 2026 14:15:03 -0400
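Subject: [PATCH 2/2] fix: fix missing tool_results aggregation

Follow-up hardening for join_tool_result_content() and its call sites:

- Join blocks with a newline instead of a space and leave out parts that render to an
  empty string.
- Return "" only for None or an empty list; other non-list values (including 0 and False)
  are coerced with str().
- Coerce text block values with str() (None becomes ""), serialize json blocks with
  sort_keys=True, guard json serialization errors, and log unknown block types at debug.
- Extract the duplicated tool_call_response handling in StrandsInMemorySessionMapper into
  _response_to_text() and switch the logger.warning calls to lazy %-style arguments.
- Add unit tests for join_tool_result_content() and update the mapper tests for the
  newline separator.
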
---
 .../strands_in_memory_session_mapper.py       | 26 +++---
 src/strands_evals/mappers/utils.py            | 38 +++++---
 .../mappers/test_cloudwatch_session_mapper.py |  8 +-
 .../mappers/test_strands_in_memory_mapper.py  | 21 ++---
 tests/strands_evals/mappers/test_utils.py     | 91 +++++++++++++++++++
 5 files changed, 143 insertions(+), 41 deletions(-)

diff --git a/src/strands_evals/mappers/strands_in_memory_session_mapper.py b/src/strands_evals/mappers/strands_in_memory_session_mapper.py
index 74d1af56..a019cf43 100644
--- a/src/strands_evals/mappers/strands_in_memory_session_mapper.py
+++ b/src/strands_evals/mappers/strands_in_memory_session_mapper.py
@@ -26,6 +26,14 @@
 from .session_mapper import SessionMapper
 from .utils import join_tool_result_content
 
+
+def _response_to_text(response: Any) -> str:
+    """Normalize a tool_call_response to text: join block lists, pass str through, else return empty string."""
+    if isinstance(response, list):
+        return join_tool_result_content(response)
+    return response if isinstance(response, str) else ""
+
+
 logger = logging.getLogger(__name__)
 
 
@@ -322,12 +330,7 @@ def _convert_inference_messages(self, otel_msg: dict[str, Any]) -> UserMessage |
                     content.append(TextContent(text=part.get("content", "")))
 
                 if part_type == "tool_call_response":
-                    response = part.get("response", [])
-                    response_text = (
-                        join_tool_result_content(response)
-                        if isinstance(response, list)
-                        else (response if isinstance(response, str) else "")
-                    )
+                    response_text = _response_to_text(part.get("response", []))
 
                     content.append(
                         ToolResultContent(
@@ -373,14 +376,9 @@ def _convert_tool_execution_span(self, span: ReadableSpan, session_id: str) -> T
                             if output_messages and output_messages[0].get("parts"):
                                 part = output_messages[0]["parts"][0]
                                 if part.get("type") == "tool_call_response":
-                                    response = part.get("response", [])
-                                    tool_result_content = (
-                                        join_tool_result_content(response)
-                                        if isinstance(response, list)
-                                        else (response if isinstance(response, str) else "")
-                                    )
+                                    tool_result_content = _response_to_text(part.get("response", []))
                 except Exception as e:
-                    logger.warning(f"Failed to process tool event {event.name}: {e}")
+                    logger.warning("Failed to process tool event %s: %s", event.name, e)
         else:
             for event in span.events:
                 try:
@@ -393,7 +391,8 @@ def _convert_tool_execution_span(self, span: ReadableSpan, session_id: str) -> T
                         message_list = self._parse_json_attr(event_attributes, "message")
-                        tool_result_content = message_list[0].get("text", "") if message_list else ""
+                        # Aggregate all result blocks so multi-part outputs stay visible to evaluators.
+                        tool_result_content = join_tool_result_content(message_list)
                 except Exception as e:
-                    logger.warning(f"Failed to process tool event {event.name}: {e}")
+                    logger.warning("Failed to process tool event %s: %s", event.name, e)
 
         tool_call = ToolCall(name=tool_name, arguments=tool_arguments, tool_call_id=tool_call_id)
         tool_result = ToolResult(content=tool_result_content, error=tool_error, tool_call_id=tool_call_id)
diff --git a/src/strands_evals/mappers/utils.py b/src/strands_evals/mappers/utils.py
index d37eb874..82058385 100644
--- a/src/strands_evals/mappers/utils.py
+++ b/src/strands_evals/mappers/utils.py
@@ -3,25 +3,36 @@
 """
 
 import json
+import logging
 from typing import Any
 
 from .constants import SCOPE_LANGCHAIN_OTEL, SCOPE_OPENINFERENCE, SCOPE_STRANDS
 from .session_mapper import SessionMapper
 
+logger = logging.getLogger(__name__)
+
 
 def join_tool_result_content(content: Any) -> str:
     """Join all blocks in a Bedrock-style toolResult content list into one string.
 
-    Bedrock toolResult.content is a list of typed blocks:
-      {"text": "..."}           -> pass through as-is
-      {"json": {...}}           -> json.dumps(value)
-      {"image": ...}            -> "[image]"
-      {"document": ...}         -> "[document]"
-      {"video": ...}            -> "[video]"
+    Bedrock toolResult.content is a list of typed blocks that are joined with a
+    newline separator so multi-paragraph tool outputs stay readable for downstream
+    LLM judges. text values are coerced to str, json values are serialized via
+    json.dumps(sort_keys=True), and image/document/video become "[image]"-style markers.
+
+    Args:
+        content: A Bedrock-style toolResult content value. May be a list of typed
+            block dicts, a non-list value (coerced to str), or None/empty.
 
-    Non-list values are coerced to str.
+    Returns:
+        A single string with all block values newline-joined, or empty string for
+        empty/None input. Blocks that render to an empty string (e.g. empty or None
+        text) and dict blocks with no recognized key are left out of the join, so
+        a list containing only such blocks returns an empty string.
     """
-    if not content:
+    if content is None:
+        return ""
+    if isinstance(content, list) and len(content) == 0:
         return ""
     if not isinstance(content, list):
         return str(content)
@@ -32,16 +43,23 @@ def join_tool_result_content(content: Any) -> str:
             parts.append(str(block))
             continue
         if "text" in block:
-            parts.append(block["text"])
+            parts.append(str(block["text"]) if block["text"] is not None else "")
         elif "json" in block:
-            parts.append(json.dumps(block["json"]))
+            try:
+                parts.append(json.dumps(block["json"], sort_keys=True))
+            except (TypeError, ValueError) as exc:
+                # Fall back to str() so an unserializable payload stays visible instead of vanishing.
+                logger.debug("json_error=<%s> | join_tool_result_content: could not serialize json block", exc)
+                parts.append(str(block["json"]))
         elif "image" in block:
             parts.append("[image]")
         elif "document" in block:
             parts.append("[document]")
         elif "video" in block:
             parts.append("[video]")
-    return " ".join(parts)
+        else:
+            logger.debug("block_keys=<%s> | join_tool_result_content: unknown block type, skipping", list(block.keys()))
+    return "\n".join(p for p in parts if p)
 
 
 def detect_otel_mapper(spans: list[Any]) -> SessionMapper:
diff --git a/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py b/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py
index 13149725..37c2d255 100644
--- a/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py
+++ b/tests/strands_evals/mappers/test_cloudwatch_session_mapper.py
@@ -254,9 +254,7 @@ def _make_multi_block_tool_result_message(tool_use_id, content_blocks):
     """Build a tool result message with arbitrary content blocks."""
     return {
         "role": "tool",
-        "content": {
-            "content": json.dumps([{"toolResult": {"content": content_blocks, "toolUseId": tool_use_id}}])
-        },
+        "content": {"content": json.dumps([{"toolResult": {"content": content_blocks, "toolUseId": tool_use_id}}])},
     }
 
 
@@ -283,7 +281,7 @@ def test_multi_text_blocks_joined(self, mapper):
         session = mapper.map_to_session([record1, record2], "sess-1")
         tool_spans = [s for s in session.traces[0].spans if isinstance(s, ToolExecutionSpan)]
         assert len(tool_spans) == 1
-        assert tool_spans[0].tool_result.content == "first second"
+        assert tool_spans[0].tool_result.content == "first\nsecond"
 
     def test_text_and_json_blocks_joined(self, mapper):
         """Mixed text+json blocks are both included in the joined string."""
@@ -307,4 +305,4 @@ def test_text_and_json_blocks_joined(self, mapper):
         session = mapper.map_to_session([record1, record2], "sess-1")
         tool_spans = [s for s in session.traces[0].spans if isinstance(s, ToolExecutionSpan)]
         assert len(tool_spans) == 1
-        assert tool_spans[0].tool_result.content == 'val: {"x": 1}'
+        assert tool_spans[0].tool_result.content == 'val:\n{"x": 1}'
diff --git a/tests/strands_evals/mappers/test_strands_in_memory_mapper.py b/tests/strands_evals/mappers/test_strands_in_memory_mapper.py
index 07037c05..c44e23c9 100644
--- a/tests/strands_evals/mappers/test_strands_in_memory_mapper.py
+++ b/tests/strands_evals/mappers/test_strands_in_memory_mapper.py
@@ -1,3 +1,5 @@
+import json
+
 import pytest
 from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
 from opentelemetry.trace import SpanContext, SpanKind, TraceFlags
@@ -578,12 +580,9 @@ def test_session_id_filtering_gen_ai_conversation_id_takes_precedence(provider):
 # --- Regression tests: multi-part toolResult.content ---
 
 
-import json as _json
-
-
 def test_legacy_process_tool_results_multi_text(provider):
     """Legacy _process_tool_results joins all text blocks, not just content[0]."""
-    payload = _json.dumps(
+    payload = json.dumps(
         [
             {
                 "toolResult": {
@@ -608,12 +607,12 @@ def test_legacy_process_tool_results_multi_text(provider):
 
     session = StrandsInMemorySessionMapper().map_to_session([span], "sid")
     tool_msg = session.traces[0].spans[0].messages[0]
-    assert tool_msg.content[0].content == "first second"
+    assert tool_msg.content[0].content == "first\nsecond"
 
 
 def test_legacy_process_tool_results_text_and_json(provider):
     """Legacy _process_tool_results handles mixed text+json blocks."""
-    payload = _json.dumps(
+    payload = json.dumps(
         [
             {
                 "toolResult": {
@@ -638,12 +637,12 @@ def test_legacy_process_tool_results_text_and_json(provider):
 
     session = StrandsInMemorySessionMapper().map_to_session([span], "sid")
     tool_msg = session.traces[0].spans[0].messages[0]
-    assert tool_msg.content[0].content == 'label: {"value": 42}'
+    assert tool_msg.content[0].content == 'label:\n{"value": 42}'
 
 
 def test_latest_convention_inference_multi_text_tool_result(provider):
     """Latest _convert_inference_messages joins all blocks in tool_call_response."""
-    input_msg = _json.dumps(
+    input_msg = json.dumps(
         [
             {
                 "role": "user",
@@ -675,12 +674,12 @@ def test_latest_convention_inference_multi_text_tool_result(provider):
 
     session = StrandsInMemorySessionMapper().map_to_session([span], "sid")
     inference = session.traces[0].spans[0]
-    assert inference.messages[0].content[0].content == "alpha beta"
+    assert inference.messages[0].content[0].content == "alpha\nbeta"
 
 
 def test_latest_convention_tool_execution_multi_text(provider):
     """Latest _convert_tool_execution_span joins all blocks in tool_call_response."""
-    output_msg = _json.dumps(
+    output_msg = json.dumps(
         [
             {
                 "role": "tool",
@@ -716,4 +715,4 @@ def test_latest_convention_tool_execution_multi_text(provider):
     session = StrandsInMemorySessionMapper().map_to_session([span], "sid")
     tool = session.traces[0].spans[0]
     assert isinstance(tool, ToolExecutionSpan)
-    assert tool.tool_result.content == "part1 part2"
+    assert tool.tool_result.content == "part1\npart2"
diff --git a/tests/strands_evals/mappers/test_utils.py b/tests/strands_evals/mappers/test_utils.py
index ccc0b580..730e1d5c 100644
--- a/tests/strands_evals/mappers/test_utils.py
+++ b/tests/strands_evals/mappers/test_utils.py
@@ -9,6 +9,97 @@
     get_scope_name,
     readable_spans_to_dicts,
 )
+from strands_evals.mappers.utils import join_tool_result_content
+
+
+class TestJoinToolResultContent:
+    def test_empty_list(self):
+        assert join_tool_result_content([]) == ""
+
+    def test_none_input(self):
+        assert join_tool_result_content(None) == ""
+
+    def test_non_list_input(self):
+        assert join_tool_result_content("raw string") == "raw string"
+
+    def test_single_text_block(self):
+        assert join_tool_result_content([{"text": "hello"}]) == "hello"
+
+    def test_multi_text_blocks(self):
+        assert join_tool_result_content([{"text": "a"}, {"text": "b"}]) == "a\nb"
+
+    def test_json_block(self):
+        assert join_tool_result_content([{"json": {"k": 1}}]) == '{"k": 1}'
+
+    def test_json_block_sort_keys(self):
+        assert join_tool_result_content([{"json": {"b": 2, "a": 1}}]) == '{"a": 1, "b": 2}'
+
+    def test_image_placeholder(self):
+        assert join_tool_result_content([{"image": {}}]) == "[image]"
+
+    def test_document_placeholder(self):
+        assert join_tool_result_content([{"document": {}}]) == "[document]"
+
+    def test_video_placeholder(self):
+        assert join_tool_result_content([{"video": {}}]) == "[video]"
+
+    def test_unknown_key_silently_dropped(self):
+        assert join_tool_result_content([{"unknown_type": "value"}]) == ""
+
+    def test_text_none_value_no_crash(self):
+        assert join_tool_result_content([{"text": None}]) == ""
+
+    def test_text_none_value_with_sibling(self):
+        assert join_tool_result_content([{"text": None}, {"text": "ok"}]) == "ok"
+
+    def test_text_non_str_coerced(self):
+        assert join_tool_result_content([{"text": 123}]) == "123"
+
+    def test_non_dict_block(self):
+        assert join_tool_result_content([42]) == "42"
+
+    def test_mixed_blocks(self):
+        result = join_tool_result_content([{"text": "label:"}, {"json": {"v": 1}}, {"image": {}}])
+        assert result == 'label:\n{"v": 1}\n[image]'
+
+    def test_empty_string_input(self):
+        assert join_tool_result_content("") == ""
+
+    def test_integer_zero_input(self):
+        # int 0 is not None and not an empty list, so it coerces to "0"
+        assert join_tool_result_content(0) == "0"
+
+    def test_false_input(self):
+        # False is not None and not an empty list, so it coerces to "False"
+        assert join_tool_result_content(False) == "False"
+
+    def test_block_with_multiple_keys_prefers_text(self):
+        # When a block has multiple keys, 'text' wins (first match in if/elif chain)
+        result = join_tool_result_content([{"text": "hello", "json": {"x": 1}}])
+        assert result == "hello"
+
+    def test_json_block_none_value(self):
+        # json.dumps(None) == 'null', included in output
+        assert join_tool_result_content([{"json": None}]) == "null"
+
+    def test_json_block_inf_serialized(self):
+        import math
+
+        # json.dumps defaults to allow_nan=True: inf serializes to "Infinity" and does not raise.
+        result = join_tool_result_content([{"json": math.inf}, {"text": "after"}])
+        assert result == "Infinity\nafter"
+
+    def test_text_empty_string_block_filtered(self):
+        # An empty-string text block is silently filtered by the join guard
+        assert join_tool_result_content([{"text": ""}]) == ""
+
+    def test_text_empty_string_block_with_sibling(self):
+        assert join_tool_result_content([{"text": ""}, {"text": "ok"}]) == "ok"
+
+    def test_bare_dict_not_in_list(self):
+        # A bare dict (not wrapped in a list) is coerced to str
+        result = join_tool_result_content({"text": "hello"})
+        assert result == str({"text": "hello"})
 
 
 def make_span_dict(scope_name="test.scope", attributes=None, span_events=None):