pydantic · dsfaccini · Jun 2, 2026 · May 26, 2026 · May 27, 2026 · May 28, 2026
diff --git a/.gitignore b/.gitignore
@@ -25,3 +25,4 @@ wheels/
 # Hypothesis
 .hypothesis/
 .vscode/
+mutants/
diff --git a/docs/mutation-testing.md b/docs/mutation-testing.md
@@ -0,0 +1,49 @@
+# Mutation Testing Results
+
+> Generated from commit `bd268c8` on 2026-05-26. Results may become stale as code
+> evolves — regenerate via `scripts/run-mutmut.sh run --max-children 1`.
+
+Covers `pydantic_ai_harness/filesystem/_toolset.py` and `pydantic_ai_harness/shell/_toolset.py`.
+
+Run with [mutmut](https://mutmut.readthedocs.io/) v3 via `scripts/run-mutmut.sh` (which
+installs mutmut ephemerally with `uv run --with` — no dev dependency required).
+
+## Summary
+
+| Metric | Value |
+|---|---|
+| Total mutants | 584 |
+| Killed | 524 |
+| Survived | 60 |
+| Kill rate | **89.7%** |
+
+## Equivalent Mutants (60 survivors)
+
+All 60 survivors are either equivalent to the original or not observable by the current test suite; the table below gives the reason for each category.
+
+| Category | Count | Why unkillable |
+|---|---|---|
+| Trampoline default params | 7 | mutmut v3 wraps functions; wrapper keeps original defaults, so mutated defaults are never observed |
+| `name=None` / omitted in `add_function()` | 18 | pydantic-ai falls back to `method.__name__`, which equals the original explicit name |
+| Encoding case `'utf-8'` → `'UTF-8'` | 10 | Python's codec lookup is case-insensitive |
+| Encoding omit/`None` (`utf-8` is default) | 11 | Default text encoding is UTF-8 on all supported platforms |
+| Unreachable `except` blocks (`pragma: no cover`) | 6 | `except ValueError/OSError` paths can't be triggered in the test environment |
+| `replace()` count removed/changed | 2 | Count is pre-validated as exactly 1 before the call |
+| `CancelScope(shield=True)` → `False`/`None` | 2 | Only observable if an outer cancellation fires during the near-instant cleanup window |
+| Dead `returncode` branch | 1 | `proc.returncode` is never `None` after `await proc.wait()` |
+| `errors='replace'` mutations | 3 | Test data is valid UTF-8; the error handler is never invoked |
+
+## Limitations
+
+Trio-parametrized tests are excluded during mutation testing (`-k 'not trio'` in
+`pyproject.toml [tool.mutmut]`) because trio segfaults in mutmut's subprocess
+environment on Python 3.14 / macOS. This does not affect the kill rate — trio
+tests exercise the same code paths as the asyncio tests.
+
+## Running
+
+```bash
+scripts/run-mutmut.sh run --max-children 1
+scripts/run-mutmut.sh results
+scripts/run-mutmut.sh show <mutant-name>
+```
diff --git a/pydantic_ai_harness/__init__.py b/pydantic_ai_harness/__init__.py
@@ -1,16 +1,26 @@
-"""The batteries for your Pydantic AI agent -- the official capability library."""
+"""Pydantic AI capability library."""
 
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from .code_mode import CodeMode
+    from .filesystem import FileSystem
+    from .shell import Shell
 
-__all__ = ['CodeMode']
+__all__ = ['CodeMode', 'FileSystem', 'Shell']
 
 
 def __getattr__(name: str) -> object:
+    """Resolve the package's public names lazily (module-level `__getattr__`).
+
+    Each capability's submodule is imported only when its name is requested,
+    so importing the package itself does not import any capability submodule.
+
+    Raises:
+        AttributeError: If `name` is not one of the names handled below.
+    """
     if name == 'CodeMode':
         from .code_mode import CodeMode
 
         return CodeMode
+    elif name == 'FileSystem':
+        from .filesystem import FileSystem
+
+        return FileSystem
+    elif name == 'Shell':
+        from .shell import Shell
+
+        return Shell
     raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
diff --git a/pydantic_ai_harness/filesystem/__init__.py b/pydantic_ai_harness/filesystem/__init__.py
@@ -0,0 +1,6 @@
+"""Filesystem capability: gives agents configurable, sandboxed file system access."""
+
+from pydantic_ai_harness.filesystem._capability import FileSystem
+from pydantic_ai_harness.filesystem._toolset import FileSystemToolset
+
+__all__ = ['FileSystem', 'FileSystemToolset']
diff --git a/pydantic_ai_harness/filesystem/_capability.py b/pydantic_ai_harness/filesystem/_capability.py
@@ -0,0 +1,80 @@
+"""Filesystem capability that provides sandboxed file system access."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+from pydantic_ai.capabilities import AbstractCapability
+from pydantic_ai.toolsets import AgentToolset
+
+from pydantic_ai_harness.filesystem._toolset import FileSystemToolset
+
+# Default glob patterns for `FileSystem.protected_patterns` (paths treated as
+# read-only): git metadata, dotenv files, key material, and secrets files.
+# `FileSystem` copies this list per instance, so it is never shared or mutated.
+_DEFAULT_PROTECTED: list[str] = [
+    '.git/*',
+    '.env',
+    '.env.*',
+    '*.pem',
+    '*.key',
+    '**/secrets*',
+]
+
+
+@dataclass
+class FileSystem(AbstractCapability[Any]):
+    """File system access scoped to a root directory.
+
+    All paths are resolved relative to `root_dir`. Traversal above the root
+    is rejected. Symlinks are resolved before authorization.
+    """
+
+    root_dir: str | Path = '.'
+    """Root directory for all file operations. Defaults to the current directory."""
+
+    allowed_patterns: Sequence[str] = field(default_factory=list[str])
+    """If non-empty, only paths matching at least one glob pattern are accessible."""
+
+    denied_patterns: Sequence[str] = field(default_factory=list[str])
+    """Paths matching any of these glob patterns are rejected."""
+
+    protected_patterns: Sequence[str] = field(default_factory=lambda: list(_DEFAULT_PROTECTED))
+    """Paths matching these patterns are read-only (writes are rejected).
+
+    Defaults to protecting `.git/`, `.env`, key files, and secrets.
+    Set to an empty list to disable protection.
+    """
+
+    max_read_lines: int = 2000
+    """Maximum number of lines returned by a single `read_file` call."""
+
+    max_search_results: int = 1000
+    """Maximum number of matches returned by `search_files`."""
+
+    max_find_results: int = 1000
+    """Maximum number of matches returned by `find_files`."""
+
+    def __post_init__(self) -> None:
+        # Runtime validation: dataclass field annotations are advisory, not enforced.
+        # A config-driven caller could pass a string that would otherwise propagate.
+        values: dict[str, Any] = {
+            'max_read_lines': self.max_read_lines,
+            'max_search_results': self.max_search_results,
+            'max_find_results': self.max_find_results,
+        }
+        for name, value in values.items():
+            if not isinstance(value, int) or value <= 0:
+                raise ValueError(f'{name} must be a positive integer, got {value!r}')
+
+    def get_toolset(self) -> AgentToolset[Any]:
+        """Build and return the filesystem toolset."""
+        return FileSystemToolset(
+            root_dir=Path(self.root_dir),
+            allowed_patterns=self.allowed_patterns,
+            denied_patterns=self.denied_patterns,
+            protected_patterns=self.protected_patterns,
+            max_read_lines=self.max_read_lines,
+            max_search_results=self.max_search_results,
+            max_find_results=self.max_find_results,
+        )