pydantic · DouweM · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026
diff --git a/.agents/settings.local.json b/.agents/settings.local.json
diff --git a/PLAN.md b/PLAN.md
@@ -0,0 +1,106 @@
+# Memory Capability
+
+## Summary
+
+Implements a `Memory` capability (`AbstractCapability` subclass) providing
+persistent key-value memory across agent sessions. References issues #30 and #31.
+
+User-facing docs: [`docs/capabilities/memory.md`](docs/capabilities/memory.md).
+
+## Design
+
+### Architecture
+
+- **`Memory`** dataclass extends `AbstractCapability[AgentDepsT]`
+  - `get_instructions()` returns a dynamic callable injecting stored memories
+    into the system prompt at run start
+  - `get_toolset()` returns a `FunctionToolset` with five tools: `save_memory`,
+    `recall_memory`, `search_memories`, `list_memories`, `delete_memory`
+  - Per-tool description overrides via `tool_descriptions: dict[str, str]`
+  - Tool functions use closures over `self.store` and `self.recency_scorer`
+    (no dependency on agent `deps`)
+
+### Storage
+
+- **`MemoryStore`** Protocol: pluggable backend with six methods — `get`, `put`,
+  `delete`, `list_all`, `search`, `list_namespaces`
+- **`DictMemoryStore`**: dict-based, ephemeral, for tests/scratch (default)
+- **`FileMemoryStore(path)`**: JSON file on disk, reads on init, writes on every
+  mutation; drops expired entries on save
+- Both extend `_BaseDictStore` for shared logic
+- Custom backends: implement the Protocol. See
+  [`examples/memory/postgres_store.py`](examples/memory/postgres_store.py) for
+  a Postgres reference.
+
+### `MemoryEntry`
+
+Required: `key`, `content`. Optional fields:
+
+- `tags: list[str]` — LLM-set categorisation
+- `namespace: tuple[str, ...]` — hierarchical namespace, prefix-matched
+- `expires_at: str | None` — ISO 8601 wall-clock expiry (opt-in TTL)
+- `created_at`, `updated_at: str` — ISO 8601 timestamps
+- `summary: str | None` — preferred over `content` for prompt injection
+- `metadata: dict[str, object]` — structured attributes; filterable via search
+- `read_only: bool` — pin against agent modification (always injected)
+- `char_limit: int | None` — hard cap on `content` length, enforced at construction
+- `importance: float | None` — search-score booster
+
+`MemoryEntryDict` TypedDict covers the JSON serialisation form.
+
+### Search & retrieval
+
+- `search` score = keyword-match count + `entry.importance` (if set) +
+  `recency_scorer(entry)` (if provided)
+- Word-boundary regex matching across `key`, `content`, `tags` (case-insensitive)
+- `_` and `-` count as word separators
+- Default recency scorer: `exponential_decay(half_life_days=30, weight=0.5)`
+- `search`/`list_all` accept `namespace` (prefix match) and `filter` (metadata
+  equality) kwargs; `search` additionally accepts `recency_scorer`
+
+### Instructions injection
+
+- `read_only=True` entries always inject (bypass count cap, byte budget, and dedup)
+- Non-pinned entries respect `max_instructions_memories` (default 20) and
+  `byte_budget: int | None` (UTF-8 byte cap)
+- `entry.summary` is preferred over `entry.content` to save tokens
+- `dedup_recent_saves: bool = True` skips entries the LLM saved earlier in this
+  run (found in the tool-call history) while the saved content still matches
+  the store entry; if the entry has since diverged, its current value is injected
+- Pinned entries are listed first
+- Disabled entirely via `inject_memories_in_instructions=False` (prompt-cache
+  mitigation for write-heavy workloads)
+
+### Spec serialisation
+
+- `Memory.get_serialization_name()` → `"Memory"`
+- `Memory.from_spec(backend='memory'|'file', path=..., ...)` for declarative config
+
+## Configuration
+
+| Field | Default | Description |
+|---|---|---|
+| `store` | `DictMemoryStore()` | Storage backend |
+| `inject_memories_in_instructions` | `True` | Include memories in system prompt |
+| `max_instructions_memories` | `20` | Cap on non-pinned memories injected |
+| `byte_budget` | `None` | Optional UTF-8 byte cap on injection block |
+| `recency_scorer` | `exponential_decay(half_life_days=30, weight=0.5)` | Or `None` to disable |
+| `tool_descriptions` | `{}` | Per-tool description overrides |
+| `dedup_recent_saves` | `True` | Suppress injection of entries just saved in this run |
+
+## Files
+
+- `src/pydantic_ai_harness/memory.py` — capability, stores, entry model, recency helpers
+- `src/pydantic_ai_harness/__init__.py` — re-exports
+- `tests/test_memory.py` — 150 tests covering all code paths
+- `examples/memory/*.py` — three runnable examples plus the Postgres reference
+- `docs/capabilities/memory.md` — user-facing docs
+
+## Future Work
+
+- **Semantic retrieval** — `SemanticMemoryStore` Protocol extension and an
+  `EmbeddingStore` reference (numpy/cosine, or pgvector). Deferred until a
+  concrete backend drives the API design — premature design tends to lock in
+  the wrong shape.
+- **Deferred capability loading** (PR #5230 in pydantic-ai) — once that lands,
+  declare `id`/`description` on `Memory` to opt into deferred loading.
diff --git a/examples/memory/coding_assistant.py b/examples/memory/coding_assistant.py
@@ -0,0 +1,101 @@
+"""Self-Improving Coding Assistant — procedural memory via instructions injection.
+
+Demonstrates: instructions injection as self-modifying prompt, namespaces, search, delete.
+"""
+
+from __future__ import annotations
+
+import sys
+
+import logfire
+from pydantic_ai import Agent
+
+from pydantic_ai_harness.memory import DictMemoryStore, Memory
+
+logfire.configure(send_to_logfire='if-token-present')  # export to Logfire only when a token is present
+logfire.instrument_openai()  # pyright: ignore[reportUnknownMemberType]
+
+
+def main() -> None:
+    """Run the coding assistant example."""
+    store = DictMemoryStore()
+    memory = Memory(store=store, max_instructions_memories=10)
+
+    agent = Agent(
+        'openai:gpt-4o-mini',
+        capabilities=[memory],
+        system_prompt=(
+            'You are a coding assistant that learns from user corrections. '
+            'When the user gives you a coding rule or correction, save it as a memory '
+            'with namespace ["rules"] and tags like ["python", "style"] or ["typescript", "testing"]. '
+            'Use descriptive keys like "rule_python_fstrings" or "rule_ts_const". '
+            'When asked to write code, search your memories for relevant rules first.'
+        ),
+    )
+
+    # --- Teach rules ---
+    with logfire.span('teach-rules'):
+        result1 = agent.run_sync(
+            'Remember these coding rules:\n'
+            '1. Always use f-strings in Python, never .format() or % formatting\n'
+            '2. In TypeScript, prefer const over let, never use var\n'
+            '3. Always add type hints to Python function signatures'
+        )
+        print(f'Assistant: {result1.output}')
+
+    rules = store.list_all()
+    print(f'\nRules stored: {len(rules)}')
+    for r in rules:
+        print(f'  [{r.key}] {r.content} (namespace={r.namespace}, tags={r.tags})')
+
+    assert len(rules) >= 3, f'Expected at least 3 rules saved, got {len(rules)}'
+
+    # Check that search works across stored rules
+    python_rules = store.search('python')
+    print(f'Rules matching "python": {len(python_rules)}')
+    assert len(python_rules) >= 1, 'Expected at least 1 rule matching "python"'
+
+    # --- Verify instructions injection ---
+    # Build instructions should now include the rules
+    from unittest.mock import MagicMock
+
+    from pydantic_ai._run_context import RunContext
+    from pydantic_ai.usage import RunUsage
+
+    ctx: RunContext[None] = RunContext(deps=None, model=MagicMock(), usage=RunUsage())
+    instructions = memory.build_instructions(ctx)
+    print(f'\nInstructions preview (first 300 chars):\n{instructions[:300]}...')
+
+    assert 'Currently stored memories' in instructions, 'Expected memories in instructions'
+
+    # --- Ask for code, verify rules are considered ---
+    with logfire.span('apply-rules'):
+        result2 = agent.run_sync(
+            'Write a Python function that greets a user by name. Follow all coding rules you know.'
+        )
+        print(f'\nAssistant: {result2.output}')
+
+    # The output should use f-strings and type hints (based on rules)
+    output_lower = result2.output.lower()
+    assert "f'" in result2.output or 'f"' in result2.output or 'f-string' in output_lower, (
+        'Expected f-string usage in code output'
+    )
+
+    # --- Delete an obsolete rule ---
+    with logfire.span('delete-rule'):
+        result3 = agent.run_sync('Actually, the TypeScript const rule is outdated for this project. Delete it.')
+        print(f'\nAssistant: {result3.output}')
+
+    remaining = store.list_all()
+    print(f'\nRules after deletion: {len(remaining)}')
+    for r in remaining:
+        print(f'  [{r.key}] {r.content}')
+
+    # Should have fewer rules now
+    assert len(remaining) < len(rules), 'Expected at least one rule deleted'
+
+    print('\n--- Coding Assistant example passed! ---')
+
+
+if __name__ == '__main__':
+    sys.exit(main() or 0)  # main() returns None, so a run that does not raise exits with status 0
diff --git a/examples/memory/personal_assistant.py b/examples/memory/personal_assistant.py
@@ -0,0 +1,89 @@
+"""Personal Assistant — remembers user preferences across sessions.
+
+Demonstrates: FileMemoryStore persistence, save/recall, instructions injection, tags, namespaces.
+"""
+
+from __future__ import annotations
+
+import sys
+import tempfile
+from pathlib import Path
+
+import logfire
+from pydantic_ai import Agent
+
+from pydantic_ai_harness.memory import FileMemoryStore, Memory
+
+logfire.configure(send_to_logfire='if-token-present')  # export to Logfire only when a token is present
+logfire.instrument_openai()  # pyright: ignore[reportUnknownMemberType]
+
+
+def main() -> None:
+    """Run the personal assistant example."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        mem_path = Path(tmpdir) / 'preferences.json'
+        store = FileMemoryStore(mem_path)
+        memory = Memory(store=store)
+
+        agent = Agent(
+            'openai:gpt-4o-mini',
+            capabilities=[memory],
+            system_prompt=(
+                'You are a helpful personal assistant. '
+                'When the user tells you about their preferences, save each one as a memory '
+                'with namespace ["user_prefs"] and appropriate tags. '
+                'Use descriptive keys like "preferred_name" or "theme_preference".'
+            ),
+        )
+
+        # --- Session 1: user shares preferences ---
+        with logfire.span('session-1-save-preferences'):
+            result1 = agent.run_sync("Hi! My name is Alice, I prefer dark mode, and I'm vegetarian.")
+            print(f'Assistant: {result1.output}')
+
+        entries = store.list_all()
+        print(f'\nMemories after session 1: {len(entries)}')
+        for e in entries:
+            print(f'  [{e.key}] {e.content} (tags={e.tags}, namespace={e.namespace})')
+
+        assert len(entries) >= 2, f'Expected at least 2 memories saved, got {len(entries)}'
+        all_content = ' '.join(e.content.lower() for e in entries)
+        assert 'alice' in all_content or any('alice' in e.key.lower() for e in entries), 'Expected a memory about Alice'
+
+        # --- Session 2: new agent instance loads from same file (persistence) ---
+        store2 = FileMemoryStore(mem_path)
+        memory2 = Memory(store=store2)
+        agent2 = Agent(
+            'openai:gpt-4o-mini',
+            capabilities=[memory2],
+            system_prompt='You are a helpful personal assistant.',
+        )
+
+        loaded_entries = store2.list_all()
+        print(f'\nMemories loaded in session 2: {len(loaded_entries)}')
+        assert len(loaded_entries) == len(entries), 'FileMemoryStore persistence failed'
+
+        with logfire.span('session-2-recall-preferences'):
+            result2 = agent2.run_sync('What do you know about me?')
+            print(f'Assistant: {result2.output}')
+
+        # The instructions injection should have included the memories
+        assert 'alice' in result2.output.lower() or 'dark' in result2.output.lower(), (
+            'Expected assistant to recall preferences from instructions injection'
+        )
+
+        # --- Session 3: update a preference ---
+        with logfire.span('session-3-update-preference'):
+            result3 = agent2.run_sync('Actually, I go by Ali now. Please update my name.')
+            print(f'\nAssistant: {result3.output}')
+
+        updated_entries = store2.list_all()
+        print(f'\nMemories after update: {len(updated_entries)}')
+        for e in updated_entries:
+            print(f'  [{e.key}] {e.content} (tags={e.tags})')
+
+        print('\n--- Personal Assistant example passed! ---')
+
+
+if __name__ == '__main__':
+    sys.exit(main() or 0)  # main() returns None, so a run that does not raise exits with status 0