18 changes: 18 additions & 0 deletions docs/mcp.md
@@ -3,29 +3,36 @@
The `plan_and_code` MCP tool lets Claude Code trigger StackMemory’s multi‑agent flow silently and receive a single JSON result. It plans with Claude, implements with Codex or Claude, and critiques the result — with optional retry loops and context recording.

## What it does

- Planner (Claude): generates a concise plan with acceptance criteria and risks.
- Implementer (Codex/Claude): applies a focused change per step.
- Critic (Claude): returns `{ approved, issues[], suggestions[] }` to gate retries.
- Verification commands: optional task-specific repro/test commands run after each implementation attempt and included in the critic input.
- Returns a single JSON payload: `{ plan, implementation, critique, iterations[] }`.
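The loop described above can be sketched in Python. This is a hypothetical outline with stub agents so the control flow runs standalone; the real tool calls Claude and Codex, and all function names here are assumptions, not the actual implementation:

```python
# Stub agents standing in for the real Claude/Codex calls (names hypothetical).
def planner(task):
    return {"summary": f"Plan for: {task}", "acceptance": ["tests pass"], "risks": []}

def implementer(plan, execute=False):
    return {"applied": execute, "notes": plan["summary"]}

def critic(plan, implementation, verification_results):
    # Critic gates retries on the documented { approved, issues[], suggestions[] } shape.
    approved = all(verification_results) if verification_results else True
    return {"approved": approved, "issues": [], "suggestions": []}

def plan_and_code(task, max_iters=2, execute=False, verification_commands=()):
    plan = planner(task)
    iterations = []
    implementation = critique = None
    for _ in range(max_iters):
        implementation = implementer(plan, execute)
        # Stand-in for actually running the verification commands.
        results = [True for _ in verification_commands]
        critique = critic(plan, implementation, results)
        iterations.append({"critique": critique})
        if critique["approved"]:
            break
    return {"plan": plan, "implementation": implementation,
            "critique": critique, "iterations": iterations}
```

An approved critique ends the loop early, so a clean first attempt yields a single iteration.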

## Tool definition

- name: `plan_and_code`
- arguments:
- `task` (string, required): short task description
- `implementer` ("codex" | "claude", default: `codex`)
- `maxIters` (number, default: `2`): retry loop iterations
- `execute` (boolean, default: `false`): if `false`, implementer is dry‑run
- `verificationCommands` (string[], optional): repro/test commands that must pass after each implementation attempt
- `record` (boolean, default: `false`): write plan/critique as simple context rows
- `recordFrame` (boolean, default: `false`): write a real frame + anchors

## Environment defaults

If not specified in arguments, the MCP handler reads these env vars:

- `STACKMEMORY_MM_PLANNER_MODEL` (e.g., `claude-sonnet-4-20250514`)
- `STACKMEMORY_MM_REVIEWER_MODEL` (defaults to planner model if unset)
- `STACKMEMORY_MM_IMPLEMENTER` (`codex` or `claude`)
- `STACKMEMORY_MM_MAX_ITERS` (e.g., `3`)
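A typical shell setup for these defaults (the model string and values are illustrative):

```shell
# Defaults picked up by the MCP handler when a tool call omits the argument
export STACKMEMORY_MM_PLANNER_MODEL="claude-sonnet-4-20250514"
export STACKMEMORY_MM_REVIEWER_MODEL="$STACKMEMORY_MM_PLANNER_MODEL"
export STACKMEMORY_MM_IMPLEMENTER="codex"
export STACKMEMORY_MM_MAX_ITERS="3"
```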

## Example (MCP request)

```json
{
  "method": "tools/call",
  "params": {
    "name": "plan_and_code",
    "arguments": {
      "task": "Fix deterministic replay drift",
      "implementer": "codex",
      "maxIters": 2,
      "execute": true,
      "verificationCommands": [
        "npx vitest run src/orchestrators/multimodal/__tests__/determinism.test.ts --reporter=dot"
      ],
      "recordFrame": true
    }
  }
}
```

Response content is a single `text` item containing a JSON string:

```json
{
  "ok": true,
  "plan": { "summary": "…" },
  "implementation": { "output": "…" },
  "critique": { "approved": true, "issues": [], "suggestions": [] },
  "iterations": []
}
```

## Recording behavior

- `record: true` writes two entries into `.stackmemory/context.db` (simple `contexts` table):
- `Plan: <summary>` (importance 0.8)
- `Critique: approved|needs_changes` (importance 0.6)
- Both modes are best‑effort: if the DB isn’t ready, the handler returns its JSON result without failing.
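The rows written by `record: true` can be sketched with stdlib sqlite3. This is a self-contained illustration of the shape described above; the real `contexts` table schema and column names are assumptions:

```python
import sqlite3

# In-memory stand-in for .stackmemory/context.db; column names are hypothetical.
db = sqlite3.connect(":memory:")
db.execute("CREATE TABLE contexts (content TEXT, importance REAL)")
# What a plan/critique recording pass would insert:
db.execute("INSERT INTO contexts VALUES (?, ?)", ("Plan: tighten the config loader", 0.8))
db.execute("INSERT INTO contexts VALUES (?, ?)", ("Critique: approved", 0.6))
rows = db.execute(
    "SELECT content, importance FROM contexts ORDER BY importance DESC"
).fetchall()
```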

## Notes

- Implementer `codex` calls `codex-sm` (must be on PATH). Use `--execute` in CLI, or `execute: true` in MCP, to actually run it; otherwise it’s a dry‑run.
- Audit files are saved to `.stackmemory/build/spike-<timestamp>.json` to support review/debugging.
- You can compare models:
- Planner/critic: override with `STACKMEMORY_MM_PLANNER_MODEL` / `STACKMEMORY_MM_REVIEWER_MODEL`.
- Implementer: set to `claude` to A/B against Codex, or keep `codex` (default).

## CLI equivalents (for quick checks)

- Quiet JSON output:
- `stackmemory build "Refactor config loader" --json`
- `stackmemory skills spike --task "Refactor config loader" --json`
- Execute implementer and record as frame:
- `stackmemory skills spike --task "Refactor" --execute --max-iters 3 --json --record-frame`
- Execute with a task-specific verification harness:
- `stackmemory build "Fix deterministic replay drift" --verify "npm run determinism:test" --execute`

---

@@ -152,11 +168,13 @@ Response (content[0].text is a JSON string):

Notes:

- `recordFrame: true` creates a real StackMemory frame + anchors (plan summary, commands, issues, suggestions).
- `execute: true` actually invokes the implementer; otherwise it’s a dry‑run.
- Approval IDs are persisted to `.stackmemory/build/pending.json` so editor restarts don’t lose pending approvals.

### Optional helper tools

- `plan_only`: Returns a plan JSON without running code.
- `call_claude`: Calls Claude directly (prompt/model/system).
- `call_codex`: Calls Codex via `codex-sm` (prompt/args/execute).
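For example, `plan_only` can be invoked like `plan_and_code`; the argument shape below mirrors the main tool and is an assumption:

```json
{
  "method": "tools/call",
  "params": {
    "name": "plan_only",
    "arguments": { "task": "Refactor config loader" }
  }
}
```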
29 changes: 29 additions & 0 deletions packages/python-sdk/pyproject.toml
@@ -0,0 +1,29 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "stackmemory"
version = "0.1.0"
description = "Python SDK for StackMemory — content cache, skill packs, and provenance tracking"
readme = "README.md"
license = "MIT"
requires-python = ">=3.11"
authors = [{ name = "StackMemory", email = "hello@stackmemory.ai" }]
keywords = ["stackmemory", "mcp", "skill-packs", "provenance", "token-cache", "ai", "llm"]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Software Development :: Libraries",
]

[project.urls]
Homepage = "https://github.com/stackmemoryai/stackmemory"
Repository = "https://github.com/stackmemoryai/stackmemory/tree/main/packages/python-sdk"

[tool.hatch.build.targets.wheel]
packages = ["stackmemory"]
21 changes: 21 additions & 0 deletions packages/python-sdk/stackmemory/__init__.py
@@ -0,0 +1,21 @@
"""
stackmemory — Python SDK for StackMemory.

Content cache, skill packs, and provenance tracking for AI agent workflows.
Zero external dependencies. Uses stdlib sqlite3.
"""

from stackmemory.cache import ContentCache
from stackmemory.provenance import ProvenanceStore, TraceEvent
from stackmemory.packs import SkillPackRegistry, load_pack_from_dir
from stackmemory.client import StackMemory

__version__ = "0.1.0"
__all__ = [
"StackMemory",
"ContentCache",
"ProvenanceStore",
"TraceEvent",
"SkillPackRegistry",
"load_pack_from_dir",
]
160 changes: 160 additions & 0 deletions packages/python-sdk/stackmemory/cache.py
@@ -0,0 +1,160 @@
"""Content-addressable cache for LLM context deduplication."""

from __future__ import annotations

import hashlib
import math
import sqlite3
import time
from dataclasses import dataclass


@dataclass
class CacheEntry:
    hash: str
    content: str
    token_count: int
    hit_count: int
    first_seen: int
    last_seen: int
    source: str


@dataclass
class CacheLookupResult:
    hit: bool
    hash: str
    entry: CacheEntry | None = None
    tokens_saved: int = 0


@dataclass
class CacheStats:
    total_entries: int
    total_tokens_cached: int
    total_tokens_saved: int
    hit_rate: float
    top_sources: list[tuple[str, int]]


def estimate_tokens(content: str) -> int:
    """Estimate token count using chars/4 approximation."""
    if not content:
        return 0
    return math.ceil(len(content) / 4)


def hash_content(content: str) -> str:
    """SHA-256 hex digest for content-addressable lookup."""
    return hashlib.sha256(content.encode()).hexdigest()


class ContentCache:
    """SQLite-backed content-hash cache with token savings tracking."""

    def __init__(self, db: sqlite3.Connection) -> None:
        self._db = db
        self._init_schema()

    def _init_schema(self) -> None:
        self._db.executescript("""
            CREATE TABLE IF NOT EXISTS content_cache (
                hash TEXT PRIMARY KEY,
                content TEXT NOT NULL,
                token_count INTEGER NOT NULL,
                hit_count INTEGER NOT NULL DEFAULT 0,
                first_seen INTEGER NOT NULL,
                last_seen INTEGER NOT NULL,
                source TEXT NOT NULL DEFAULT ''
            );
            CREATE INDEX IF NOT EXISTS idx_cache_source ON content_cache(source);
        """)

    def lookup(self, content: str, source: str = "") -> CacheLookupResult:
        """Check if content exists. Increments hit_count on hit."""
        h = hash_content(content)
        row = self._db.execute(
            "SELECT * FROM content_cache WHERE hash = ?", (h,)
        ).fetchone()

        if not row:
            return CacheLookupResult(hit=False, hash=h)

        now = int(time.time())
        self._db.execute(
            "UPDATE content_cache SET hit_count = hit_count + 1, last_seen = ? WHERE hash = ?",
            (now, h),
        )
        # Column order: hash, content, token_count, hit_count, first_seen, last_seen, source
        if source and source != row[6]:
            self._db.execute(
                "UPDATE content_cache SET source = ? WHERE hash = ?", (source, h)
            )
        self._db.commit()

        entry = CacheEntry(
            hash=row[0], content=row[1], token_count=row[2],
            hit_count=row[3] + 1, first_seen=row[4],
            last_seen=now, source=source or row[6],
        )
        return CacheLookupResult(hit=True, hash=h, entry=entry, tokens_saved=entry.token_count)

    def put(self, content: str, source: str = "") -> CacheEntry:
        """Insert or update a cache entry."""
        h = hash_content(content)
        token_count = estimate_tokens(content)
        now = int(time.time())

        existing = self._db.execute(
            "SELECT hash FROM content_cache WHERE hash = ?", (h,)
        ).fetchone()

        if existing:
            self._db.execute(
                "UPDATE content_cache SET hit_count = hit_count + 1, last_seen = ?, source = ? WHERE hash = ?",
                (now, source, h),
            )
        else:
            self._db.execute(
                "INSERT INTO content_cache (hash, content, token_count, hit_count, first_seen, last_seen, source) VALUES (?, ?, ?, 0, ?, ?, ?)",
                (h, content, token_count, now, now, source),
            )
        self._db.commit()

        row = self._db.execute(
            "SELECT * FROM content_cache WHERE hash = ?", (h,)
        ).fetchone()
        return CacheEntry(
            hash=row[0], content=row[1], token_count=row[2],
            hit_count=row[3], first_seen=row[4], last_seen=row[5], source=row[6],
        )

    def get_stats(self) -> CacheStats:
        """Aggregate cache statistics."""
        row = self._db.execute("""
            SELECT COUNT(*), COALESCE(SUM(token_count), 0),
                   COALESCE(SUM(hit_count * token_count), 0),
                   COALESCE(SUM(hit_count), 0)
            FROM content_cache
        """).fetchone()

        total_entries, total_cached, total_saved, total_hits = row
        hit_rate = total_hits / (total_hits + total_entries) if (total_hits + total_entries) > 0 else 0.0

        top = self._db.execute("""
            SELECT source, SUM(hit_count * token_count) AS saved
            FROM content_cache WHERE source != ''
            GROUP BY source ORDER BY saved DESC LIMIT 10
        """).fetchall()

        return CacheStats(
            total_entries=total_entries,
            total_tokens_cached=total_cached,
            total_tokens_saved=total_saved,
            hit_rate=hit_rate,
            top_sources=[(r[0], r[1]) for r in top],
        )

    def clear(self) -> None:
        """Remove all entries."""
        self._db.execute("DELETE FROM content_cache")
        self._db.commit()
55 changes: 55 additions & 0 deletions packages/python-sdk/stackmemory/client.py
@@ -0,0 +1,55 @@
"""StackMemory SDK — main entry point."""

from __future__ import annotations

import sqlite3
from pathlib import Path

from stackmemory.cache import ContentCache
from stackmemory.packs import SkillPackRegistry
from stackmemory.provenance import ProvenanceStore


def _default_data_dir() -> Path:
    import os
    home = os.environ.get("HOME") or os.environ.get("USERPROFILE") or "/tmp"
    return Path(home) / ".stackmemory"


class StackMemory:
    """Unified entry point for cache, packs, and provenance.

    Usage::

        from stackmemory import StackMemory

        sm = StackMemory()
        sm.cache.put("hello world", "test")
        sm.packs.list()
        sm.provenance.record(TraceEvent(operation="test"))
        sm.close()
    """

    def __init__(self, data_dir: str | Path | None = None) -> None:
        self.data_dir = Path(data_dir) if data_dir else _default_data_dir()
        self.data_dir.mkdir(parents=True, exist_ok=True)

        self._cache_db = sqlite3.connect(str(self.data_dir / "content-cache.db"))
        self._packs_db = sqlite3.connect(str(self.data_dir / "skill-packs.db"))
        self._prov_db = sqlite3.connect(str(self.data_dir / "provenance.db"))

        self.cache = ContentCache(self._cache_db)
        self.packs = SkillPackRegistry(self._packs_db)
        self.provenance = ProvenanceStore(self._prov_db)

    def close(self) -> None:
        """Close all database connections."""
        self._cache_db.close()
        self._packs_db.close()
        self._prov_db.close()

    def __enter__(self) -> "StackMemory":
        return self

    def __exit__(self, *args: object) -> None:
        self.close()