diff --git a/.claude/commands/vouch-propose-from-pr.md b/.claude/commands/vouch-propose-from-pr.md new file mode 100644 index 00000000..aca31fc7 --- /dev/null +++ b/.claude/commands/vouch-propose-from-pr.md @@ -0,0 +1,25 @@ +--- +description: Distill a merged PR into vouch claim proposals +--- + +# /vouch-propose-from-pr + +A PR is a decision: someone proposed a change, the team accepted it. The +"why" gets compressed into the merge message and forgotten. This command +preserves the why as cited claims in the KB. + +Steps: + +1. Parse `$ARGUMENTS` as a PR URL or `<owner>/<repo>#<number>`; default to the + most-recently-merged PR you authored. +2. Fetch the PR title, body, and the merged-commit SHA via `gh`. +3. Register the merge commit via `kb_register_source` so subsequent claims + can cite it. +4. Read the diff. For each *behavioural* change (not formatting / renaming), + draft one `kb_propose_claim` whose text summarises the new invariant the + code now upholds, citing the source from step 3. +5. Propose at most five claims per PR. If a PR warrants more than that, suggest the + contributor split it next time. + +Do not auto-approve. The KB's review gate is intentional; this command +only fills the queue. diff --git a/.claude/commands/vouch-recall.md b/.claude/commands/vouch-recall.md new file mode 100644 index 00000000..d72d7168 --- /dev/null +++ b/.claude/commands/vouch-recall.md @@ -0,0 +1,20 @@ +--- +description: Recall what the project's vouch KB knows about a topic +--- + +# /vouch-recall + +Use vouch's `kb_context` MCP tool to assemble a working set of claims, sources, +and entities the KB already has on the current topic. Print them with their +ids and citations; do not write anything. + +Steps: + +1. Call `kb_context` with `query: "$ARGUMENTS"`. +2. List every returned claim by id + one-line text; for each, show the + source ids it cites. +3. 
End with a one-sentence summary of what's *missing* from the KB on this + topic — the gap the user can fill with `/vouch-propose-from-pr` or + `kb_propose_claim`. + +Be terse. The KB is meant to remove ambiguity, not pad it. diff --git a/.claude/commands/vouch-resolve-issue.md b/.claude/commands/vouch-resolve-issue.md new file mode 100644 index 00000000..1b5aaf63 --- /dev/null +++ b/.claude/commands/vouch-resolve-issue.md @@ -0,0 +1,27 @@ +--- +description: Use vouch's KB to ground a fix for a GitHub issue +--- + +# /vouch-resolve-issue + +Wire vouch's `kb_context` into an issue-resolution flow: the KB should +inform the fix, and the act of solving should propose new claims that +make the next contributor faster. + +Steps: + +1. Parse `$ARGUMENTS` as a GitHub issue URL or `<owner>/<repo>#<number>` shorthand. + If neither, ask for clarification. +2. `kb_context` with the issue title + body — what does the KB already know + about this area? Show the top 5 claims. +3. Read the relevant code paths. +4. Propose the smallest fix (run the project's tests first to confirm the + bug reproduces). +5. After the fix is committed, propose **at most three** new claims via + `kb_propose_claim` that capture: + * the root cause in one sentence (cited by the offending file:line), + * the chosen fix pattern (cited by the patch commit), and + * any policy/precedent established (only if novel). + +Do not auto-approve. Leave the proposals in `.vouch/proposed/` for the +maintainer to review with `vouch approve` after the PR merges. diff --git a/.claude/commands/vouch-status.md b/.claude/commands/vouch-status.md new file mode 100644 index 00000000..3320c642 --- /dev/null +++ b/.claude/commands/vouch-status.md @@ -0,0 +1,23 @@ +--- +description: Show the project's vouch KB at a glance +--- + +# /vouch-status + +Run vouch's `kb_status` MCP tool and surface the result. Use this when the +user asks "what's in our KB?" or before/after a long claim-proposal flow so +they can see the proposal count tick up. 
+ +Format: + +``` +KB at + claims: + sources: + entities: + pending: ← review queue depth + last audit: +``` + +If `pending > 0`, suggest the user run `vouch approve ` (or `vouch lint` +if they want to inspect anything first). Do not propose anything yourself. diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000..3130289c --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,60 @@ +{ + "permissions": { + "allow": [ + "mcp__vouch__kb_status", + "mcp__vouch__kb_capabilities", + "mcp__vouch__kb_search", + "mcp__vouch__kb_context", + "mcp__vouch__kb_read_claim", + "mcp__vouch__kb_read_source", + "mcp__vouch__kb_read_page", + "mcp__vouch__kb_read_entity", + "mcp__vouch__kb_read_relation", + "mcp__vouch__kb_list_claims", + "mcp__vouch__kb_list_sources", + "mcp__vouch__kb_list_pages", + "mcp__vouch__kb_list_entities", + "mcp__vouch__kb_list_relations", + "mcp__vouch__kb_list_pending" + ] + }, + "hooks": { + "SessionStart": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch status --json || true" + }, + { + "type": "command", + "command": "vouch capture banner || true" + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch capture observe || true" + } + ] + } + ], + "SessionEnd": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch capture finalize || true" + } + ] + } + ] + } +} diff --git a/.gitignore b/.gitignore index eec57be1..9a61a8d8 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ __pycache__/ .ruff_cache/ .coverage htmlcov/ +bench.json +.benchmarks/ dist/ build/ *.swp diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f4d85de..b03d5c95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,11 +17,38 @@ All notable changes to vouch are documented here. Format follows committed SVGs stay reproducible (#286). 
### Added +- auto-capture: claude code sessions are harvested via hooks and filed as a + single pending session-summary proposal for human approval. a `PostToolUse` + hook (`vouch capture observe`) appends compact tool-use observations to an + ephemeral, gitignored `.vouch/captures/.jsonl` buffer; a + `SessionEnd` hook (`vouch capture finalize`) rolls the buffer plus a git-diff + backstop into one `session` page proposal — mechanical, no llm, and never + auto-approved. a `SessionStart` banner (`vouch capture banner`) nudges the + next session when captured summaries await review. opt out with + `capture.enabled: false` in `.vouch/config.yaml`. +- session-start recall: a `SessionStart` hook (`vouch recall`) injects a digest + of every live approved claim (`[id] text`) plus approved page titles into a + new claude session's context, so it starts aware of the reviewed KB. only + approved knowledge is emitted; archived / superseded / redacted claims are + excluded; size-guarded by `recall.max_chars` with an explicit truncation + notice. opt out with `recall.enabled: false`. +- `vouch install-mcp claude-code` now merges its hooks and read-only permission + allowlist into an existing `.claude/settings.json` (a `json_merge` install + strategy) instead of skipping it, so the capture / recall hooks land on + projects that already have a settings file. idempotent; user entries are + preserved. - GitHub PR auto-labeling: a pull-request metadata-only labeler workflow now applies vouch surface labels from `.github/labeler.yml`, keeps those labels in sync as files change, and adds OpenClaw-style `size: XS` through `size: XL` labels based on non-doc changed lines. Maintainers can also run it manually to backfill labels on already-open PRs. +- `vouch detect-themes` — cross-session pattern detection via deterministic + entity co-occurrence scoring. 
`kb.detect_themes` is read-only (returns + ranked clusters); `kb.propose_theme` routes synthesis pages through the + review gate so they appear in `kb.list_pending`. Supports `--propose` for + one-shot propose-all and `--json` for machine-readable output. Configurable + via `themes.min_sessions`, `themes.min_claims`, `themes.top_k`, and + `themes.enabled` in `config.yaml` (#311). - `vouch dual-solve ` — run claude + codex on one github issue in isolated git worktrees, compare the two diffs, keep the branch you pick, and propose the chosen solution's rationale into the KB. A sibling tool to @@ -96,6 +123,12 @@ All notable changes to vouch are documented here. Format follows KB under `eval/fixture-kb/`, and an `eval` workflow gating retrieval changes (#226). ### Fixed +- `vouch pending` (and every bulk `list_*` path) no longer crashes when a + single artifact file is unreadable — a corrupt or mojibake yaml is skipped + with a warning instead of aborting the whole listing. +- all text-mode file i/o under `src/vouch/` now pins `encoding="utf-8"`, so a + non-utf-8 locale (e.g. latin-1) can no longer mangle non-ascii claim text + into raw control bytes that the yaml loader rejects, nor crash on write. - `parse_since` (the `--since` parser behind `vouch metrics`/`vouch audit`) now raises a clean `MetricsError` for a duration too large to represent (e.g. `--since 1000000000000d`), instead of letting an uncaught `OverflowError` traceback escape — restoring the documented "clean error, not a traceback" contract. - `sync_apply` now loads the sync source exactly once and passes the same `_SyncSource` instance into `sync_check`, closing a TOCTOU window where a bundle replaced on disk between the two `_load_source` calls could cause the validation and write phases to operate on different snapshots. Also eliminates redundant directory walks (KB sources) and triple tarball opens (bundle sources). Fixes #217. 
- `vault_to_kb` now passes `slug_hint=page_id` to `propose_page` so vault edit proposals target the existing page id from frontmatter instead of a slugified copy of the title (fixes #219). diff --git a/CLAUDE.md b/CLAUDE.md index d5dc673a..3b7505b9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -199,8 +199,15 @@ Four registration sites — `test_capabilities` will fail if you miss one: Plus a test under `tests/test_.py`. If the method *reads* the KB, consider whether it should attach the -`_meta.vouch_hot_memory` sidebar from `src/vouch/hot_memory.py`. The -sidebar is added per-tool — there's no global decorator. +`_meta.vouch_salience` sidebar (built in `src/vouch/salience.py`, +attached inline by `kb_context` — see `kb_context` in +`src/vouch/server.py`). It's added per-tool — there's no global +decorator. Don't confuse it with `_meta.vouch_hot_memory`, which is +written only by the OpenClaw context engine's `assemble()` +(`src/vouch/openclaw/context_engine.py`); `src/vouch/hot_memory.py` is +the in-process session registry that *feeds* salience, not a response +field. (`_meta.vouch_trust` is separate again — stamped on every +dict-shaped result by a global wrapper, not per-tool.) 
## Release flow diff --git a/Makefile b/Makefile index 03dd2596..021738a6 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help install dev test test-cov lint format type check build clean examples-screenshots +.PHONY: help install dev test test-cov bench lint format type check build clean examples-screenshots smoke-capture smoke-recall PY ?= python PIP ?= $(PY) -m pip @@ -9,6 +9,7 @@ help: @echo " make install editable install with dev extras" @echo " make test run pytest" @echo " make test-cov run pytest with coverage" + @echo " make bench run the performance benchmark suite (needs pytest-benchmark)" @echo " make lint ruff check" @echo " make format ruff format (writes)" @echo " make type mypy" @@ -16,6 +17,8 @@ help: @echo " make build build sdist + wheel" @echo " make clean remove caches, build artifacts, *.egg-info" @echo " make examples-screenshots re-render docs/img/examples/*.svg" + @echo " make smoke-capture end-to-end check of session auto-capture" + @echo " make smoke-recall end-to-end check of session-start recall" install: $(PIP) install -e '.[dev]' @@ -39,15 +42,28 @@ type: check: lint type test +# the bench_*.py filenames don't match pytest's default python_files glob, +# so the override is required or zero benchmarks are collected. +bench: + $(PY) -m pytest benchmarks/ --benchmark-only \ + -o python_files='bench_*.py test_*.py' \ + --benchmark-json=bench.json + examples-screenshots: $(PY) docs/img/examples/render.py +smoke-capture: + VOUCH="$(PY) -m vouch" bash scripts/smoke-capture.sh + +smoke-recall: + VOUCH="$(PY) -m vouch" bash scripts/smoke-recall.sh + build: $(PY) -m pip install --upgrade build $(PY) -m build clean: rm -rf build dist *.egg-info src/*.egg-info \ - .pytest_cache .ruff_cache .mypy_cache \ - coverage.xml .coverage htmlcov + .pytest_cache .ruff_cache .mypy_cache .benchmarks \ + coverage.xml .coverage htmlcov bench.json find . 
-type d -name __pycache__ -prune -exec rm -rf {} + diff --git a/README.md b/README.md index 05d2078a..497c76cf 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ **Git-native, review-gated knowledge base for LLM agents. MCP server + JSONL tool server + CLI.**

- vouch — propose → review → commit → retrieve + vouch — sessions auto-capture into a review-gated knowledge base: propose or capture → review → commit → retrieve

@@ -17,25 +17,28 @@ `vouch` is a knowledge base for LLM agents with an explicit **review gate**: agents *propose* writes; humans *approve* them with `vouch approve`. Approved artifacts are plain files on disk — YAML for claims, markdown for pages — so the KB lives in your repo, is reviewed in PRs, diffs cleanly, and can be exported as a portable bundle. +It also captures your Claude Code sessions automatically — each session's work is harvested and rolled up into a summary. But where the persistent-memory tools compress with an LLM and inject straight back, vouch's rollup is **mechanical (no LLM)** and the summary lands in the **same review gate**: nothing becomes durable memory until you approve it. + Still alpha — surface is small on purpose; expect breaking changes pre-1.0. -> **Featured for Gittensor (SN74).** vouch ships a one-command starter pack for **Gittensor** — Bittensor subnet 74. `vouch init --template gittensor` seeds a cited, already-approved decision-memory of SN74's scoring model: merged-PR rewards, PAT verification, sybil-resistance, the repo allow-list, the issue multiplier, and the emission split. It's the durable *why* behind each rule — reviewed, cited, and committed alongside your code. → **[docs/gittensor.md](docs/gittensor.md)** +> **Built for Gittensor (SN74) miners.** Mining subnet 74 means landing merged PRs across a whitelist of repos that keeps shuffling — which means re-investigating each repo's codebase and merge bar every session your agent opens. vouch auto-captures what a session works out, you approve what's worth keeping, and the next session recalls it: less re-discovery, more merged PRs. `vouch init --template gittensor` seeds the cited baseline of how SN74 scores today. → **[docs/gittensor.md](docs/gittensor.md)** ## Why this exists -Three opinionated choices distinguish vouch from the neighbours: +Four opinionated choices distinguish vouch from the neighbours: 1. 
**The KB is a folder in your repo.** Git is your audit log, your backup, and your sync mechanism. PRs are your review UI. 2. **Writes require approval.** Agents file *proposals*; a human (or trusted approving agent) explicitly accepts them. `proposed/` is gitignored, so rejected drafts never pollute history. 3. **Claims must cite sources.** A claim without at least one Source / Evidence id is a validation error, not a warning. Sources are content-hashed; the same evidence registered twice de-duplicates. +4. **Sessions capture themselves — but stay gated.** With the Claude Code hooks installed, a `PostToolUse` hook harvests each tool call into a gitignored scratch buffer and a `SessionEnd` hook rolls it into one *pending* session-summary page. The harvest is automatic and the rollup is mechanical (no LLM) — but the commit still waits for your `approve`, and the next session starts from approved summaries via `vouch recall`. ## When to use vouch Worth it when: -- **You run or contribute to a Gittensor (SN74) repo.** Scoring weights, the repo allow-list, anti-sybil thresholds, and emission splits get debated across PRs, Discord, and validator changes — then settle into nobody's notes. `vouch init --template gittensor` gives you a cited, reviewed record of *why* each rule exists and what it superseded. See [docs/gittensor.md](docs/gittensor.md). +- **You mine Gittensor (SN74).** Emissions come from merged PRs across a whitelist that keeps shuffling, so your agent burns each session re-learning the same codebases and merge bars. vouch captures each session's findings for approval and recalls them next time — it stops re-deriving what it already knew and keeps targeting the repos that pay. See [docs/gittensor.md](docs/gittensor.md). - **Multiple agents share a repo** (Claude Code + Cursor + a CI bot). Per-agent attribution in the audit log makes "which agent claimed what" answerable. 
-- **Sessions keep re-explaining the same context.** Curated, cited claims let new sessions start from your team's agreed answer instead of re-grepping. +- **Sessions keep re-explaining the same context.** Curated, cited claims let new sessions start from your team's agreed answer instead of re-grepping — and vouch auto-captures each Claude Code session into a summary you can approve, so memory accrues without letting the agent write its own history. - **You want decision records without the ADR ceremony.** `vouch crystallize` promotes a session's durable parts into proposals; one approve and they're permanent. - **You'd review agent-stated facts the way you review agent-written code.** Vouch is a PR queue for claimed knowledge. - **Compliance / audit trails matter.** Required citations + append-only audit log give you "who decided X, citing what, when" for free. @@ -64,6 +67,33 @@ Claude Code install to point you at the next step (`vouch install-mcp claude-code`). Inspect it first if you'd like — it's [`install.sh`](install.sh) at the repo root. +## Running the tests + +From a clone with the dev extras installed (`pip install -e '.[dev]'`): + +```bash +# the full CI gate — lint + types + unit tests (exactly what CI runs) +make check # == ruff check + mypy src + pytest + +# just the unit tests +python -m pytest tests/ -q --ignore=tests/embeddings + +# a single module or test +python -m pytest tests/test_capture.py -q +python -m pytest tests/test_recall.py::test_digest_includes_approved_claim_and_page -q + +# with coverage +make test-cov # term-missing + coverage.xml + +# end-to-end smoke checks for the claude-code session hooks +make smoke-capture # capture: observe → finalize → pending summary +make smoke-recall # recall: approved knowledge injected at session start +``` + +The embedding-heavy tests live under `tests/embeddings/` and need the extra +`pip install -e '.[embeddings]'` (they run as a separate CI job); drop the +`--ignore` flag once installed. 
+ ## Quick start ```bash @@ -102,27 +132,48 @@ vouch cite vouch-starter-reviewed-knowledge The starter claim is already durable and cites the starter source. Replace it with your project's first real source and claim when you are ready. -![vouch end-to-end demo](docs/demo.gif) +### Automatic session capture -The full captured walkthrough lives at [docs/example-session.md](docs/example-session.md); re-render the GIF from [docs/demo.tape](docs/demo.tape) with `vhs docs/demo.tape`. +Once vouch's Claude Code hooks are installed, sessions capture *themselves* — +this is the loop the demo shows. A `PostToolUse` hook harvests each tool call +into a gitignored scratch buffer; at session end a `SessionEnd` hook rolls the +buffer plus a `git diff` backstop into **one pending "session summary" page** — +mechanically, no LLM, never auto-approved. You review and `vouch approve` it +like any other write, and the next session starts with it injected via `vouch +recall`. Passive harvest, human-gated commit, no re-explaining: + +![vouch auto-capture demo](docs/demo.gif) + +The full walkthrough with real output lives at [docs/example-session.md](docs/example-session.md); re-render the GIF from [docs/demo.tape](docs/demo.tape) with `vhs docs/demo.tape`. + +Prefer reading to running? The [examples/](examples/) directory ships sample KBs as committed files, each with rendered screenshots of `vouch status`, `search`, `show`, `audit`, and `diff` against the fixture — see what the CLI returns before installing anything. ## Gittensor (SN74) -vouch's first domain template targets **Gittensor** — Bittensor subnet 74, which rewards open-source contribution by rule. Its scoring model evolves across PRs, Discord, and validator changes, and the rationale usually lives in people's heads. vouch is the durable, cited memory for it: +**Gittensor** — Bittensor subnet 74 — pays miners in TAO for landing *merged* pull requests into whitelisted open-source repos. 
Contributions are scored by code quality, each repo's allocation, and programming-language factors, and the whitelist itself shuffles as the subnet matures. So mining well means pointing a coding agent at a rotating set of target repos and landing mergeable PRs in the ones that pay. + +The tax on that is re-investigation. Every target repo means (re)learning its architecture, its CI, its `CONTRIBUTING.md`, the maintainer's merge bar, and which past attempts got rejected and why. Across a dozen repos and a dozen sessions, your agent re-derives all of it from cold each time — time not spent landing PRs. vouch turns each session's findings into reviewed memory the next session recalls, so the investigation happens once: ```bash -cd your-gittensor-repo -vouch init --template gittensor # seeds 1 source, 1 entity, 7 cited claims about SN74 scoring -vouch status # durable: 7 claims · 1 source · 1 entity -vouch search "emission split" -git add .vouch && git commit -m "chore: add vouch decision-memory KB" +cd acme-httpkit # a whitelisted target repo — Go, healthy allocation +vouch init --template gittensor # seeds a cited baseline of how SN74 scores today +vouch install-mcp claude-code # wires the capture + recall hooks + +# session 1 (mon): the agent maps the repo and attempts issue #212 (a pool leak). +# the claude-code hooks auto-capture the work. you approve the durable summary +# and file two cited claims: httpkit's merge bar, and why the first PR bounced. +vouch pending +vouch approve --reason "merge bar + rejected approach — worth keeping" + +# session 2 (wed): opens with `vouch recall`. the agent already knows the layout, +# that merges need `make test` green + a changelog entry, and that #212's first +# attempt was rejected for a missing regression test. it skips re-discovery and +# lands the fixed PR. 
``` -The seeded pack covers merged-PR rewards, PAT verification, scoring factors, sybil-resistance, the repo allow-list, the issue-solving multiplier, and the emission split — each a cited, approved, supersede-able claim. When a rule changes, `vouch supersede` the old claim with the new one so the history of *what changed* stays queryable. - -vouch stores **no** live signals — it is not a validator or miner client and never reads on-chain scores. It is the institutional memory that sits beside the live layer (Gittensory). The seeded claims are starter-grade; `vouch supersede` them with the real spec or PR once you confirm the live rule. +When the whitelist shuffles and httpkit's allocation drops, `vouch supersede` the claim that said it was worth targeting — your agent re-prioritizes toward a higher-allocation repo, and the decision stays cited and reviewed instead of lost in a Discord thread. The `--template gittensor` seed is the cold-start: 7 cited, approved claims about how merged-PR scoring, the repo allow-list, and the emission split work today; everything after that is capture, and each session you approve makes the next one start further ahead. -Full adoption guide — install, seed, wire the MCP server, capture decisions as cited claims: **[docs/gittensor.md](docs/gittensor.md)**. +vouch reads **no** live signals — it never checks on-chain scores, verifies PATs, or submits weights (that's the `gitt` miner client). It's the reviewed record of what your agent already worked out, so it never works it out twice. Full miner walkthrough: **[docs/gittensor.md](docs/gittensor.md)**. 
## Object model @@ -174,6 +225,7 @@ The files are the source of truth; `state.db` is a derivable cache (`vouch index ``` vouch init # set up .vouch/ at PATH +vouch install-mcp HOST [--tier T1-T4] # wire MCP + capture/recall hooks into a host vouch discover [--path P] # find the nearest .vouch/ root vouch capabilities # emit the JSON capabilities descriptor vouch status [--json] # KB counts + pending proposals @@ -208,6 +260,9 @@ vouch session start [--task ...] [--note ...] vouch session end SESSION_ID vouch crystallize SESSION_ID [--no-page] +vouch recall # digest of approved knowledge for session-start injection +vouch capture observe|finalize|finalize-all|banner # hook-driven session capture (claude code) + vouch search QUERY [--limit N] vouch context TASK [--max-chars N] [--min-items N] [--require-citations] vouch index @@ -257,6 +312,14 @@ In your project's `.mcp.json`: `VOUCH_AGENT` is recorded as `proposed_by` and as the actor on every audit event, so multi-agent setups can attribute writes correctly. +The `.mcp.json` above wires the MCP server only. To also turn on **automatic session capture** and start-of-session **recall**, install the Claude Code hooks: + +```bash +vouch install-mcp claude-code +``` + +Its `.claude/settings.json` (tier T4) registers a `PostToolUse` hook (`vouch capture observe`), a `SessionEnd` hook (`vouch capture finalize`), and a `SessionStart` hook that runs `vouch recall` and nudges any pending captured summaries. Capture only ever files *pending* proposals — the review gate holds. The full loop is walked in [docs/example-session.md](docs/example-session.md). 
+ ## Running vouch as an OpenClaw plugin Vouch ships an [OpenClaw](https://github.com/dripsmvcp/openclaw) plugin manifest at the @@ -340,6 +403,8 @@ vouch import-apply kb.tar.gz --on-conflict skip # apply (default skip; never de |---|---|---|---| | Knowledge lives in | a service | filesystem | your **repo** | | Review of writes | none | none | **explicit `approve`** | +| Session auto-capture | via LLM extraction | no | **yes — gated** | +| Summaries need an LLM | yes | — | **no (mechanical)** | | Evidence required | no | optional | **enforced** | | Per-agent attribution | partial | none | **yes** (audit log) | | Graph (entities + relations) | no | no | **yes** | @@ -352,10 +417,11 @@ vouch import-apply kb.tar.gz --on-conflict skip # apply (default skip; never de | Area | Current support | |------|-----------------| | Knowledge base | `.vouch/` folder, YAML claims/entities/relations/evidence/sessions, markdown pages with frontmatter, JSONL audit log, content-addressed sources | -| CLI | `init`, `discover`, `capabilities`, `status`, `lint`, `doctor`, `fsck`, `pending`, `show`, `approve`, `reject`, `propose-{claim,page,entity,relation}`, `source add`, `source verify`, `supersede`, `contradict`, `archive`, `confirm`, `cite`, `session {start,end}`, `crystallize`, `search`, `context`, `index`, `audit`, `export`, `export-check`, `import-check`, `import-apply`, `serve` | +| CLI | `init`, `install-mcp`, `discover`, `capabilities`, `status`, `lint`, `doctor`, `fsck`, `pending`, `show`, `approve`, `reject`, `propose-{claim,page,entity,relation}`, `source add`, `source verify`, `supersede`, `contradict`, `archive`, `confirm`, `cite`, `session {start,end}`, `crystallize`, `capture {observe,finalize,finalize-all,banner}`, `recall`, `search`, `context`, `index`, `audit`, `export`, `export-check`, `import-check`, `import-apply`, `serve` | | Tool servers | MCP over stdio + JSONL over stdin/stdout, same `kb.*` surface across both transports, capabilities + knowledge-capability 
descriptor | | Schemas | 13 JSON Schemas (Draft 2020-12) generated from pydantic in [schemas/](schemas/), plus hand-maintained `bundle.manifest` and `jsonl-envelope` schemas | | Write safety | review-gated writes via [proposed/](spec/review-gate.md), `dry_run:true` previews, host trust required for `approve`/`reject`, atomic exclusive-create storage, path-traversal blocked on source intake and bundle import | +| Session capture | Claude Code hooks harvest each session (`PostToolUse` → `vouch capture observe`) into a gitignored scratch buffer; `SessionEnd` rolls it up mechanically (no LLM) into one review-gated session-summary page; `SessionStart` injects approved knowledge via `vouch recall` and nudges pending summaries. Never auto-approves | | Retrieval | `retrieval.backend` in `config.yaml` selects the path: `auto` (default — embedding → FTS5 → substring), `embedding`, `fts5`, or `substring`. Semantic backends (`all-mpnet-base-v2`, `MiniLM-L6`, fastembed-BGE) ship behind install extras; `auto` degrades to FTS5 when they aren't installed. 
Context packs with citations + quality gate | | Lifecycle | `supersede`, `contradict`, `archive`, `confirm`, `cite` — direct mutations, all audited | | Portability | tar.gz bundles with per-file sha256 `manifest.json`, `export-check`, `import-check`, `import-apply` with skip/overwrite/fail conflict modes | diff --git a/SPEC.md b/SPEC.md index df028a31..66b45b84 100644 --- a/SPEC.md +++ b/SPEC.md @@ -373,7 +373,7 @@ Returned by `kb.capabilities`: ```json { "name": "vouch", - "version": "0.0.1", + "version": "1.0.0", "spec": "vouch-0.1", "methods": ["kb.capabilities", "kb.status", ...], "retrieval": ["fts5", "substring"], diff --git a/adapters/claude-code/.claude/settings.json b/adapters/claude-code/.claude/settings.json index 11a1949a..5fd35a67 100644 --- a/adapters/claude-code/.claude/settings.json +++ b/adapters/claude-code/.claude/settings.json @@ -21,11 +21,46 @@ "hooks": { "SessionStart": [ { + "comment": "finalize old buffers from previous sessions; current session will be finalized here too on next session start (fallback: windowclose event not yet supported by claude-code extension)", "matcher": "*", "hooks": [ + { + "type": "command", + "command": "vouch capture finalize-all || true" + }, { "type": "command", "command": "vouch status --json || true" + }, + { + "type": "command", + "command": "vouch capture banner || true" + }, + { + "type": "command", + "command": "vouch recall || true" + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch capture observe || true" + } + ] + } + ], + "SessionEnd": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch capture finalize || true" } ] } diff --git a/adapters/claude-code/README.md b/adapters/claude-code/README.md index 6b28a4ec..8c3efe05 100644 --- a/adapters/claude-code/README.md +++ b/adapters/claude-code/README.md @@ -57,6 +57,49 @@ In a fresh session, ask Claude: It should enumerate `kb_search`, `kb_propose_claim`, etc. 
If not, run `claude --debug-mcp` to see why the server isn't loading. +## Session Capture & Auto-Proposal + +When you work in a Claude Code session, vouch automatically captures your +tool use (file reads, edits, commands, etc.). When you close the session +window, vouch proposes the captured knowledge to the KB for review. + +### How it works + +1. **Capture**: Each tool call (Read, Edit, Bash, etc.) is logged to + `.vouch/captures/.jsonl` (gitignored). + +2. **Cleanup on session start**: When you start a new session, any + unfinalized buffers from previous sessions (>1 hour old) are + automatically finalized and proposed. + +3. **Finalize on window close**: When the VS Code window closes, the + current session is finalized and proposed. + +### Configuration + +Disable capture in `.vouch/config.yaml`: + +```yaml +capture: + enabled: false +``` + +Adjust the stale buffer age (default: 1 hour): + +```yaml +capture: + max_age_seconds: 7200 # finalize buffers >2 hours old +``` + +### Fallback behavior + +If the "window close" event is not yet supported by your version of the +Claude Code extension, the current session will be finalized on the *next* +session start instead. The behavior is the same; proposals just appear in +the next session rather than immediately. + +To upgrade or check your extension version, see [Claude Code releases](https://github.com/anthropics/claude-code-releases). + ## Notes - `VOUCH_AGENT=claude-code` shows up as the actor in `audit.log.jsonl` diff --git a/adapters/claude-code/install.yaml b/adapters/claude-code/install.yaml index fb063cee..a1775ef0 100644 --- a/adapters/claude-code/install.yaml +++ b/adapters/claude-code/install.yaml @@ -4,7 +4,9 @@ # T2 = CLAUDE.md fenced snippet (idempotent append, see install_adapter._install_fenced). # T3 = four custom slash commands (`/vouch-recall`, `/vouch-status`, # `/vouch-resolve-issue`, `/vouch-propose-from-pr`). 
-# T4 = `.claude/settings.json` with a SessionStart hook + read-only kb_* auto-allow. +# T4 = `.claude/settings.json`: SessionStart (kb status + capture review banner + +# recall digest of approved knowledge), PostToolUse (capture observe), +# SessionEnd (capture finalize), plus read-only kb_* auto-allow. host: claude-code pretty: Claude Code fence: @@ -21,4 +23,4 @@ tiers: - { src: .claude/commands/vouch-resolve-issue.md, dst: .claude/commands/vouch-resolve-issue.md } - { src: .claude/commands/vouch-propose-from-pr.md, dst: .claude/commands/vouch-propose-from-pr.md } T4: - - { src: .claude/settings.json, dst: .claude/settings.json } + - { src: .claude/settings.json, dst: .claude/settings.json, json_merge: true } diff --git a/benchmarks/README.md b/benchmarks/README.md index 4b2f293a..11da8fd5 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -14,19 +14,48 @@ against tools that are. But there are a few numbers that *do* matter: ## Status -Not implemented yet. See [ROADMAP.md](../ROADMAP.md) (0.3) for the -planned timeline. This README is a placeholder so we don't lose the -shape of what we want to measure. +Implemented. All four `bench_*.py` files run under `pytest-benchmark`. +See [ROADMAP.md](../ROADMAP.md) (0.3) for the surrounding milestone. The +100k fixture in `conftest.py` exists but no bench file exercises it yet. -## Planned layout +## Baseline + +First recorded run. This is a single developer-machine snapshot, **not** +a published environment per the methodology below — treat it as +order-of-magnitude, not gospel. 
Medians, warm: + +| Benchmark | 1k claims | 10k claims | +|---|---|---| +| `search_fts5` (FTS5 query) | 0.46 ms | 1.65 ms | +| `search_substring` (fallback scan) | 241 ms | 2.42 s | +| `propose_claim` (hot-loop write) | 2.40 ms | — | +| `index_rebuild` | 311 ms | 7.25 s | +| `bundle_export` | 113 ms | — | +| `bundle_export_check` | 48.6 ms | — | +| `bundle_import` | 1.05 s | 12.4 s | + +What the numbers say: + +- **FTS5 search stays fast and scales sub-linearly** (~3.6x for 10x the + claims). The substring fallback reads every claim file, so it's ~500x + slower at 1k claims and ~1,500x slower at 10k — which is why it's only a fallback. +- **`propose_claim` has a median of 2.4 ms**, comfortably under the ~50 ms hot-loop + budget noted above. +- **A 10k-claim bundle imports in 12.4 s** — seconds, not minutes, which + is the bar this benchmark was written to guard. + +Environment: 13th Gen Intel Core i9-13900K (16 threads), ~22 GB RAM, +Python 3.14, vouch 1.0.0. Full per-run detail (min/max/stddev, machine +info) lands in `bench.json`. + +## Layout ``` benchmarks/ ├── README.md (you are here) -├── conftest.py pytest-benchmark configuration +├── conftest.py pytest-benchmark configuration + seeded KB fixtures ├── fixtures/ -│ ├── gen_kb.py synth a KB of N claims with realistic distributions -│ └── seed/ pre-built fixture KBs (small, medium, large) +│ └── gen_kb.py synth a KB of N claims with realistic distributions ├── bench_search.py kb.search latency at varying KB sizes ├── bench_propose.py kb.propose_* write latency ├── bench_bundle.py export + import + verify round-trips @@ -34,14 +63,21 @@ benchmarks/ ``` Benchmarks live outside `tests/` so a regular `pytest` run doesn't -pull them in. 
`pytest-benchmark` isn't in the `[dev]` extras, and the +`bench_*.py` filenames don't match pytest's default `python_files` +glob — so the invocation needs both an install and a collection +override: ```bash -make bench # not yet wired in the Makefile -# or: -pytest benchmarks/ --benchmark-only --benchmark-json=bench.json +pip install pytest-benchmark +pytest benchmarks/ --benchmark-only \ + -o python_files='bench_*.py test_*.py' \ + --benchmark-json=bench.json ``` +`make bench` is not wired in the Makefile yet; when it is, it should +fold in the `python_files` override so this isn't a footgun. + ## Methodology principles - **Real disks.** No tmpfs benchmarks. The file-based design makes diff --git a/docs/banner.svg b/docs/banner.svg index 8dbd52f9..1ab425c9 100644 --- a/docs/banner.svg +++ b/docs/banner.svg @@ -1,8 +1,8 @@ - + - + - + @@ -42,11 +42,50 @@ auditable + + + + + self-capturing + + + + + + + Sessions capture themselves + + + no LLM · gated + + + + + PostToolUse + observe → buffer + + + + + SessionEnd + roll up + git diff + + + + + one pending summary + enters the same gate + + + + + + - + 1 @@ -80,33 +119,31 @@ - + - + 2 Review - the gate + the gate — manual + captured PENDING - prop-7f3c · claim · cites src-9b2e + prop-7f3c · claim · src-9b2e - - approve + - - reject + @@ -117,10 +154,10 @@ - + - + 3 @@ -134,7 +171,7 @@ pages/<id>.md - markdown + frontmatter + session summaries + write-ups @@ -150,10 +187,10 @@ - + - + 4 @@ -161,8 +198,8 @@ - $ vouch serve - MCP · JSONL + $ vouch recall + injects approved knowledge @@ -183,8 +220,8 @@ - - + + .vouch/ — your KB lives in git @@ -198,17 +235,19 @@ stable confidence: 0.9 - audit: - proposed · approved by @alice · 2026-05-12 - proposed_by: - claude-code (session sess-2f81) + page: + session-summary (sess-8f2e) — captured, approved + audit: + proposed · approved by @alice · 2026-07-01 + proposed_by: + claude-code · vouch-capture (auto-capture) - - + + - propose → review → commit → retrieve · agents write 
proposals · humans approve · git is the source of truth + propose — or auto-capture a Claude Code session — then review → commit → retrieve · agents and sessions write proposals · humans approve · git is the source of truth diff --git a/docs/demo.gif b/docs/demo.gif index 617ea1cf..4719418a 100644 Binary files a/docs/demo.gif and b/docs/demo.gif differ diff --git a/docs/demo.tape b/docs/demo.tape index 76cda59b..43390f3f 100644 --- a/docs/demo.tape +++ b/docs/demo.tape @@ -1,4 +1,4 @@ -# vouch end-to-end demo — render with: +# vouch session auto-capture demo — render with: # vhs docs/demo.tape # Produces docs/demo.gif. @@ -8,79 +8,108 @@ Require vouch Set Shell "bash" Set FontSize 14 -Set Width 1100 -Set Height 700 +Set Width 1240 +Set Height 760 Set Padding 18 Set Theme "Catppuccin Mocha" -Set TypingSpeed 35ms +Set TypingSpeed 30ms Set PlaybackSpeed 1.0 -# --- prepare a sandbox ------------------------------------------------- +# --- prepare a sandbox project with git history (hidden) ----------------- Hide -Type "cd $(mktemp -d) && git init -q && echo 'Authentication uses stateless JWTs signed with RS256.' 
> auth.md && git add -A && git -c user.email=demo@x.com -c user.name=demo commit -q -m init && clear" +Type `D=$(mktemp -d)/acme-api && mkdir -p "$D/src" "$D/tests" && cd "$D"` +Enter +Type `git init -q` +Enter +Type `printf 'def verify(token):\n return decode(token)\n' > src/auth.py` +Enter +Type `printf 'def test_verify():\n assert verify("x")\n' > tests/test_auth.py` +Enter +Type `git add -A && git -c user.email=demo@x.com -c user.name=demo commit -q -m "chore: seed"` +Enter +# the "fix" the session makes — left uncommitted so finalize's git-diff backstop catches it +Type `printf 'def verify(token):\n # reject unsigned tokens\n return decode(token, verify_signature=True)\n' > src/auth.py` +Enter +Type "clear" Enter -Sleep 500ms Show -# --- init ------------------------------------------------------------- +# --- init ---------------------------------------------------------------- Type "vouch init" Sleep 300ms Enter -Sleep 1200ms +Sleep 1500ms -# --- register a source ------------------------------------------------ -Type "SID=$(vouch source add auth.md --title 'auth notes')" -Sleep 300ms +# --- a claude code session runs; the PostToolUse hook feeds vouch -------- +Type "# a claude code session runs. 
after each tool, the PostToolUse hook fires:" Enter -Sleep 600ms -Type "echo $SID" +Sleep 900ms +Type `echo '{"session_id":"sess-8f2e4c7a","tool_name":"Edit","tool_input":{"file_path":"src/auth.py"}}' | vouch capture observe` Enter -Sleep 1000ms +Sleep 1300ms -# --- propose a claim as an agent -------------------------------------- -Type "PID=$(VOUCH_AGENT=claude-code vouch propose-claim \" +# --- three more tool calls captured off-camera (Read / pytest / Grep) ---- +Hide +Type `echo '{"session_id":"sess-8f2e4c7a","tool_name":"Read","tool_input":{"file_path":"src/auth.py"}}' | vouch capture observe` Enter -Type " --text 'Authentication uses stateless JWTs signed with RS256' \" +Sleep 500ms +Type `echo '{"session_id":"sess-8f2e4c7a","tool_name":"Bash","tool_input":{"command":"pytest -q"}}' | vouch capture observe` Enter -Type " --source $SID --type fact --confidence 0.9)" +Sleep 500ms +Type `echo '{"session_id":"sess-8f2e4c7a","tool_name":"Grep","tool_input":{"pattern":"verify_signature"}}' | vouch capture observe` Enter -Sleep 800ms -Type "echo $PID" +Sleep 500ms +Type "clear" Enter -Sleep 1000ms +Show -# --- review ----------------------------------------------------------- -Type "vouch pending" +# --- the gitignored scratch buffer, quietly accumulating ----------------- +Type "# vouch logged each to a gitignored scratch buffer — not the KB yet:" Enter -Sleep 1500ms - -Type "vouch show $PID | head -20" +Sleep 800ms +Type "cat .vouch/captures/sess-8f2e4c7a.jsonl" Enter -Sleep 2500ms +Sleep 2600ms -# --- approve ---------------------------------------------------------- -Type "vouch approve $PID --reason 'matches the code'" +# --- session ends → SessionEnd hook rolls it into ONE pending summary ---- +Type "# session ends → the SessionEnd hook rolls the buffer + git diff into one proposal:" Enter -Sleep 1500ms - -Type "vouch status" +Sleep 900ms +Type `echo '{"session_id":"sess-8f2e4c7a"}' | vouch capture finalize` Enter -Sleep 2000ms +Sleep 2400ms -# --- retrieve 
--------------------------------------------------------- -Type 'vouch search "JWT"' +# --- next session start: the nudge, then the review queue ---------------- +Type "vouch capture banner" +Enter +Sleep 1700ms +Type "# nothing is auto-approved — you review it like any other write:" +Enter +Sleep 800ms +Type "vouch pending" Enter Sleep 2000ms -Type "vouch audit --tail 4" +# --- approve → durable page ---------------------------------------------- +Type `PID=$(vouch pending | grep -oE '[0-9]{8}-[0-9]{6}-[0-9a-f]{8}' | head -1)` +Enter +Sleep 400ms +Type "vouch approve $PID --reason 'accurate summary'" Enter -Sleep 3000ms +Sleep 1800ms -# --- the durable artifact on disk ------------------------------------- -Type "ls .vouch/claims/" +# --- the summary vouch kept, now durable markdown on disk ---------------- +Type "# the summary it kept — plain markdown, committed alongside your code:" Enter -Sleep 1500ms +Sleep 900ms +Type `sed -n '/^# session summary/,$p' .vouch/pages/session-summary-*.md` +Enter +Sleep 3600ms -Type "cat .vouch/claims/*.yaml" +# --- the next session starts with it — no amnesia ------------------------ +Type "# the next session starts with it injected — no re-explaining:" +Enter +Sleep 900ms +Type "vouch recall" Enter Sleep 4000ms diff --git a/docs/example-session.md b/docs/example-session.md index 230a37f3..1a17de6c 100644 --- a/docs/example-session.md +++ b/docs/example-session.md @@ -1,178 +1,206 @@ -# Example session +# Example session — automatic session capture -![vouch end-to-end demo](demo.gif) +![vouch auto-capture demo](demo.gif) -A full propose → review → commit → retrieve loop, captured from a real -run on 2026-05-21. Reproduce by following along in any git repo, or -re-render the GIF with `vhs docs/demo.tape` (see [demo.tape](demo.tape)). +A full **capture → review → commit → recall** loop, driven by Claude Code +hooks. 
A session works; vouch quietly harvests what it did into a gitignored +scratch buffer; at session end that buffer rolls up into a single **pending +summary proposal**; you approve it like any other write; the next session +starts with it injected. Nothing is auto-approved — the review gate stays +intact. Re-render the GIF with `vhs docs/demo.tape` (see [demo.tape](demo.tape)). + +The capture path is fully mechanical — no LLM, no network, no agent +discipline required. It is wired by the Claude Code adapter's hooks +(`adapters/claude-code/.claude/settings.json`): `PostToolUse → vouch capture +observe`, `SessionEnd → vouch capture finalize`, and a `SessionStart` banner. ## Setup ```bash -$ mkdir demo && cd demo && git init -q -$ echo "Authentication uses stateless JWTs signed with RS256." > auth.md -$ git add -A && git commit -q -m "init" +$ mkdir -p acme-api/src && cd acme-api && git init -q +$ printf 'def verify(token):\n return decode(token)\n' > src/auth.py +$ git add -A && git commit -q -m "chore: seed" $ vouch init -Initialised KB at /tmp/demo/.vouch -Next: `vouch serve` to expose the MCP server to your agent. +Initialised KB at /tmp/acme-api/.vouch +Seeded starter claim: vouch-starter-reviewed-knowledge +Next steps: + vouch status + vouch search agent + vouch serve ``` -`vouch init` creates the `.vouch/` directory with empty subfolders for -claims, pages, entities, relations, sources, sessions, proposed, -decided, plus `audit.log.jsonl` and `state.db`. - -## 1. Register a source +## 1. A session works — the PostToolUse hook harvests it -The agent (or you) registers the file as evidence material. Sources -are content-addressed — the id is the sha256 of the file content, so -the same file registered twice de-duplicates. +After every tool call, Claude Code's `PostToolUse` hook pipes the tool +payload to `vouch capture observe`. You never type these — the hook does. 
+Each call appends one compact observation (`{ts, tool, summary, files?, +cmd?}`) to a **gitignored scratch buffer**, deduped within a short window: ```bash -$ vouch source add auth.md --title "auth notes" -816fec5eb02e8965df3197cdd622c394c8845364c584fe0fe0023dd0459e8982 +# what the hook runs after a Read, an Edit, a test run, and a grep: +$ echo '{"session_id":"sess-8f2e4c7a","tool_name":"Read","tool_input":{"file_path":"src/auth.py"}}' | vouch capture observe +$ echo '{"session_id":"sess-8f2e4c7a","tool_name":"Edit","tool_input":{"file_path":"src/auth.py"}}' | vouch capture observe +$ echo '{"session_id":"sess-8f2e4c7a","tool_name":"Bash","tool_input":{"command":"pytest -q"}}' | vouch capture observe +$ echo '{"session_id":"sess-8f2e4c7a","tool_name":"Grep","tool_input":{"pattern":"verify_signature"}}' | vouch capture observe ``` -## 2. Propose a claim - -Agents call `kb.propose_claim` (over MCP/JSONL); from the CLI it looks -like this. The `VOUCH_AGENT` env var records *which* agent proposed, -so multi-agent setups stay attributable. +The buffer lives at `.vouch/captures/<session_id>.jsonl` — scratch, held +**outside** the KB, gitignored so it never pollutes history: ```bash -$ VOUCH_AGENT=claude-code vouch propose-claim \ - --text "Authentication uses stateless JWTs signed with RS256" \ - --source 816fec5e... \ - --type fact \ - --confidence 0.9 -20260521-065702-44a92aa8 +$ cat .vouch/captures/sess-8f2e4c7a.jsonl +{"files": ["src/auth.py"], "summary": "Read auth.py", "tool": "Read", "ts": 1782928126.8} +{"files": ["src/auth.py"], "summary": "Edited auth.py", "tool": "Edit", "ts": 1782928132.1} +{"cmd": "pytest -q", "summary": "Ran: pytest -q", "tool": "Bash", "ts": 1782928136.5} +{"summary": "Grep verify_signature", "tool": "Grep", "ts": 1782928141.1} ``` -The proposal lands in `.vouch/proposed/.yaml` (gitignored — it -shouldn't pollute history until you approve it). 
+Observations are one-line summaries and file names — **not** full tool +output — so credentials and large blobs never get buffered. + +## 2. Session ends — finalize rolls it into ONE pending proposal -## 3. Review the queue +At session end, the `SessionEnd` hook runs `vouch capture finalize`. It reads +the buffer, adds a `git diff` backstop (to catch edits `PostToolUse` missed), +and — if there are at least `capture.min_observations` (default 3) — files a +single **pending page proposal** through the normal review gate. It never +calls `approve()`. ```bash -$ vouch pending -• 20260521-065702-44a92aa8 [claim] by claude-code - Authentication uses stateless JWTs signed with RS256 - -$ vouch show 20260521-065702-44a92aa8 -id: 20260521-065702-44a92aa8 -kind: claim -proposed_by: claude-code -proposed_at: '2026-05-21T06:57:02.910715Z' -payload: - id: authentication-uses-stateless-jwts-signed-with-rs256 - text: Authentication uses stateless JWTs signed with RS256 - type: fact - confidence: 0.9 - evidence: - - 816fec5eb02e8965df3197cdd622c394c8845364c584fe0fe0023dd0459e8982 -status: pending +$ echo '{"session_id":"sess-8f2e4c7a"}' | vouch capture finalize +{ + "_meta": { + "vouch_trust": { "auth_subject": null, "caller_kind": "cli", "remote": false } + }, + "captured": 5, + "summary_proposal_id": "20260701-174914-a97e6a4d" +} ``` -## 4. Approve +`captured: 5` = four harvested observations + one file from the git-diff +backstop. The buffer file is deleted; its contents now live only inside the +pending proposal, awaiting your review. + +## 3. 
Next session start — the nudge + +The `SessionStart` hook runs `vouch capture banner`, so the next time you open +a session in this workspace you see how many captured summaries are queued: ```bash -$ vouch approve 20260521-065702-44a92aa8 --reason "matches the code" -Approved → claim/authentication-uses-stateless-jwts-signed-with-rs256 +$ vouch capture banner +🔔 1 auto-captured session summary(ies) awaiting review — run `vouch review`. ``` -What just happened (see [the approve flow](../spec/review-gate.md) for the -formal version): +## 4. Review the queue -1. A durable artifact is written to `.vouch/claims/.yaml` with - `approved_by` stamped on it. -2. The FTS5 index in `state.db` is updated so the claim is searchable - immediately. -3. The proposal file moves from `proposed/` → `decided/` with - `status=approved`, `decided_by`, `decision_reason`. -4. An `audit.log.jsonl` line records the decision. +Captured summaries are ordinary pending proposals — `vouch pending`, `vouch +review`, and the review-ui all show them, attributed to the `vouch-capture` +actor: ```bash -$ vouch status -KB at /tmp/demo/.vouch - durable: 1 claims • 0 pages • 1 sources • 0 entities • 0 relations - pending: 0 proposals - audit: 4 events • index: present +$ vouch pending +• 20260701-174914-a97e6a4d [page] by vouch-capture + session summary: acme-api (sess-8f2e4c7a) ``` -The on-disk claim: +## 5. 
Approve → durable page ```bash -$ cat .vouch/claims/authentication-uses-stateless-jwts-signed-with-rs256.yaml -id: authentication-uses-stateless-jwts-signed-with-rs256 -text: Authentication uses stateless JWTs signed with RS256 -type: fact -status: working -confidence: 0.9 -evidence: -- 816fec5eb02e8965df3197cdd622c394c8845364c584fe0fe0023dd0459e8982 -scope: project -created_at: '2026-05-21T06:57:13.947450Z' -updated_at: '2026-05-21T06:57:13.947477Z' -approved_by: claude-code +$ vouch approve 20260701-174914-a97e6a4d --reason "accurate summary" +Approved → page/session-summary-acme-api-sess-8f2e4c7a ``` -## 5. Retrieve +The summary vouch kept is now a plain-markdown page on disk, committed +alongside your code (shown here inside a fence so its own headings render +literally): -`vouch search` is for ranked snippets; `vouch context` builds a -prompt-ready bundle with a quality gate. +````text +# session summary: acme-api (sess-8f2e4c7a) -```bash -$ vouch search "JWT" -[claim] authentication-uses-stateless-jwts-signed-with-rs256 score=1.000 (substring) - Authentication uses stateless JWTs signed with RS256 +- generated: 2026-07-01T17:49:14.372133+00:00 +- session: `sess-8f2e4c7a` +- observations: 4 -$ vouch context "JWT" -{ - "query": "JWT", - "items": [ - { - "id": "authentication-uses-stateless-jwts-signed-with-rs256", - "type": "claim", - "summary": "Authentication uses stateless JWTs signed with RS256", - "score": 1.0, - "backend": "substring", - "citations": ["816fec5eb02e8965df3197cdd622c394c8845364c584fe0fe0023dd0459e8982"], - "freshness": "unknown" - } - ], - "quality": { "ok": true, "items": 1, "warnings": 0, "uncited_items": [] } -} -``` +## files modified this session -> **Note.** The default search backend is literal (FTS5 + substring), -> so a query like `"how does auth work"` won't match `"Authentication -> uses..."`. Install the embeddings extra (`pip install -e -> '.[embeddings-mpnet]'`) for semantic matching. +- src/auth.py -## 6. 
Audit trail +## git changes -```bash -$ vouch audit --tail 5 -2026-05-21T06:56:54Z kb.init by claude-code objects=[] -2026-05-21T06:56:58Z source.add by claude-code objects=['816fec5e...'] -2026-05-21T06:57:02Z proposal.claim.create by claude-code objects=['20260521-065702-44a92aa8'] -2026-05-21T06:57:14Z proposal.claim.approve by claude-code objects=['20260521-065702-44a92aa8', 'authentication-uses-stateless-jwts-signed-with-rs256'] ``` +src/auth.py | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) +``` + +## activity + +- Bash: 1 +- Edit: 1 +- Grep: 1 +- Read: 1 + +## notable commands + +- `pytest -q` + +## observations + +- Read auth.py +- Edited auth.py +- Ran: pytest -q +- Grep verify_signature +```` -Every mutation is on the record, with actor and object ids. +## 6. The next session starts with it — no amnesia + +The `SessionStart` hook also runs `vouch recall`, which emits a digest of all +approved knowledge for injection into the new session's context. The summary +you just approved is now in it — the next session opens already knowing what +the last one did: + +```text +$ vouch recall + +# approved KB knowledge for this repo — 1 claim(s), 2 page(s). reviewed, cited, durable. use kb_read_page / kb_search for detail; kb_propose_* (human-approved) to add more. + +## claims +- [vouch-starter-reviewed-knowledge] Vouch stores reviewed, cited knowledge in the repository so future agent sessions can retrieve agreed project context. + +## pages +- [edit-in-obsidian] Edit in Obsidian +- [session-summary-acme-api-sess-8f2e4c7a] session summary: acme-api (sess-8f2e4c7a) + +``` ## 7. Commit ```bash -$ git add .vouch/ && git commit -m "kb: approve auth-uses-jwt" +$ git add .vouch/ && git commit -m "kb: approve session summary" ``` -What lands in git: the durable artifact, the decision record, the -audit line. 
What doesn't: the `proposed/` draft (gitignored, since -unreviewed agent output shouldn't pollute history) and `state.db` (a -derivable cache — `vouch index` rebuilds it). +What lands in git: the durable page, its decision record, the audit line. +What doesn't: the `.vouch/captures/` scratch buffer (gitignored) and +`state.db` (a derivable cache — `vouch index` rebuilds it). + +## Notes + +- **The review gate holds.** Capture harvests and rolls up automatically, but + the only durable artifact is a `PENDING` proposal a human approves. There is + no auto-approve and no trusted-agent shortcut on this path. +- **No LLM anywhere.** The rollup is pure heuristics — type counts, file + names, a git-diff stat — so the hook stays fast and offline. +- **One summary per session**, not one per turn, so the queue doesn't flood. + Individual claims an agent files via MCP during a session still coexist as + their own pending items. +- **Config** lives under `capture.*` in `.vouch/config.yaml`: + `capture.enabled` (default `true`), `capture.min_observations` (default `3`). + Set `capture.enabled: false` to turn the whole path off. ## Next steps -- Wire vouch into Claude Code via `.mcp.json` (see [README](../README.md#wiring-into-claude-code)). -- Open a session for a longer task: `vouch session start --task "build the deploy pipeline"`, then `vouch crystallize ` at the end to promote the session's work into proposals. +- Wire vouch into Claude Code via `.mcp.json` and the adapter hooks (see + [README](../README.md#wiring-into-claude-code)). +- Prefer filing knowledge as you go? Agents can still call + `kb.propose_claim` / `kb.propose_page` directly over MCP — the manual write + path and the automatic capture path share the same review gate. - Export the KB as a portable bundle: `vouch export --out kb.tar.gz`. 
diff --git a/docs/getting-started.md b/docs/getting-started.md index a8f02128..f5aca276 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -101,7 +101,20 @@ git add .vouch && git commit -m "kb: approve auth-uses-jwt" ## 6. Wire an agent -Drop this into `.mcp.json` at the project root: +`vouch serve` is a stdio MCP server, so the agent's native registration is all +you need: + +```bash +claude mcp add vouch -- vouch serve # or: codex mcp add vouch -- vouch serve +``` + +Add `-e VOUCH_AGENT=claude-code` to attribute the agent's proposals to it +rather than your shell user. Confirm with `claude mcp list` (look for +`vouch … ✓ Connected`). + +Prefer a config file, or want the brain-first `CLAUDE.md`, slash commands, and +hooks too? Run `vouch install-mcp claude-code` — or drop this into `.mcp.json` +at the project root by hand: ```json { diff --git a/docs/gittensor.md b/docs/gittensor.md index 9db2abab..ce0e2c09 100644 --- a/docs/gittensor.md +++ b/docs/gittensor.md @@ -1,29 +1,39 @@ -# Adopting vouch for a Gittensor repo +# Mining Gittensor (SN74) with vouch -Gittensor (Bittensor subnet 74) rewards open-source contributions, and its -scoring rules, repo allow-list, anti-sybil measures, and emission-split -decisions evolve and get debated across PRs, Discord, and validator changes. -That rationale usually lives in people's heads and scattered threads — so when -a weight changes or a repo is de-listed, there's no durable, cited answer to -*"why did we decide this, and what did it replace?"* +Gittensor (Bittensor subnet 74) pays miners in TAO for landing **merged** pull +requests into whitelisted open-source repos. Validators verify GitHub account +ownership via a fine-grained PAT, then score merged contributions by code +quality, each repo's allocation (its emission share), and programming-language +factors. 
The whitelist — the "master repositories" and their shares — is +**dynamic**: it sees audits, additions, de-listings, and share changes as the +subnet matures. -vouch is a good fit for that gap: a small, **review-gated, cited** knowledge -base committed to the repo as the durable memory layer for maintainer and -validator decisions. +So mining well is a research loop: point a coding agent at a rotating set of +target repos and keep landing mergeable PRs in the ones that pay the most. -## vouch vs. Gittensory — different layers +## The problem vouch solves: re-investigation -These are complementary, not competing: +Every target repo is a cold start. To land a PR your agent has to work out: -| Layer | Owner | Holds | -|---|---|---| -| Chain / scoring | Gittensor (SN74) | the actual weights and emissions | -| **Live signals** | **Gittensory** | scores, queues, collision/reviewability | -| **Durable decisions** | **vouch** | *why* a rule exists, what it superseded — cited & reviewed | +- the repo's architecture and where the change belongs, +- how it builds and tests, and what CI must be green, +- the maintainer's merge bar (tests? a changelog entry? a signed CLA? small + focused diffs?), +- which issues are actually worth solving, and which past attempts already got + rejected — and why. + +Do that across a dozen repos and a dozen sessions and your agent re-derives all +of it from scratch every time. That re-investigation is time you are **not** +spending landing PRs — and merged PRs are the only thing that earns. -vouch deliberately stores **no** live signals. It is not a validator or miner -client; it doesn't read on-chain scores, verify PATs, or submit weights. It is -the institutional memory that sits alongside the live layer. +vouch is a review-gated knowledge base that lives in the repo. 
With its Claude +Code hooks installed, each session's findings are captured, you approve the +ones worth keeping, and the next session recalls them — so the investigation +into a repo happens **once**, then compounds. + +> vouch is **not** a validator or miner client. It never reads on-chain scores, +> verifies PATs, or submits weights — that is the `gitt` client's job (below). +> vouch is the reviewed memory of what your agent already figured out. ## 1. Install @@ -32,126 +42,144 @@ pipx install vouch-kb # the installed command is `vouch` vouch --version ``` -## 2. Seed a KB with the gittensor pack +## 2. Seed the scoring baseline -From the root of the Gittensor repo: +From the root of a target repo, seed a cited, approved starter pack describing +how SN74 scoring works today — so your agent knows what earns before it writes +a line: ```bash vouch init --template gittensor ``` -This creates `.vouch/` and seeds a cited, approved starter pack about SN74 -scoring — **1 source, 1 entity, 7 claims** (merged-PR rewards, PAT -verification, scoring factors, sybil-resistance, repo allow-list policy, -issue-solving multiplier, and emission split): +This creates `.vouch/` and seeds **1 source, 1 entity, 7 claims** (merged-PR +rewards, PAT verification, the scoring factors, sybil-resistance, the repo +allow-list policy, the issue-solving multiplier, and the emission split): ```bash vouch status -# durable: 7 claims • 1 sources • 1 entities • … -vouch search "scoring" -# claim/gittensor-merged-pr-base-reward …primary OSS reward signal… -# claim/gittensor-sybil-resistance …GitHub verification + merged-PR… -vouch doctor -# index present, citations resolve, sources verify → clean +# durable: 7 claims • 1 source • 1 entity • … +vouch search "scoring factors" +# claim/gittensor-scoring-factors …code quality, repository allocation, language… ``` -Commit it so the whole team shares one memory: +Commit it so the baseline travels with the repo: ```bash -git add .vouch && git commit -m 
"chore: add vouch decision-memory KB" +git add .vouch && git commit -m "chore: add vouch KB with gittensor baseline" ``` -`.vouch/.gitignore` keeps `proposed/` (drafts) and `state.db` (the derived -index) out of history automatically. - -> **The seeded claims are starter-grade.** They summarize the scoring model as -> understood when the template was authored. Before you rely on a specific -> rule or number, `vouch show ` it and `vouch supersede` it with the -> real spec/PR citation (see §4) so the KB reflects the live rules. +> **The seeded claims are starter-grade** — they summarize the model as +> understood when the template was authored. `vouch show ` one and +> `vouch supersede` it with the real spec/PR once you confirm the live rule +> (see §5). -## 3. Wire the MCP server for agents +## 3. Wire capture + recall into Claude Code -Add `.mcp.json` at the repo root so any MCP host (Claude Code, Cursor, Codex) -can query the KB and get cited answers instead of guessing: +This is what makes the loop automatic. Install the hooks: -```json -{ - "mcpServers": { - "vouch": { "command": "vouch", "args": ["serve"] } - } -} +```bash +vouch install-mcp claude-code ``` -An agent working in the repo can now call `kb.search` / `kb.context` ("how does -scoring work today?") and `kb.propose_claim` to draft new knowledge (still -gated — see below). +That writes `.mcp.json` (so the agent can `kb.search` / `kb.context` the KB for +cited answers) **and** `.claude/settings.json`, which registers: + +- a `PostToolUse` hook (`vouch capture observe`) that harvests each tool call + into a gitignored scratch buffer, +- a `SessionEnd` hook (`vouch capture finalize`) that rolls the buffer plus a + `git diff` backstop into **one pending session-summary page** — mechanically, + no LLM, never auto-approved, +- a `SessionStart` hook that runs `vouch recall` (injecting approved knowledge) + and nudges any pending summaries. -## 4. Capture decisions as cited claims +## 4. 
Mine one repo — the loop -The whole value is that every scoring/policy decision is **proposed, reviewed, -cited, and supersede-able**. When a change lands: +A concrete run against a whitelisted Go repo, `acme-httpkit`: + +**Session 1 (Monday).** The agent maps the codebase, reads `CONTRIBUTING.md`, +runs the tests, and attempts issue #212 (a connection-pool leak). Its first PR +is rejected for lacking a regression test. Everything it did is auto-captured. +At session end you have a pending summary; approve it, and file the two facts +worth citing: ```bash -# 1) register the thing you're citing — the PR, a spec file, a thread export -vouch source add docs/validator-change-pr-200.md # → a source id +vouch pending +vouch approve <proposal-id> --reason "accurate session summary" -# 2) propose a claim that cites it +# the durable, cited facts this run established: +vouch source add https://github.com/acme/httpkit/blob/main/CONTRIBUTING.md vouch propose-claim \ - --text "SN74 raised the maintainer issue-solving multiplier from 1.66 to 1.75." \ - --source --type fact --confidence 0.9 --tag gittensor --tag scoring -# → proposal id + --text "acme-httpkit merges require 'make test' green and a CHANGELOG entry; PRs without a regression test are rejected." \ + --source <source-id> --type fact --tag gittensor --tag merge-bar +vouch propose-claim \ + --text "acme-httpkit carries a healthy SN74 allocation and is weighted toward Go — high value per merged PR." \ + --source <source-id> --type observation --tag gittensor --tag targeting -# 3) a *different* maintainer approves (the proposer can't self-approve) -vouch pending -vouch approve -git add .vouch && git commit -m "kb: record maintainer-multiplier change (PR #200)" +vouch pending # a teammate (not you) approves — the gate holds ``` -If you try to approve your own proposal you'll get -`forbidden_self_approval` — that's the gate working. A maintainer with a -different identity must approve. 
+**Session 2 (Wednesday).** It opens with `vouch recall`, so the agent already +knows the layout, the merge bar (`make test` + a changelog entry), that #212's +first attempt was rejected for a missing regression test, and that httpkit is a +high-value Go target. It skips re-discovery, adds the regression test, and lands +the merged PR. + +That is the whole point: the investigation into httpkit happened **once**. + +## 5. Keep what's worth keeping — and supersede when it shifts -**When a rule changes, supersede — don't overwrite.** Propose and approve the -replacement claim (steps 1–3 above), then link the old one to it by id: +**The gate holds.** Captured summaries and proposed claims are `PENDING` until a +human approves them, and you cannot approve your own proposal +(`forbidden_self_approval`). That is a feature: it stops your agent from writing +its own history, so recalled memory is memory you vouched for. + +**When the whitelist shuffles, supersede — don't overwrite.** If httpkit's +allocation drops, propose and approve the replacement claim, then link the old +one to it: ```bash vouch supersede ``` -The old claim is kept (marked superseded) so the history of what changed stays -intact and queryable. - -Every write is in `.vouch/audit.log.jsonl` — `vouch audit` shows exactly who -proposed and who approved each change, so the history of *why* is queryable, -not lost. +The old claim is kept (marked superseded) so the history of *what changed* stays +queryable — and your agent re-prioritizes toward a higher-allocation repo next +session instead of over-investing in a de-valued one. -## 5. A CONTRIBUTING note for the repo +## Where the live layer stops and vouch begins -Drop a short note into the Gittensor repo's `CONTRIBUTING.md` so the habit -sticks: +The `gitt` client and the chain own everything live; vouch owns the durable +*why*. 
They don't overlap: -```markdown -### Recording scoring / policy decisions +| Concern | Owner | Example | +|---|---|---| +| Register + broadcast your PAT | `gitt` miner client | `gitt miner post --wallet --hotkey ` | +| Check your miner status / scores | `gitt` miner client | `gitt miner check --wallet --hotkey ` | +| On-chain scoring + emissions | Gittensor (SN74) | validators score merged PRs | +| **What your agent learned about a repo** | **vouch** | merge bar, rejected approaches, targeting notes — cited & reviewed | -When a change alters scoring, the repo allow-list, anti-sybil thresholds, or -emission split, record it in vouch as a cited claim: +A typical miner setup runs both, side by side: -1. `vouch source add` the PR or spec that drives it. -2. `vouch propose-claim --source --type fact|decision` (or - `vouch supersede` the claim it replaces). -3. A maintainer reviews with `vouch pending` / `vouch approve`. +```bash +# live: broadcast ownership so merged PRs get attributed and scored +git clone https://github.com/entrius/gittensor.git && cd gittensor && uv sync +export GITTENSOR_MINER_PAT=ghp_your_token_here +gitt miner post --wallet --hotkey +gitt miner check --wallet --hotkey -Cite the PR. Don't bury the rationale in a thread. +# memory: in each target repo, the vouch loop from §2–§4 ``` -## 6. Day-to-day +## Day-to-day ```bash -vouch context "how are merged PRs scored and what stops sybil mining" +vouch recall # what the next session should already know +vouch context "how do i land a mergeable PR in acme-httpkit" # → a ranked, cited pack ready to paste into an agent prompt -vouch search "emission" --semantic # if installed with the [embeddings] extra +vouch search "merge bar" --limit 5 vouch lint # broken citations / stale claims ``` -That's the loop: live signals come from Gittensory; the durable *why* lives in -vouch, one cited and reviewed claim at a time. 
+That's the loop: the `gitt` client and the chain handle the live signals; vouch +keeps the reviewed record of what your agent worked out, so it never works it +out twice — and every session you approve makes the next PR faster to land. diff --git a/docs/img/examples/render.py b/docs/img/examples/render.py index adb432be..5c3be316 100644 --- a/docs/img/examples/render.py +++ b/docs/img/examples/render.py @@ -40,13 +40,20 @@ def _vouch_bin() -> str: - """Resolve the `vouch` console script — the same entry point a user runs.""" - found = shutil.which("vouch") - if found: - return found + """Resolve the `vouch` console script that matches the running interpreter. + + Prefer the `vouch` installed alongside this Python (the venv carrying the + repo's editable build) over whatever is first on PATH. A stale global + `vouch` shadowing the venv would otherwise render against the wrong build — + silently overwriting the committed images with output from a different CLI. + Fall back to PATH only when no interpreter-local script exists. + """ candidate = Path(sys.executable).parent / "vouch" if candidate.exists(): return str(candidate) + found = shutil.which("vouch") + if found: + return found raise SystemExit("`vouch` console script not found; pip install -e '.[dev]' first") diff --git a/docs/superpowers/plans/2026-07-01-vouch-session-autocapture.md b/docs/superpowers/plans/2026-07-01-vouch-session-autocapture.md new file mode 100644 index 00000000..3afbfa71 --- /dev/null +++ b/docs/superpowers/plans/2026-07-01-vouch-session-autocapture.md @@ -0,0 +1,959 @@ +# Session Auto-Capture Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Once vouch is installed in a workspace, a Claude Code session that starts and ends is captured automatically and filed as one `PENDING` session-summary page proposal for human approval. + +**Architecture:** Claude Code hooks harvest tool-use into an ephemeral, gitignored scratch buffer (`.vouch/captures/<session_id>.jsonl`) during the session; a `SessionEnd` hook rolls the buffer plus a `git diff` backstop into a single markdown summary and files it via the existing `proposals.propose_page` gate. Mechanical rollup, no LLM, no network. Capture never calls `approve()` — the review gate stays intact. + +**Tech Stack:** Python 3, click (CLI), pydantic models, pytest. New module `src/vouch/capture.py`; CLI-only wiring (no new `kb.*` MCP/JSONL method). + +## Global Constraints + +- The CI gate must stay green: `pytest tests/ -q --ignore=tests/embeddings`, `mypy src`, `ruff check src tests` (i.e. `make check`). Type-annotate all new code. +- Conventional commits, lowercase, ≤72-char summary. **No `Co-Authored-By` trailer.** +- Stage files by name (`git add <path>...`), never `git add -A`. +- `storage.py` stays pure I/O — all capture business logic lives in `capture.py`. +- Config is read defensively (yaml `safe_load` in try/except, `isinstance` per level, explicit coercion, hardcoded defaults) — never via pydantic at load time. Template: `volunteer_context.load_config`. +- This feature adds **no `kb.*` method** — no `@mcp.tool()`, no jsonl handler, no `capabilities.METHODS` entry. `test_capabilities` must remain untouched and green. +- No LLM and no network calls anywhere in the capture path. +- The `observe` path runs on every tool call; it must never crash the user's tool call (swallow all errors, always exit 0) and must stay minimal. +- Tests mirror module names: new tests go in `tests/test_capture.py`. +- Captured summaries are marked `proposed_by="vouch-capture"`, `page_type="session"`, `rationale="auto-captured session summary"` so reviewers can filter them.
+ +**Reference signatures (already in the codebase — consume, do not redefine):** +- `KBStore.init(root: Path) -> KBStore`; `store.kb_dir: Path`; `store.config_path: Path` (= `kb_dir/config.yaml`). +- `discover_root(start: Path | None = None) -> Path` and `KBNotFoundError` in `vouch.storage`. +- `propose_page(store, *, title: str, body: str, page_type="concept", claim_ids=None, entity_ids=None, source_ids=None, proposed_by: str, tags=None, metadata=None, rationale=None, slug_hint=None, session_id=None, dry_run=False) -> Proposal`. Returns a `Proposal` with `.id`, `.proposed_by`, `.session_id`, `.kind == ProposalKind.PAGE`, `.status == ProposalStatus.PENDING`, and `.payload` = `{"id","title","body","type","claims","entities","sources","tags","metadata"}` (note `page_type` is stored under `payload["type"]`). +- `store.list_proposals(status: ProposalStatus | None = None) -> list[Proposal]`. +- `ProposalKind.PAGE`, `ProposalStatus.PENDING` in `vouch.models`. +- The built-in `"session"` page kind requires no citations and no frontmatter (`BUILTIN_PAGE_KINDS` default spec), so `propose_page(page_type="session")` with no citations validates. + +--- + +### Task 1: capture config, buffer paths, starter-config + gitignore + +**Files:** +- Create: `src/vouch/capture.py` +- Modify: `src/vouch/storage.py` (`_starter_config` ~line 75; `.gitignore` writer in `KBStore.init` ~line 224) +- Test: `tests/test_capture.py` + +**Interfaces:** +- Produces: `CaptureConfig(enabled: bool, min_observations: int, dedup_window_seconds: float)`; `load_config(store: KBStore) -> CaptureConfig`; `captures_dir(store) -> Path`; `buffer_path(store, session_id: str) -> Path`. Module constants `CAPTURE_ACTOR = "vouch-capture"`, `CAPTURE_PAGE_TYPE = "session"`, `DEFAULT_ENABLED = True`, `DEFAULT_MIN_OBSERVATIONS = 3`, `DEFAULT_DEDUP_WINDOW_SECONDS = 60.0`. 
+ +- [ ] **Step 1: Write the failing test** + +```python +# tests/test_capture.py +"""Auto-capture: config, buffer, observe, finalize.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from vouch import capture as cap +from vouch.storage import KBStore, _starter_config + + +@pytest.fixture +def store(tmp_path: Path) -> KBStore: + return KBStore.init(tmp_path) + + +def test_load_config_defaults(store: KBStore) -> None: + cfg = cap.load_config(store) + assert cfg.enabled is True + assert cfg.min_observations == 3 + assert cfg.dedup_window_seconds == 60.0 + + +def test_load_config_reads_override(store: KBStore) -> None: + store.config_path.write_text( + "capture:\n enabled: false\n min_observations: 5\n" + ) + cfg = cap.load_config(store) + assert cfg.enabled is False + assert cfg.min_observations == 5 + + +def test_buffer_path_under_captures_dir(store: KBStore) -> None: + p = cap.buffer_path(store, "sess-123") + assert p == store.kb_dir / "captures" / "sess-123.jsonl" + + +def test_starter_config_has_capture_namespace() -> None: + assert _starter_config()["capture"]["enabled"] is True + + +def test_init_gitignores_captures(tmp_path: Path) -> None: + kb = KBStore.init(tmp_path) + assert "captures/" in (kb.kb_dir / ".gitignore").read_text() +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `.venv/bin/python -m pytest tests/test_capture.py -q` +Expected: FAIL — `ModuleNotFoundError: No module named 'vouch.capture'` (and `_starter_config` has no `capture` key). + +- [ ] **Step 3: Create `src/vouch/capture.py` with config + paths** + +```python +"""Auto-capture Claude Code sessions into review-gated summaries. + +Passive harvest -> mechanical rollup -> one PENDING page proposal. No LLM. 
+`observe` appends compact observations to an ephemeral, gitignored scratch +buffer (`.vouch/captures/<session_id>.jsonl`); `finalize` rolls the buffer plus a +git-diff backstop into a single session-summary page proposal that a human +approves like any other write. Never calls approve() — the review gate stays +intact. See docs/superpowers/specs/2026-07-01-vouch-session-autocapture-design.md +""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +import yaml + +from .storage import KBStore + +DEFAULT_ENABLED = True +DEFAULT_MIN_OBSERVATIONS = 3 +DEFAULT_DEDUP_WINDOW_SECONDS = 60.0 +CAPTURE_ACTOR = "vouch-capture" +CAPTURE_PAGE_TYPE = "session" + + +@dataclass(frozen=True) +class CaptureConfig: + enabled: bool = DEFAULT_ENABLED + min_observations: int = DEFAULT_MIN_OBSERVATIONS + dedup_window_seconds: float = DEFAULT_DEDUP_WINDOW_SECONDS + + +def load_config(store: KBStore) -> CaptureConfig: + """Read ``capture:`` from config.yaml; fall back to defaults.""" + try: + loaded = yaml.safe_load(store.config_path.read_text()) + except (OSError, yaml.YAMLError): + return CaptureConfig() + if not isinstance(loaded, dict): + return CaptureConfig() + raw = loaded.get("capture") + if not isinstance(raw, dict): + return CaptureConfig() + return CaptureConfig( + enabled=bool(raw.get("enabled", DEFAULT_ENABLED)), + min_observations=int(raw.get("min_observations", DEFAULT_MIN_OBSERVATIONS)), + dedup_window_seconds=float( + raw.get("dedup_window_seconds", DEFAULT_DEDUP_WINDOW_SECONDS) + ), + ) + + +def captures_dir(store: KBStore) -> Path: + return store.kb_dir / "captures" + + +def buffer_path(store: KBStore, session_id: str) -> Path: + safe = session_id.replace("/", "_").replace("..", "_").strip() or "unknown" + return captures_dir(store) / f"{safe}.jsonl" +``` + +- [ ] **Step 4: Add the `capture` namespace to `_starter_config` in `src/vouch/storage.py`** + +Insert into the dict returned by `_starter_config()` (after the `"review"`
block): + +```python + "capture": { + # auto-capture claude code sessions into pending summaries. + "enabled": True, + "min_observations": 3, + }, +``` + +- [ ] **Step 5: Gitignore the captures buffer in `KBStore.init`** + +In `src/vouch/storage.py`, change the `.gitignore` writer: + +```python + gi = kb.kb_dir / ".gitignore" + if not gi.exists(): + # state.db is derived; proposed/ and captures/ are scratch space. + gi.write_text("proposed/\ncaptures/\nstate.db\nstate.db-*\n") +``` + +- [ ] **Step 6: Run tests to verify they pass** + +Run: `.venv/bin/python -m pytest tests/test_capture.py -q` +Expected: PASS (5 passed). + +- [ ] **Step 7: Commit** + +```bash +git add src/vouch/capture.py src/vouch/storage.py tests/test_capture.py +git commit -m "feat(capture): add capture config, buffer paths, gitignore" +``` + +--- + +### Task 2: observe — append an observation with dedup + tool summarizer + +**Files:** +- Modify: `src/vouch/capture.py` +- Test: `tests/test_capture.py` + +**Interfaces:** +- Consumes: `CaptureConfig`, `buffer_path`, `load_config` (Task 1). +- Produces: + - `observe(store, session_id, *, tool: str, summary: str, files: list[str] | None = None, cmd: str | None = None, now: float | None = None, config: CaptureConfig | None = None) -> bool` — returns True if a line was written. + - `summarize_tool(tool_name: str | None, tool_input: dict | None, tool_response: object) -> dict | None` — returns `{"tool","summary","files"(opt),"cmd"(opt)}` for observed tools, else None. + - `_read_observations(path: Path) -> list[dict]` (internal, reused by Task 3). 
+ +- [ ] **Step 1: Write the failing tests** + +```python +# add to tests/test_capture.py +def test_observe_appends_line(store: KBStore) -> None: + wrote = cap.observe(store, "s1", tool="Edit", summary="Edited a.py", now=100.0) + assert wrote is True + lines = cap.buffer_path(store, "s1").read_text().splitlines() + assert len(lines) == 1 + assert "Edited a.py" in lines[0] + + +def test_observe_dedups_within_window(store: KBStore) -> None: + assert cap.observe(store, "s1", tool="Read", summary="Read a.py", now=100.0) + # identical within 60s window -> skipped + assert cap.observe(store, "s1", tool="Read", summary="Read a.py", now=130.0) is False + # same key past the window -> written again + assert cap.observe(store, "s1", tool="Read", summary="Read a.py", now=200.0) + assert len(cap.buffer_path(store, "s1").read_text().splitlines()) == 2 + + +def test_observe_noop_when_disabled(store: KBStore) -> None: + store.config_path.write_text("capture:\n enabled: false\n") + assert cap.observe(store, "s1", tool="Edit", summary="x") is False + assert not cap.buffer_path(store, "s1").exists() + + +def test_summarize_tool_skips_unobserved() -> None: + assert cap.summarize_tool("mcp__vouch__kb_search", {}, "") is None + + +def test_summarize_tool_edit() -> None: + obs = cap.summarize_tool("Edit", {"file_path": "/repo/src/a.py"}, "ok") + assert obs is not None + assert obs["tool"] == "Edit" + assert obs["files"] == ["/repo/src/a.py"] + assert "a.py" in obs["summary"] + + +def test_summarize_tool_bash_flags_error() -> None: + obs = cap.summarize_tool("Bash", {"command": "pytest"}, "1 failed, error") + assert obs is not None + assert obs["cmd"] == "pytest" + assert "failed" in obs["summary"].lower() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `.venv/bin/python -m pytest tests/test_capture.py -q -k "observe or summarize"` +Expected: FAIL — `AttributeError: module 'vouch.capture' has no attribute 'observe'`. 
+ +- [ ] **Step 3: Implement observe + summarizer in `src/vouch/capture.py`** + +Add imports at the top (`json`, `time`, `typing.Any`): + +```python +import json +import time +from typing import Any +``` + +Append: + +```python +_OBSERVED_TOOLS = frozenset({ + "Read", "Edit", "Write", "Update", "Bash", + "Grep", "Glob", "WebFetch", "WebSearch", "Task", "NotebookEdit", +}) + + +def _read_observations(path: Path) -> list[dict[str, Any]]: + if not path.exists(): + return [] + out: list[dict[str, Any]] = [] + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + except json.JSONDecodeError: + continue + if isinstance(obj, dict): + out.append(obj) + return out + + +def _dedup_key(tool: str, summary: str) -> str: + return f"{tool}\x00{summary}" + + +def observe( + store: KBStore, + session_id: str, + *, + tool: str, + summary: str, + files: list[str] | None = None, + cmd: str | None = None, + now: float | None = None, + config: CaptureConfig | None = None, +) -> bool: + """Append one observation to the session buffer. 
Returns True if written.""" + cfg = config or load_config(store) + if not cfg.enabled: + return False + ts = time.time() if now is None else now + path = buffer_path(store, session_id) + key = _dedup_key(tool, summary) + for obs in reversed(_read_observations(path)): + if ts - float(obs.get("ts", 0.0)) > cfg.dedup_window_seconds: + break + if _dedup_key(str(obs.get("tool", "")), str(obs.get("summary", ""))) == key: + return False + record: dict[str, Any] = {"ts": ts, "tool": tool, "summary": summary} + if files: + record["files"] = files + if cmd: + record["cmd"] = cmd + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("a", encoding="utf-8") as fh: + fh.write(json.dumps(record, sort_keys=True) + "\n") + return True + + +def _basename(path: str) -> str: + return path.rsplit("/", 1)[-1] or path + + +def summarize_tool( + tool_name: str | None, + tool_input: dict[str, Any] | None, + tool_response: object, +) -> dict[str, Any] | None: + """Turn a PostToolUse payload into a compact observation, or None to skip.""" + if not tool_name or tool_name not in _OBSERVED_TOOLS: + return None + ti = tool_input or {} + out: dict[str, Any] = {"tool": tool_name} + fp = ti.get("file_path") + if isinstance(fp, str) and fp: + out["files"] = [fp] + if tool_name in {"Read", "Edit", "Write", "Update", "NotebookEdit"}: + name = _basename(fp) if isinstance(fp, str) and fp else "file" + verb = {"Read": "Read", "Write": "Created"}.get(tool_name, "Edited") + out["summary"] = f"{verb} {name}" + elif tool_name == "Bash": + cmd = ti.get("command") + short = str(cmd).splitlines()[0][:60] if cmd else "command" + out["cmd"] = str(cmd)[:200] if cmd else None + text = str(tool_response).lower() + failed = "error" in text or "failed" in text + out["summary"] = f"Command failed: {short}" if failed else f"Ran: {short}" + elif tool_name in {"Grep", "Glob"}: + out["summary"] = f"{tool_name} {str(ti.get('pattern', ''))[:40]}" + elif tool_name in {"WebFetch", "WebSearch"}: + target = 
ti.get("url") or ti.get("query") or "" + out["summary"] = f"Fetched: {str(target)[:60]}" + else: # Task + out["summary"] = f"{tool_name} completed" + if out.get("cmd") is None: + out.pop("cmd", None) + return out +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `.venv/bin/python -m pytest tests/test_capture.py -q` +Expected: PASS (all Task 1 + Task 2 tests). + +- [ ] **Step 5: Commit** + +```bash +git add src/vouch/capture.py tests/test_capture.py +git commit -m "feat(capture): harvest tool-use observations with dedup" +``` + +--- + +### Task 3: finalize — mechanical rollup, git backstop, PENDING proposal + +**Files:** +- Modify: `src/vouch/capture.py` +- Test: `tests/test_capture.py` + +**Interfaces:** +- Consumes: `observe`, `_read_observations`, `buffer_path`, `load_config`, `CAPTURE_ACTOR`, `CAPTURE_PAGE_TYPE` (Tasks 1-2); `propose_page` and models from the codebase. +- Produces: + - `build_summary_body(session_id, observations, changed_files, git_stat, *, project=None, generated_at=None) -> tuple[str, str]` (title, markdown body) — pure. + - `finalize(store, session_id, *, cwd: Path | None = None, project: str | None = None, generated_at: str | None = None, config: CaptureConfig | None = None) -> dict[str, Any]` — files at most one proposal, deletes the buffer (it is left in place only when capture is disabled), returns `{"captured": int, "summary_proposal_id": str | None, "skipped"?: str}`. + - `pending_count(store) -> int` — number of PENDING proposals authored by `vouch-capture`. + - `_git_changes(cwd: Path) -> tuple[list[str], str]` (internal; returns `([], "")` on any failure, outside a git repo, or when nothing changed).
+ +- [ ] **Step 1: Write the failing tests** + +```python +# add to tests/test_capture.py +from vouch.models import ProposalKind, ProposalStatus + + +def _seed(store: KBStore, sid: str, n: int) -> None: + for i in range(n): + cap.observe(store, sid, tool="Edit", summary=f"Edited f{i}.py", now=float(i)) + + +def test_finalize_files_one_pending_page(store: KBStore, tmp_path: Path) -> None: + _seed(store, "s1", 3) + result = cap.finalize(store, "s1", cwd=tmp_path) + pid = result["summary_proposal_id"] + assert pid is not None + pend = store.list_proposals(ProposalStatus.PENDING) + match = [p for p in pend if p.id == pid] + assert len(match) == 1 + pr = match[0] + assert pr.kind == ProposalKind.PAGE + assert pr.proposed_by == cap.CAPTURE_ACTOR + assert pr.payload["type"] == cap.CAPTURE_PAGE_TYPE + assert pr.status == ProposalStatus.PENDING + + +def test_finalize_below_min_files_nothing(store: KBStore, tmp_path: Path) -> None: + _seed(store, "s1", 2) # below default min_observations=3, non-git cwd + result = cap.finalize(store, "s1", cwd=tmp_path) + assert result["summary_proposal_id"] is None + assert store.list_proposals(ProposalStatus.PENDING) == [] + + +def test_finalize_deletes_buffer(store: KBStore, tmp_path: Path) -> None: + _seed(store, "s1", 3) + cap.finalize(store, "s1", cwd=tmp_path) + assert not cap.buffer_path(store, "s1").exists() + + +def test_finalize_noop_when_disabled(store: KBStore, tmp_path: Path) -> None: + _seed(store, "s1", 5) + store.config_path.write_text("capture:\n enabled: false\n") + result = cap.finalize(store, "s1", cwd=tmp_path) + assert result["summary_proposal_id"] is None + assert store.list_proposals(ProposalStatus.PENDING) == [] + + +def test_build_summary_body_has_sections() -> None: + obs = [ + {"ts": 1.0, "tool": "Edit", "summary": "Edited a.py", "files": ["a.py"]}, + {"ts": 2.0, "tool": "Bash", "summary": "Ran: pytest", "cmd": "pytest"}, + ] + title, body = cap.build_summary_body("s1", obs, ["a.py"], "a.py | 2 +-") + assert "s1" 
in title + assert "files modified this session" in body.lower() + assert "## activity" in body.lower() + assert "a.py" in body + + +def test_pending_count_counts_capture_actor(store: KBStore, tmp_path: Path) -> None: + _seed(store, "s1", 3) + cap.finalize(store, "s1", cwd=tmp_path) + assert cap.pending_count(store) == 1 +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `.venv/bin/python -m pytest tests/test_capture.py -q -k "finalize or summary_body or pending_count"` +Expected: FAIL — `AttributeError: module 'vouch.capture' has no attribute 'finalize'`. + +- [ ] **Step 3: Implement finalize + rollup + git backstop in `src/vouch/capture.py`** + +Add imports at the top: + +```python +import subprocess + +from .models import ProposalStatus +from .proposals import propose_page +``` + +Append: + +```python +def _git_changes(cwd: Path) -> tuple[list[str], str]: + """Return (changed_files, diff_stat). Empty on any failure / non-repo.""" + try: + names = subprocess.run( + ["git", "diff", "--name-only", "HEAD"], + cwd=cwd, capture_output=True, text=True, timeout=3, check=False, + ) + except (OSError, subprocess.SubprocessError): + return [], "" + files = [f for f in names.stdout.splitlines() if f.strip()] + if not files: + return [], "" + try: + stat = subprocess.run( + ["git", "diff", "HEAD", "--stat"], + cwd=cwd, capture_output=True, text=True, timeout=3, check=False, + ).stdout.strip() + except (OSError, subprocess.SubprocessError): + stat = "" + return files, stat + + +def build_summary_body( + session_id: str, + observations: list[dict[str, Any]], + changed_files: list[str], + git_stat: str, + *, + project: str | None = None, + generated_at: str | None = None, +) -> tuple[str, str]: + tool_counts: dict[str, int] = {} + files: set[str] = set(changed_files) + commands: list[str] = [] + for obs in observations: + tool = str(obs.get("tool", "")) + tool_counts[tool] = tool_counts.get(tool, 0) + 1 + for f in obs.get("files") or []: + files.add(str(f)) + cmd = 
obs.get("cmd") + if cmd: + commands.append(str(cmd)) + title = f"session summary: {project or 'workspace'} ({session_id})" + lines: list[str] = [f"# {title}", ""] + if generated_at: + lines.append(f"- generated: {generated_at}") + lines += [f"- session: `{session_id}`", f"- observations: {len(observations)}", ""] + if files: + lines += ["## files modified this session", ""] + lines += [f"- {f}" for f in sorted(files)[:20]] + lines.append("") + if git_stat: + lines += ["## git changes", "", "```", git_stat, "```", ""] + if tool_counts: + lines += ["## activity", ""] + lines += [f"- {t}: {tool_counts[t]}" for t in sorted(tool_counts)] + lines.append("") + if commands: + lines += ["## notable commands", ""] + lines += [f"- `{c}`" for c in commands[:10]] + lines.append("") + if observations: + lines += ["## observations", ""] + lines += [f"- {o.get('summary', '')}" for o in observations[:30]] + lines.append("") + return title, "\n".join(lines).rstrip() + "\n" + + +def finalize( + store: KBStore, + session_id: str, + *, + cwd: Path | None = None, + project: str | None = None, + generated_at: str | None = None, + config: CaptureConfig | None = None, +) -> dict[str, Any]: + """Roll a session buffer into one PENDING summary proposal. 
No approve().""" + cfg = config or load_config(store) + path = buffer_path(store, session_id) + observations = _read_observations(path) + if not cfg.enabled: + return {"captured": len(observations), "summary_proposal_id": None, + "skipped": "disabled"} + changed_files, git_stat = _git_changes(cwd or Path.cwd()) + total = len(observations) + len(changed_files) + if total < cfg.min_observations: + if path.exists(): + path.unlink() + return {"captured": total, "summary_proposal_id": None, + "skipped": "below-min"} + title, body = build_summary_body( + session_id, observations, changed_files, git_stat, + project=project, generated_at=generated_at, + ) + proposal = propose_page( + store, + title=title, + body=body, + page_type=CAPTURE_PAGE_TYPE, + proposed_by=CAPTURE_ACTOR, + session_id=session_id, + rationale="auto-captured session summary", + ) + if path.exists(): + path.unlink() + return {"captured": total, "summary_proposal_id": proposal.id} + + +def pending_count(store: KBStore) -> int: + return sum( + 1 for p in store.list_proposals(ProposalStatus.PENDING) + if p.proposed_by == CAPTURE_ACTOR + ) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `.venv/bin/python -m pytest tests/test_capture.py -q` +Expected: PASS (all capture tests). + +- [ ] **Step 5: Commit** + +```bash +git add src/vouch/capture.py tests/test_capture.py +git commit -m "feat(capture): roll a session into one pending summary proposal" +``` + +--- + +### Task 4: CLI `vouch capture` group (observe, finalize, banner) + +**Files:** +- Modify: `src/vouch/cli.py` +- Test: `tests/test_capture.py` + +**Interfaces:** +- Consumes: `capture.observe`, `capture.finalize`, `capture.summarize_tool`, `capture.pending_count`, `capture.CAPTURE_ACTOR` (Tasks 2-3); `KBStore`, `discover_root`, `KBNotFoundError`, `_emit_json` (existing in cli.py). +- Produces: CLI commands `vouch capture observe`, `vouch capture finalize [--session-id ID]`, `vouch capture banner`. 
These are **CLI-only** — no MCP/JSONL/capabilities changes. + +- [ ] **Step 1: Write the failing tests** + +```python +# add to tests/test_capture.py +import json as _json + +from click.testing import CliRunner + +from vouch.cli import cli +from vouch.models import ProposalStatus + + +def _run(store: KBStore, args: list[str], stdin: str = "") -> object: + runner = CliRunner() + return runner.invoke( + cli, args, input=stdin, + env={"VOUCH_KB_PATH": str(store.kb_dir)}, + ) + + +def test_cli_observe_appends(store: KBStore) -> None: + payload = _json.dumps({ + "session_id": "cc-1", + "tool_name": "Edit", + "tool_input": {"file_path": "/r/a.py"}, + "tool_response": "ok", + }) + res = _run(store, ["capture", "observe"], stdin=payload) + assert res.exit_code == 0 + assert cap.buffer_path(store, "cc-1").exists() + + +def test_cli_observe_never_errors_on_garbage(store: KBStore) -> None: + res = _run(store, ["capture", "observe"], stdin="not json") + assert res.exit_code == 0 + + +def test_cli_finalize_files_proposal(store: KBStore) -> None: + for i in range(3): + cap.observe(store, "cc-2", tool="Edit", summary=f"Edited f{i}.py", now=float(i)) + payload = _json.dumps({"session_id": "cc-2", "cwd": str(store.kb_dir.parent)}) + res = _run(store, ["capture", "finalize"], stdin=payload) + assert res.exit_code == 0 + pend = store.list_proposals(ProposalStatus.PENDING) + assert any(p.proposed_by == cap.CAPTURE_ACTOR for p in pend) + + +def test_cli_banner_emits_when_pending(store: KBStore) -> None: + for i in range(3): + cap.observe(store, "cc-3", tool="Edit", summary=f"Edited f{i}.py", now=float(i)) + cap.finalize(store, "cc-3", cwd=store.kb_dir.parent) + res = _run(store, ["capture", "banner"]) + assert res.exit_code == 0 + assert "awaiting review" in res.output + + +def test_cli_banner_silent_when_none(store: KBStore) -> None: + res = _run(store, ["capture", "banner"]) + assert res.exit_code == 0 + assert res.output.strip() == "" +``` + +- [ ] **Step 2: Run tests to verify they 
fail** + +Run: `.venv/bin/python -m pytest tests/test_capture.py -q -k cli` +Expected: FAIL — `No such command 'capture'`. + +- [ ] **Step 3: Add the CLI group to `src/vouch/cli.py`** + +Confirm these imports exist near the top of `cli.py` (add any missing): `import sys`, `import json`, `from pathlib import Path`, `from datetime import UTC, datetime`, `from vouch import capture as capture_mod`, and `from vouch.storage import KBStore, discover_root, KBNotFoundError`. Append the group near the other `@cli.group()` blocks (e.g. after the `session` group): + +```python +@cli.group() +def capture() -> None: + """Automatic session capture (driven by claude code hooks).""" + + +def _capture_store() -> KBStore | None: + """Locate the KB without the sys.exit(2) that _load_store does — hooks + must never abort the host.""" + try: + return KBStore(discover_root()) + except KBNotFoundError: + return None + + +@capture.command("observe") +def capture_observe_cmd() -> None: + """Append one observation from a PostToolUse hook payload (stdin JSON).""" + if sys.stdin.isatty(): + return + try: + raw = sys.stdin.read() + payload = json.loads(raw) if raw.strip() else {} + if not isinstance(payload, dict): + return + session_id = str(payload.get("session_id") or "") + if not session_id: + return + obs = capture_mod.summarize_tool( + payload.get("tool_name"), + payload.get("tool_input") if isinstance(payload.get("tool_input"), dict) else {}, + payload.get("tool_response"), + ) + if obs is None: + return + store = _capture_store() + if store is None: + return + capture_mod.observe( + store, session_id, + tool=obs["tool"], summary=obs["summary"], + files=obs.get("files"), cmd=obs.get("cmd"), + ) + except Exception: # noqa: BLE001 - a capture failure must never break a tool call + return + + +@capture.command("finalize") +@click.option("--session-id", default=None, help="Session id (else read from stdin payload).") +def capture_finalize_cmd(session_id: str | None) -> None: + """Roll a 
session buffer into a PENDING summary (SessionEnd hook payload on stdin).""" + payload: dict[str, Any] = {} + if not sys.stdin.isatty(): + raw = sys.stdin.read() + if raw.strip(): + try: + loaded = json.loads(raw) + if isinstance(loaded, dict): + payload = loaded + except json.JSONDecodeError: + payload = {} + sid = session_id or str(payload.get("session_id") or "") + if not sid: + return + store = _capture_store() + if store is None: + return + cwd = Path(str(payload.get("cwd") or ".")).resolve() + result = capture_mod.finalize( + store, sid, cwd=cwd, project=cwd.name, + generated_at=datetime.now(UTC).isoformat(), + ) + _emit_json(result) + + +@capture.command("banner") +def capture_banner_cmd() -> None: + """Emit a SessionStart nudge if captured summaries await review.""" + store = _capture_store() + if store is None: + return + n = capture_mod.pending_count(store) + if n: + click.echo( + f"🔔 {n} auto-captured session summary(ies) awaiting review — " + f"run `vouch review`." + ) +``` + +Note: `Any` is used in the finalize command — ensure `from typing import Any` is imported in `cli.py` (it already is in most vouch modules; add if `ruff`/`mypy` flags it). + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `.venv/bin/python -m pytest tests/test_capture.py -q` +Expected: PASS (all capture tests including CLI). + +- [ ] **Step 5: Run the full gate** + +Run: `.venv/bin/python -m pytest tests/ -q --ignore=tests/embeddings && .venv/bin/python -m mypy src && .venv/bin/python -m ruff check src tests` +Expected: PASS. In particular `tests/test_capabilities.py` stays green (no new `kb.*` method was added). 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/vouch/cli.py tests/test_capture.py +git commit -m "feat(capture): add vouch capture observe/finalize/banner cli" +``` + +--- + +### Task 5: Claude Code adapter wiring + changelog + +**Files:** +- Modify: `adapters/claude-code/.claude/settings.json` +- Modify: `adapters/claude-code/install.yaml` (T4 comment) +- Modify: `CHANGELOG.md` (`[Unreleased]`) +- Test: `tests/test_capture.py` + +**Interfaces:** +- Consumes: the `vouch capture observe|finalize|banner` commands (Task 4). +- Produces: adapter hooks that drive capture on install; a regression test asserting the hooks exist. + +- [ ] **Step 1: Write the failing test** + +```python +# add to tests/test_capture.py +def test_adapter_settings_wires_capture_hooks() -> None: + import json as J + from pathlib import Path as P + root = P(__file__).resolve().parents[1] + settings = J.loads( + (root / "adapters/claude-code/.claude/settings.json").read_text() + ) + hooks = settings["hooks"] + + def commands(event: str) -> list[str]: + out: list[str] = [] + for group in hooks.get(event, []): + for h in group.get("hooks", []): + out.append(h.get("command", "")) + return out + + assert any("capture observe" in c for c in commands("PostToolUse")) + assert any("capture finalize" in c for c in commands("SessionEnd")) + assert any("capture banner" in c for c in commands("SessionStart")) +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `.venv/bin/python -m pytest tests/test_capture.py -q -k adapter` +Expected: FAIL — `KeyError: 'PostToolUse'` (or the assertions fail). 
+ +- [ ] **Step 3: Update `adapters/claude-code/.claude/settings.json`** + +Replace the `"hooks"` object with (keep the existing `"permissions"` block unchanged): + +```json + "hooks": { + "SessionStart": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch status --json || true" + }, + { + "type": "command", + "command": "vouch capture banner || true" + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch capture observe || true" + } + ] + } + ], + "SessionEnd": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch capture finalize || true" + } + ] + } + ] + } +``` + +- [ ] **Step 4: Update the T4 comment in `adapters/claude-code/install.yaml`** + +Change the T4 description line to reflect the expanded hook surface: + +``` +# T4 = `.claude/settings.json`: SessionStart (kb status + capture review banner), +# PostToolUse (capture observe), SessionEnd (capture finalize), plus +# read-only kb_* auto-allow. +``` + +- [ ] **Step 5: Add a CHANGELOG entry under `[Unreleased]`** + +Add under the `### Added` list in `CHANGELOG.md` (create the `### Added` subsection if the `[Unreleased]` block lacks one): + +```markdown +- auto-capture: claude code sessions are harvested via hooks and filed as a + single pending session-summary proposal for human approval (`vouch capture` + cli + adapter hooks; opt out with `capture.enabled: false`). +``` + +- [ ] **Step 6: Run the test + full gate** + +Run: `.venv/bin/python -m pytest tests/test_capture.py -q -k adapter` +Expected: PASS. + +Run: `make check` +Expected: PASS (all tests, mypy, ruff green). + +- [ ] **Step 7: Commit** + +```bash +git add adapters/claude-code/.claude/settings.json adapters/claude-code/install.yaml CHANGELOG.md tests/test_capture.py +git commit -m "feat(capture): wire claude code adapter hooks for session capture" +``` + +--- + +## Self-Review + +**1. 
Spec coverage** — every spec section maps to a task: +- scratch buffer (spec §1) → Task 1 (`buffer_path`, gitignore) + Task 2 (append). +- `vouch capture observe` (spec §2) → Task 2 (`observe`, `summarize_tool`) + Task 4 (CLI). +- `vouch capture finalize` (spec §3) → Task 3 (`finalize`, `build_summary_body`, git backstop) + Task 4 (CLI). +- notification / next-session banner (spec §4) → Task 3 (`pending_count`) + Task 4 (`banner`) + Task 5 (SessionStart hook). +- config `capture.*` (spec §5) → Task 1 (`load_config`, `_starter_config`). +- adapter changes (spec §6) → Task 5. +- review-gate compliance → `finalize` only calls `propose_page` (PENDING); asserted in `test_finalize_files_one_pending_page`. +- registration/parity (no `kb.*`) → Task 4 Step 5 gate explicitly re-runs `test_capabilities`. +- out-of-scope items (no LLM/network, no per-observation proposals, no auto-approve, stale sweep deferred) → honored; nothing in any task adds them. + +**2. Placeholder scan** — no TBD/TODO; every code step shows complete code; the one broad `except Exception` is intentional and commented (a capture failure must never break a tool call). + +**3. Type consistency** — names used consistently across tasks: `observe`, `finalize`, `build_summary_body`, `summarize_tool`, `pending_count`, `buffer_path`, `CaptureConfig`, `CAPTURE_ACTOR`, `CAPTURE_PAGE_TYPE`. `propose_page` return consumed as `.id`, `.kind`, `.proposed_by`, `.status`, `.payload["type"]` — matches the reference signature. `_read_observations` defined in Task 2, reused in Task 3. + +## Open items surfaced during planning (non-blocking) + +- **`observe` startup cost** — importing `capture.py` pulls `proposals`/models on every tool call. Accepted for v1 per the spec's risk note; the fallback (a standalone appender script) is out of scope. If per-tool latency proves noticeable in practice, revisit before broadening beyond Claude Code. 
+- **stale sessions** — a hard crash skips `SessionEnd`, orphaning a buffer (harmless scratch). A `vouch capture finalize --stale` sweep is a deliberate follow-up, not in this plan. diff --git a/docs/superpowers/plans/2026-07-01-vscode-session-autoproposal.md b/docs/superpowers/plans/2026-07-01-vscode-session-autoproposal.md new file mode 100644 index 00000000..29b66b85 --- /dev/null +++ b/docs/superpowers/plans/2026-07-01-vscode-session-autoproposal.md @@ -0,0 +1,920 @@ +# VS Code Session Auto-Proposal Implementation Plan + +> **For agentic workers:** Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Automatically finalize old capture buffers on SessionStart and finalize the current session when the VS Code window closes, without user action. + +**Architecture:** Add `finalize_all_except()` to capture.py to bulk-finalize stale buffers. Wire it into a new `vouch capture finalize-all` CLI command. Update the SessionStart hook to call this command, catching old sessions. For window close, add a WindowClose hook (if available) or document the fallback behavior. 
+ +**Tech Stack:** Python 3.10+, Click CLI framework, existing capture.py, adapter settings.json + +## Global Constraints + +- No Co-Authored-By trailers in commits (per CLAUDE.md) +- All writes through the review gate (`proposals.approve()` never bypassed) +- Lowercase prose in commit bodies +- Test names follow `test__` pattern +- All capture failures must not break the session (silent success on errors) + +--- + +## File Structure + +| File | Change | Responsibility | +|------|--------|-----------------| +| `src/vouch/capture.py` | Modify | Add `finalize_all_except()`, `is_stale_buffer()` functions | +| `src/vouch/cli.py` | Modify | Add `vouch capture finalize-all` command | +| `adapters/claude-code/.claude/settings.json` | Modify | Wire SessionStart hook to call `finalize-all` | +| `tests/test_capture.py` | Modify | Add 8+ unit tests for new functionality | + +--- + +## Task 1: Add helper functions to capture.py + +**Files:** +- Modify: `src/vouch/capture.py:283+` +- Test: `tests/test_capture.py` + +**Interfaces:** +- Produces: + - `is_stale_buffer(path: Path, *, max_age_seconds: float = 3600.0, now_timestamp: float | None = None) -> bool` + - `finalize_all_except(store: KBStore, current_session_id: str, *, max_age_seconds: float = 3600.0, cwd: Path | None = None, now_timestamp: float | None = None) -> dict[str, Any]` + +- [ ] **Step 1: Write the failing test for `is_stale_buffer()`** + +At the end of `tests/test_capture.py`, add: + +```python +def test_is_stale_buffer_with_recent_file(tmp_path): + """Recent file should not be stale.""" + f = tmp_path / "recent.jsonl" + f.write_text("test") + now = time.time() + # File created 30 seconds ago; max_age=3600 + assert not capture_mod.is_stale_buffer(f, max_age_seconds=3600, now_timestamp=now) + + +def test_is_stale_buffer_with_old_file(tmp_path): + """File older than max_age should be stale.""" + f = tmp_path / "old.jsonl" + f.write_text("test") + old_time = time.time() - 7200 # 2 hours ago + f.touch(times=(old_time, old_time)) # Set mtime to 2 hours ago + now = time.time() + assert 
capture_mod.is_stale_buffer(f, max_age_seconds=3600, now_timestamp=now) + + +def test_is_stale_buffer_with_exact_boundary(tmp_path): + """File at exact max_age boundary should not be stale (>=).""" + f = tmp_path / "boundary.jsonl" + f.write_text("test") + exact_time = time.time() - 3600 # Exactly 1 hour ago + f.touch(times=(exact_time, exact_time)) + now = exact_time + 3600 + assert not capture_mod.is_stale_buffer(f, max_age_seconds=3600, now_timestamp=now) +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cd /home/a/Dev/plind-junior/vouch +source .venv/bin/activate +pytest tests/test_capture.py::test_is_stale_buffer_with_recent_file -xvs +``` + +Expected: `FAILED ... NameError: name 'is_stale_buffer' is not defined` + +- [ ] **Step 3: Implement `is_stale_buffer()` in capture.py** + +After the `pending_count()` function (line 283), add: + +```python +def is_stale_buffer( + path: Path, + *, + max_age_seconds: float = 3600.0, + now_timestamp: float | None = None, +) -> bool: + """Check if a buffer file's mtime is older than max_age_seconds.""" + if not path.exists(): + return False + now = now_timestamp if now_timestamp is not None else time.time() + mtime = path.stat().st_mtime + age = now - mtime + return age > max_age_seconds +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +pytest tests/test_capture.py::test_is_stale_buffer_with_recent_file tests/test_capture.py::test_is_stale_buffer_with_old_file tests/test_capture.py::test_is_stale_buffer_with_exact_boundary -xvs +``` + +Expected: All 3 PASS + +- [ ] **Step 5: Write failing tests for `finalize_all_except()`** + +Add to `tests/test_capture.py`: + +```python +def test_finalize_all_except_skips_current_session(tmp_path): + """Should not finalize the current session buffer.""" + store = _make_store(tmp_path) + sess_id = "current-session" + + # Create a current session buffer with observations + path = capture_mod.buffer_path(store, sess_id) + path.parent.mkdir(parents=True, 
exist_ok=True) + path.write_text('{"ts": 1.0, "tool": "Read", "summary": "test"}\n') + + result = capture_mod.finalize_all_except( + store, sess_id, max_age_seconds=3600.0 + ) + + assert result["skipped_current"] == [sess_id] + assert path.exists() # Not removed + + +def test_finalize_all_except_finalizes_old_buffer(tmp_path): + """Should finalize buffers older than max_age, except current session.""" + store = _make_store(tmp_path) + current_sess = "current" + old_sess = "old-session" + + # Create old buffer (2 hours old) + old_path = capture_mod.buffer_path(store, old_sess) + old_path.parent.mkdir(parents=True, exist_ok=True) + old_path.write_text('{"ts": 1.0, "tool": "Read", "summary": "test"}\n') + old_mtime = time.time() - 7200 + old_path.touch(times=(old_mtime, old_mtime)) + + # Create current buffer (recent) + curr_path = capture_mod.buffer_path(store, current_sess) + curr_path.write_text('{"ts": 2.0, "tool": "Write", "summary": "test2"}\n') + + result = capture_mod.finalize_all_except( + store, current_sess, max_age_seconds=3600.0 + ) + + assert old_sess in result["finalized"] + assert current_sess in result["skipped_current"] + assert not old_path.exists() # Removed after finalize + assert curr_path.exists() # Current session untouched + + +def test_finalize_all_except_skips_recent_buffers(tmp_path): + """Should not finalize buffers younger than max_age.""" + store = _make_store(tmp_path) + current_sess = "current" + recent_sess = "recent-other" + + # Create recent buffer (30 minutes old) + recent_path = capture_mod.buffer_path(store, recent_sess) + recent_path.parent.mkdir(parents=True, exist_ok=True) + recent_path.write_text('{"ts": 1.0, "tool": "Read", "summary": "test"}\n') + recent_mtime = time.time() - 1800 + recent_path.touch(times=(recent_mtime, recent_mtime)) + + result = capture_mod.finalize_all_except( + store, current_sess, max_age_seconds=3600.0 + ) + + assert recent_sess in result["skipped_recent"] + assert recent_path.exists() # Not removed 
+ + +def test_finalize_all_except_multiple_buffers(tmp_path): + """Should handle multiple old and recent buffers correctly.""" + store = _make_store(tmp_path) + current_sess = "current" + + # Create 3 old buffers, 2 recent buffers + old_sesses = ["old1", "old2", "old3"] + recent_sesses = ["recent1", "recent2"] + + now = time.time() + for sid in old_sesses: + path = capture_mod.buffer_path(store, sid) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text('{"ts": 1.0, "tool": "Read", "summary": "test"}\n') + old_mtime = now - 7200 # 2 hours ago + path.touch(times=(old_mtime, old_mtime)) + + for sid in recent_sesses: + path = capture_mod.buffer_path(store, sid) + path.write_text('{"ts": 2.0, "tool": "Read", "summary": "test"}\n') + recent_mtime = now - 600 # 10 minutes ago + path.touch(times=(recent_mtime, recent_mtime)) + + # Create current session buffer + curr_path = capture_mod.buffer_path(store, current_sess) + curr_path.write_text('{"ts": 3.0, "tool": "Write", "summary": "test"}\n') + + result = capture_mod.finalize_all_except( + store, current_sess, max_age_seconds=3600.0, now_timestamp=now + ) + + assert set(result["finalized"]) == set(old_sesses) + assert set(result["skipped_recent"]) == set(recent_sesses) + assert result["skipped_current"] == [current_sess] + + # Verify old buffers are removed, others exist + for sid in old_sesses: + assert not capture_mod.buffer_path(store, sid).exists() + for sid in recent_sesses + [current_sess]: + assert capture_mod.buffer_path(store, sid).exists() + + +def test_finalize_all_except_empty_captures_dir(tmp_path): + """Should handle empty or missing captures directory gracefully.""" + store = _make_store(tmp_path) + result = capture_mod.finalize_all_except( + store, "current-session", max_age_seconds=3600.0 + ) + + assert result["finalized"] == [] + assert result["skipped_recent"] == [] + assert result["skipped_current"] == [] + + +def test_finalize_all_except_returns_proposal_ids(tmp_path): + 
"""finalize_all_except should return proposal IDs of finalized buffers.""" + store = _make_store(tmp_path) + old_sess = "old-session" + current_sess = "current" + + # Create old buffer with enough observations + old_path = capture_mod.buffer_path(store, old_sess) + old_path.parent.mkdir(parents=True, exist_ok=True) + observations = [ + '{"ts": 1.0, "tool": "Read", "summary": "test1"}', + '{"ts": 2.0, "tool": "Read", "summary": "test2"}', + '{"ts": 3.0, "tool": "Read", "summary": "test3"}', + ] + old_path.write_text("\n".join(observations) + "\n") + old_mtime = time.time() - 7200 + old_path.touch(times=(old_mtime, old_mtime)) + + # Create current session buffer + curr_path = capture_mod.buffer_path(store, current_sess) + curr_path.write_text('{"ts": 4.0, "tool": "Write", "summary": "test"}\n') + + result = capture_mod.finalize_all_except( + store, current_sess, max_age_seconds=3600.0 + ) + + assert old_sess in result["finalized"] + # Verify a proposal was created + pending = store.list_proposals(ProposalStatus.PENDING) + assert len(pending) > 0 +``` + +- [ ] **Step 6: Run tests to verify they fail** + +```bash +pytest tests/test_capture.py::test_finalize_all_except_skips_current_session -xvs +``` + +Expected: `FAILED ... NameError: name 'finalize_all_except' is not defined` + +- [ ] **Step 7: Implement `finalize_all_except()` in capture.py** + +After the `is_stale_buffer()` function, add: + +```python +def finalize_all_except( + store: KBStore, + current_session_id: str, + *, + max_age_seconds: float = 3600.0, + cwd: Path | None = None, + now_timestamp: float | None = None, +) -> dict[str, Any]: + """Finalize all buffers except current_session_id, if they're older than max_age. + + Returns dict with keys: + - finalized: [session_id1, session_id2, ...] session IDs that were finalized + - skipped_recent: [id3, id4, ...] 
sessions too recent to finalize + - skipped_current: [id5] the current session (always skipped) + """ + finalized: list[str] = [] + skipped_recent: list[str] = [] + skipped_current: list[str] = [] + now = now_timestamp if now_timestamp is not None else time.time() + + caps_dir = captures_dir(store) + if not caps_dir.exists(): + return { + "finalized": finalized, + "skipped_recent": skipped_recent, + "skipped_current": skipped_current, + } + + for path in sorted(caps_dir.glob("*.jsonl")): + # Extract session ID from filename (e.g., "session-id.jsonl" -> "session-id") + session_id = path.stem + + if session_id == current_session_id: + skipped_current.append(session_id) + continue + + if is_stale_buffer(path, max_age_seconds=max_age_seconds, now_timestamp=now): + try: + finalize( + store, session_id, cwd=cwd, + generated_at=datetime.now(timezone.utc).isoformat(), + ) + finalized.append(session_id) + except Exception: + # Never let a finalize failure break the scan + pass + else: + skipped_recent.append(session_id) + + return { + "finalized": finalized, + "skipped_recent": skipped_recent, + "skipped_current": skipped_current, + } +``` + +Note: You'll need to import `datetime` and `timezone` at the top of capture.py if not already imported: + +```python +from datetime import datetime, timezone +``` + +- [ ] **Step 8: Run all tests to verify they pass** + +```bash +pytest tests/test_capture.py::test_finalize_all_except_skips_current_session tests/test_capture.py::test_finalize_all_except_finalizes_old_buffer tests/test_capture.py::test_finalize_all_except_skips_recent_buffers tests/test_capture.py::test_finalize_all_except_multiple_buffers tests/test_capture.py::test_finalize_all_except_empty_captures_dir tests/test_capture.py::test_finalize_all_except_returns_proposal_ids -xvs +``` + +Expected: All 6 PASS + +- [ ] **Step 9: Commit** + +```bash +git add src/vouch/capture.py tests/test_capture.py +git commit -m "feat(capture): add finalize_all_except() for old buffer 
cleanup + +implement is_stale_buffer() to check file age, and finalize_all_except() +to bulk-finalize capture buffers older than a threshold. this enables +sessionstart cleanup of orphaned buffers from previous sessions. + +includes 9 unit tests covering single/multiple buffers, age boundaries, +and current session exclusion." +``` + +--- + +## Task 2: Add `vouch capture finalize-all` CLI command + +**Files:** +- Modify: `src/vouch/cli.py:800+` (after existing `finalize` command) +- Test: `tests/test_capture.py` + +**Interfaces:** +- Consumes: `capture.finalize_all_except(store, session_id, max_age_seconds=...) -> dict` +- Produces: CLI command `vouch capture finalize-all [--session-id ] [--max-age-seconds ]` + +- [ ] **Step 1: Write the failing test for CLI command** + +Add to `tests/test_capture.py`: + +```python +def test_capture_finalize_all_cmd_with_old_buffers(tmp_path, monkeypatch): + """CLI command should finalize old buffers and emit JSON.""" + store = _make_store(tmp_path) + current_sess = "current" + old_sess = "old-session" + + # Create old buffer + old_path = capture_mod.buffer_path(store, old_sess) + old_path.parent.mkdir(parents=True, exist_ok=True) + observations = [ + '{"ts": 1.0, "tool": "Read", "summary": "test1"}', + '{"ts": 2.0, "tool": "Read", "summary": "test2"}', + '{"ts": 3.0, "tool": "Read", "summary": "test3"}', + ] + old_path.write_text("\n".join(observations) + "\n") + old_mtime = time.time() - 7200 + old_path.touch(times=(old_mtime, old_mtime)) + + # Mock the store discovery and cwd + monkeypatch.setenv("VOUCH_KB_ROOT", str(store.kb_root)) + + runner = CliRunner() + result = runner.invoke(cli, [ + "capture", "finalize-all", + "--session-id", current_sess, + "--max-age-seconds", "3600", + ]) + + assert result.exit_code == 0 + output = json.loads(result.output) + assert old_sess in output["finalized"] + assert current_sess in output["skipped_current"] + + +def test_capture_finalize_all_cmd_reads_session_from_env(tmp_path, monkeypatch): + 
"""CLI command should fall back to VOUCH_SESSION_ID env var.""" + store = _make_store(tmp_path) + current_sess = "from-env" + + # Create current session buffer + curr_path = capture_mod.buffer_path(store, current_sess) + curr_path.parent.mkdir(parents=True, exist_ok=True) + curr_path.write_text('{"ts": 1.0, "tool": "Read", "summary": "test"}\n') + + monkeypatch.setenv("VOUCH_KB_ROOT", str(store.kb_root)) + monkeypatch.setenv("VOUCH_SESSION_ID", current_sess) + + runner = CliRunner() + result = runner.invoke(cli, ["capture", "finalize-all"]) + + assert result.exit_code == 0 + output = json.loads(result.output) + assert current_sess in output["skipped_current"] + + +def test_capture_finalize_all_cmd_silent_on_no_kb(monkeypatch, tmp_path): + """CLI command should silently succeed if KB not found.""" + monkeypatch.setenv("VOUCH_KB_ROOT", str(tmp_path / "nonexistent")) + + runner = CliRunner() + result = runner.invoke(cli, [ + "capture", "finalize-all", + "--session-id", "test", + ]) + + # Should exit 0, not fail + assert result.exit_code == 0 +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +pytest tests/test_capture.py::test_capture_finalize_all_cmd_with_old_buffers -xvs +``` + +Expected: `FAILED ... no such option: --session-id` or similar + +- [ ] **Step 3: Implement CLI command in cli.py** + +Find the `@capture.command("finalize")` section (around line 740-760). 
After the `capture_finalize_cmd()` function and before the `@capture.command("banner")`, add: + +```python +@capture.command("finalize-all") +@click.option("--session-id", default=None, help="Current session id (else env VOUCH_SESSION_ID).") +@click.option("--max-age-seconds", type=float, default=3600.0, help="Max age in seconds.") +def capture_finalize_all_cmd(session_id: str | None, max_age_seconds: float) -> None: + """Finalize all capture buffers except current session (SessionStart cleanup).""" + sid = session_id or os.environ.get("VOUCH_SESSION_ID") or "" + if not sid: + # No session ID provided; silently succeed + _emit_json({"finalized": [], "skipped_recent": [], "skipped_current": []}) + return + + store = _capture_store() + if store is None: + # No KB; silently succeed + _emit_json({"finalized": [], "skipped_recent": [], "skipped_current": []}) + return + + result = capture_mod.finalize_all_except( + store, sid, max_age_seconds=max_age_seconds, + ) + _emit_json(result) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +pytest tests/test_capture.py::test_capture_finalize_all_cmd_with_old_buffers tests/test_capture.py::test_capture_finalize_all_cmd_reads_session_from_env tests/test_capture.py::test_capture_finalize_all_cmd_silent_on_no_kb -xvs +``` + +Expected: All 3 PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/vouch/cli.py tests/test_capture.py +git commit -m "feat(cli): add 'vouch capture finalize-all' command + +new command finalizes all capture buffers except the given session, +if they are older than max_age_seconds (default 3600s). silently +succeeds if no KB found. used by sessionstart hook to clean up +orphaned buffers from previous sessions. + +includes 3 unit tests covering option parsing, env fallback, and +graceful degradation." 
+``` + +--- + +## Task 3: Update adapter SessionStart hook + +**Files:** +- Modify: `adapters/claude-code/.claude/settings.json:22-42` +- No test (hook configuration is integration-tested via the capture tests) + +**Interfaces:** +- Consumes: CLI command `vouch capture finalize-all` (from Task 2) +- Produces: Updated hook configuration + +- [ ] **Step 1: Modify the SessionStart hook** + +Edit `adapters/claude-code/.claude/settings.json`. Change the `SessionStart` section from: + +```json +"SessionStart": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch status --json || true" + }, + { + "type": "command", + "command": "vouch capture banner || true" + }, + { + "type": "command", + "command": "vouch recall || true" + } + ] + } +] +``` + +To: + +```json +"SessionStart": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch capture finalize-all || true" + }, + { + "type": "command", + "command": "vouch status --json || true" + }, + { + "type": "command", + "command": "vouch capture banner || true" + }, + { + "type": "command", + "command": "vouch recall || true" + } + ] + } +] +``` + +The key change: `finalize-all` is now the **first** hook, so old buffers are cleaned up before the banner and recall. + +- [ ] **Step 2: Verify the JSON is valid** + +```bash +python -c "import json; json.load(open('adapters/claude-code/.claude/settings.json'))" +``` + +Expected: No output (valid JSON) + +- [ ] **Step 3: Commit** + +```bash +git add adapters/claude-code/.claude/settings.json +git commit -m "feat(adapter): wire capture finalize-all into sessionstart hook + +run 'vouch capture finalize-all' as the first sessionstart hook +to clean up orphaned buffers from previous sessions before +banner and recall commands. ensures old sessions are finalized +automatically on next session start." 
+``` + +--- + +## Task 4: Add WindowClose hook configuration (conditional) + +**Files:** +- Modify: `adapters/claude-code/.claude/settings.json:52+` +- No test (integration-level, not unit-testable) + +**Interfaces:** +- Consumes: CLI command `vouch capture finalize` (existing) +- Produces: WindowClose hook (conditional) + +- [ ] **Step 1: Check if VS Code extension supports WindowClose event** + +Check the Claude Code extension documentation or recent commits to see if `WindowClose` is a supported hook event. Look for: +- Extension release notes mentioning new hook types +- Tests or examples using `WindowClose` +- Comments in settings.json from recent commits + +**For now, assume it's NOT supported yet.** If you find evidence that it IS supported, proceed to Step 2. Otherwise, skip to Step 3. + +- [ ] **Step 2: (If WindowClose is supported) Add the hook** + +Add this new hook section after the `PostToolUse` section in `adapters/claude-code/.claude/settings.json`: + +```json +"WindowClose": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch capture finalize || true" + } + ] + } +] +``` + +Then commit: + +```bash +git commit -m "feat(adapter): wire capture finalize into windowclose hook + +finalize the current session's buffer when the vs code window closes, +creating a proposal immediately without waiting for next session start." +``` + +- [ ] **Step 3: (If WindowClose is NOT supported) Document the fallback** + +Add a comment to the settings.json explaining the fallback behavior. Change the SessionStart comment to: + +```json +"SessionStart": [ + { + "comment": "finalize old buffers from previous sessions; current session will be finalized here too on next session start (fallback: windowclose event not yet supported)", + "matcher": "*", + ... + } +] +``` + +Then update the adapter README to document this. See Task 5. 
+ +--- + +## Task 5: Update adapter README with behavior docs + +**Files:** +- Modify: `adapters/claude-code/README.md` +- No test + +**Interfaces:** +- Produces: User-facing documentation + +- [ ] **Step 1: Add a "Session Capture" section to the README** + +Find the README and add this section (after the "Installation" or "Features" section): + +````markdown +## Session Capture & Auto-Proposal + +When you work in a Claude Code session, vouch automatically captures your +tool use (file reads, edits, commands, etc.). When you close the session +window, vouch proposes the captured knowledge to the KB for review. + +### How it works + +1. **Capture**: Each tool call (Read, Edit, Bash, etc.) is logged to + `.vouch/captures/<session-id>.jsonl` (gitignored). + +2. **Cleanup on session start**: When you start a new session, any + unfinalized buffers from previous sessions (>1 hour old) are + automatically finalized and proposed. + +3. **Finalize on window close**: When the VS Code window closes, the + current session is finalized and proposed. + +### Configuration + +Disable capture in `.vouch/config.yaml`: + +```yaml +capture: + enabled: false +``` + +Adjust the stale buffer age (default: 1 hour): + +```yaml +capture: + max_age_seconds: 7200 # finalize buffers >2 hours old +``` + +### Fallback behavior + +If the "window close" event is not yet supported by your version of the +Claude Code extension, the current session will be finalized on the *next* +session start instead. The behavior is the same; proposals just appear in +the next session rather than immediately. + +To upgrade or check your extension version, see [Claude Code releases](https://github.com/anthropics/claude-code-releases). +```` + +- [ ] **Step 2: Commit** + +```bash +git add adapters/claude-code/README.md +git commit -m "docs(adapter): explain session capture auto-proposal behavior + +add section describing how capture works, configuration options, +and the fallback behavior if windowclose event is not available." 
+``` + +--- + +## Task 6: Add integration test (smoke test) + +**Files:** +- Modify: `tests/test_capture.py` +- (Optional make target in Makefile) + +**Interfaces:** +- Consumes: All functions from Tasks 1-2 +- Produces: End-to-end test + +- [ ] **Step 1: Write the end-to-end test** + +Add to `tests/test_capture.py`: + +```python +def test_capture_e2e_sessionstart_cleanup_then_finalize(tmp_path): + """End-to-end: old buffers cleaned up on sessionstart, current session on finalize.""" + store = _make_store(tmp_path) + + # Simulate a previous session that crashed/closed without finalize + old_sess = "crashed-session" + old_path = capture_mod.buffer_path(store, old_sess) + old_path.parent.mkdir(parents=True, exist_ok=True) + observations = [ + '{"ts": 1.0, "tool": "Read", "summary": "test1"}', + '{"ts": 2.0, "tool": "Read", "summary": "test2"}', + '{"ts": 3.0, "tool": "Read", "summary": "test3"}', + ] + old_path.write_text("\n".join(observations) + "\n") + old_mtime = time.time() - 7200 # 2 hours ago + old_path.touch(times=(old_mtime, old_mtime)) + + # Simulate a new session starting + new_sess = "new-session" + + # 1. SessionStart cleanup (finalize old buffers) + cleanup_result = capture_mod.finalize_all_except( + store, new_sess, max_age_seconds=3600.0 + ) + assert old_sess in cleanup_result["finalized"] + assert not old_path.exists() + + # Verify old session was proposed + pending_before = store.list_proposals(ProposalStatus.PENDING) + old_proposals = [p for p in pending_before if p.session_id == old_sess] + assert len(old_proposals) == 1 + + # 2. 
SessionEnd finalize (current session) + new_path = capture_mod.buffer_path(store, new_sess) + new_path.write_text("\n".join(observations) + "\n") + + finalize_result = capture_mod.finalize(store, new_sess) + assert finalize_result["summary_proposal_id"] is not None + assert not new_path.exists() + + # Verify new session was proposed + pending_after = store.list_proposals(ProposalStatus.PENDING) + new_proposals = [p for p in pending_after if p.session_id == new_sess] + assert len(new_proposals) == 1 + + # Total proposals: old + new + assert len(pending_after) >= 2 +``` + +- [ ] **Step 2: Run the test to verify it passes** + +```bash +pytest tests/test_capture.py::test_capture_e2e_sessionstart_cleanup_then_finalize -xvs +``` + +Expected: PASS + +- [ ] **Step 3: Commit** + +```bash +git add tests/test_capture.py +git commit -m "test(capture): add e2e test for sessionstart cleanup + finalize flow + +verify that old buffers are cleaned up on new session start +and current session is finalized on window close (or manual finalize), +resulting in two separate proposals." 
+``` + +--- + +## Task 7: Run full test suite and verify no regressions + +**Files:** (no changes) + +- [ ] **Step 1: Run all capture tests** + +```bash +source .venv/bin/activate +pytest tests/test_capture.py -v +``` + +Expected: All tests PASS (>15 tests now) + +- [ ] **Step 2: Run full test suite (ignore embeddings)** + +```bash +pytest tests/ -q --ignore=tests/embeddings +``` + +Expected: No new failures, all existing tests still pass + +- [ ] **Step 3: Run mypy type check** + +```bash +python -m mypy src +``` + +Expected: No new errors, all files type-check + +- [ ] **Step 4: Run ruff lint** + +```bash +python -m ruff check src tests +``` + +Expected: No new issues, all code follows style + +- [ ] **Step 5: Run make check (convenience wrapper)** + +```bash +make check +``` + +Expected: All checks green + +--- + +## Task 8: Commit spec and plan files + +**Files:** +- Already created: `docs/superpowers/specs/2026-07-01-vscode-session-autoproposal-design.md` +- Already created: `docs/superpowers/plans/2026-07-01-vscode-session-autoproposal.md` + +- [ ] **Step 1: Add design doc and plan to git** + +```bash +git add docs/superpowers/specs/2026-07-01-vscode-session-autoproposal-design.md +git add docs/superpowers/plans/2026-07-01-vscode-session-autoproposal.md +git commit -m "docs(superpowers): add design and plan for vscode session auto-proposal + +design: automatic finalization of old buffers on sessionstart, +and current session on window close. + +plan: 8-task breakdown covering capture.py functions, cli command, +adapter hooks, comprehensive unit tests, integration test, and +documentation." 
+``` + +--- + +## Summary + +| Task | Deliverable | Tests | +|------|-------------|-------| +| 1 | `is_stale_buffer()`, `finalize_all_except()` in capture.py | 9 unit tests | +| 2 | `vouch capture finalize-all` CLI command | 3 unit tests | +| 3 | Updated SessionStart hook in adapter settings | 0 (config) | +| 4 | WindowClose hook (conditional) | 0 (config) | +| 5 | README docs on behavior | 0 (docs) | +| 6 | E2E integration test | 1 test | +| 7 | Full regression testing | 0 (validation) | +| 8 | Commit design & plan | 0 (docs) | + +**Total new tests: 13+ unit tests + 1 integration test** + +--- + +## Execution + +**Plan complete and saved to `docs/superpowers/plans/2026-07-01-vscode-session-autoproposal.md`.** + +Two execution options: + +1. **Subagent-Driven (recommended)** — I dispatch a fresh subagent per task (or 2-3 related tasks), review between tasks, fast iteration with checkpoints + +2. **Inline Execution** — Execute tasks in this session sequentially with checkpoints for review + +**Which approach would you prefer?** diff --git a/docs/superpowers/specs/2026-07-01-vouch-session-autocapture-design.md b/docs/superpowers/specs/2026-07-01-vouch-session-autocapture-design.md new file mode 100644 index 00000000..65b62cb8 --- /dev/null +++ b/docs/superpowers/specs/2026-07-01-vouch-session-autocapture-design.md @@ -0,0 +1,272 @@ +# auto-capture claude code sessions into review-gated summaries + +- status: draft, awaiting review +- date: 2026-07-01 +- scope: one implementation plan + +## goal + +once vouch is installed in a workspace, a claude code session that starts and +ends should be captured automatically, rolled up into a single human-readable +summary, and filed as a **pending proposal** so a human approves it before it +becomes durable knowledge. no per-session setup, no agent discipline required, +and no bypass of the review gate. + +## north-star fit + +vouch's load-bearing invariant is "every write goes through a review gate." 
+this feature adds an automatic *capture* path but deliberately keeps the +*write* path gated: the captured summary lands as a `PENDING` page proposal via +`proposals.propose_page`, exactly like any hand-filed write. nothing is +auto-approved. the automatic part is the harvesting and the nudge, never the +commit. + +## background: the load-bearing constraint + +vouch's servers (mcp, jsonl) never see the conversation transcript — they only +see discrete tool calls a client makes (`kb.propose_claim`, `kb.search`, …). +there is no code path that persists agent/user messages to vouch storage. so +"auto-capture a session" cannot mean "vouch reads the chat and extracts facts"; +vouch has no chat to read. + +the transcript *is* reachable, but only from the **client side**: claude code +hooks receive a stdin payload that includes `session_id`, `cwd`, and +`transcript_path`. that is the seam this design uses. + +### how memvid/claude-brain resolves the same feature (reference) + +memvid ships a claude code plugin with three hooks (`hooks/hooks.json`): + +- `SessionStart` → injects a memory banner into context. +- `PostToolUse` (matcher `*`) → after every tool call, scrapes the tool + input/output into an "observation" and writes it straight to a single + `.claude/mind.mv2` file. +- `Stop` → rolls the session's observations up into a summary. + +two facts from their code drive our design: + +1. **capture is fully passive.** the `PostToolUse` hook harvests tool i/o; the + agent never decides to record anything. this sidesteps the "agent forgot to + propose" failure mode. +2. **summarization uses no llm.** `generateSessionSummary` is pure heuristics — + count observation types, keyword-match "chose"/"decided" as decisions, + regex file paths out of the transcript, emit a template string ("added 2 + feature(s). fixed 1 bug(s)."). the `Stop` hook also runs `git diff` + + `find -mmin -30` to catch file edits (`PostToolUse` doesn't fire for `Edit` + in claude code). 
+ +memvid has **no review gate** — everything auto-writes and auto-injects. that +is the one thing vouch must not copy. the adaptation: harvest passively like +memvid, roll up mechanically like memvid, but file the result as **one pending +proposal** instead of a direct write, and **once per session** (not per `Stop` +turn) so the review queue is not flooded. + +## design overview + +two halves. + +**half a — adapter wiring (claude code hooks).** the vouch claude code adapter +registers hooks that harvest tool-use into an ephemeral, gitignored scratch +buffer during the session, and roll it up at session end. + +**half b — server-side capture support.** a new `src/vouch/capture.py` module +plus two cli subcommands do the buffer i/o and the mechanical rollup, and file +the summary through the existing `proposals.propose_page` gate. + +``` +SessionStart hook ─▶ vouch capture banner (nudge: N summaries awaiting review) + (+ existing `vouch status --json`) + +PostToolUse hook ─▶ vouch capture observe ─▶ append one line to + (every tool) .vouch/captures/.jsonl + (scratch, gitignored, NOT the kb) + +SessionEnd hook ─▶ vouch capture finalize ─▶ read buffer + git diff + ─▶ mechanical rollup (no llm) + ─▶ proposals.propose_page(PENDING) + ─▶ delete buffer +``` + +## components + +### 1. scratch buffer (pre-review working material) + +- path: `.vouch/captures/.jsonl`, one json object per line. +- each line is a compact observation: `{ts, tool, summary, files?, cmd?}`. + keep it minimal — a one-line summary and file names, **not** full tool + output, to avoid buffering secrets or large blobs. +- this is scratch, held **outside** the kb. it is the raw material the rollup + reads; it never becomes durable on its own. it must be gitignored (add + `.vouch/captures/` to the ignore set). +- correlation key is claude code's own `session_id` (from the hook payload). 
+ no vouch `Session` object is required for the passive path — the summary + proposal simply carries `session_id=` for traceability. + +### 2. `vouch capture observe` (cli, called by PostToolUse) + +- reads the hook payload from stdin (`tool_name`, `tool_input`, + `tool_response`, `session_id`, `cwd`). +- skips tools not worth capturing and skips vouch's own mcp tools + the capture + command itself (no self-capture / recursion). +- dedups within a short window (memvid uses 60s) so repeated identical calls + don't spam the buffer. +- appends one observation line. must be fast and never block (short timeout, + swallow errors, always exit 0) — a capture failure must never break the + user's tool call. +- **startup-cost constraint:** this runs on *every* tool call, so a full + `import vouch` (pydantic models, index_db, etc.) on each invocation would tax + every tool. `observe` must take a minimal fast path — read stdin, dedup, + append a line — importing as little as possible (ideally not the full model + stack). memvid affords a per-call node process because node starts in + milliseconds; python's heavier startup makes keeping `observe`'s import + surface small a real requirement, not a nicety. + +### 3. `vouch capture finalize` (cli, called by SessionEnd) + +- reads the buffer for ``. +- backstop harvest for edits `PostToolUse` missed: `git diff --name-only` + + `git diff --stat` (short timeouts) and recently-modified files. +- if total observations `< capture.min_observations` (default 3), file nothing + and exit — trivial sessions don't clutter the queue. +- otherwise build the summary markdown body **mechanically, no llm**: + - header: project, claude session id, time range. + - "files modified this session": from git diff + recent files. + - "git changes": `git diff --stat` (truncated). + - "activity": type counts (reads / edits / writes / commands / searches). + - "notable commands": a few bash commands run. 
+ - "observations": the compact one-line summaries, capped. +- file it: `proposals.propose_page(store, title=…, body=…, page_type="session", + proposed_by="vouch-capture", session_id=, + rationale="auto-captured session summary")` → lands `PENDING`. + - pages do not require citations (only claims do — `proposals.py:122` vs + `propose_page`), so the summary is a citation-free markdown index. + - the session's harvested items are referenced **textually** in the body, not + as `claim_ids` — they were never approved claims, and `propose_page` rejects + unknown ids (`proposals.py:188`). +- on success, delete the buffer file. + +### 4. notification + +**adjustment from the earlier "return-value nudge relayed by the agent" +decision — please confirm at review.** that idea assumed capture fires during a +live agent turn (an in-conversation `kb.session_end` call whose response the +agent relays). with hook-driven capture, the `SessionEnd` hook runs *after* the +agent turn is over, so there is no agent to relay anything. the equivalent that +actually works is the memvid pattern: + +- **primary — next-session banner.** extend the adapter's existing + `SessionStart` hook to also check for pending captured summaries and inject a + line via `hookSpecificOutput.additionalContext`: e.g. "🔔 3 auto-captured + session summaries awaiting review — run `vouch review`." this surfaces the + nudge at the start of the next session in that workspace. +- **passive — the queue.** captured summaries are ordinary pending proposals, + so `vouch pending`, `vouch review`, and the review-ui show them with no extra + work. note: the review-ui's websocket broadcast fires on mutations made + *through the web server*; a proposal filed by the cli hook writes a yaml file + directly, so it appears in the review-ui on next load/refresh, not + necessarily as a live push (unless the web server grows a filesystem watch — + out of scope here). 
+- **secondary — return-value sidebar.** if capture is *also* triggered by an + in-conversation `kb.session_end` call (not just the hook), attach + `_meta.vouch_capture = {session_id, summary_proposal_id, hint}` to the + response so an agent can relay it mid-chat. optional; the banner is the + path that always works. + +### 5. config (`capture.*`, read defensively) + +follow the `volunteer_context.load_config` template — `yaml.safe_load` in +try/except, `isinstance(dict)` per level, explicit coercion, hardcoded +defaults. add the namespace to `storage.py` `_starter_config`. + +- `capture.enabled` — default `true`. when false, `observe`/`finalize` no-op. +- `capture.min_observations` — default `3`. +- `capture.buffer_dir` — default `.vouch/captures/`. + +### 6. adapter changes + +- `adapters/claude-code/.claude/settings.json`: add a `PostToolUse` hook + (`vouch capture observe`) and a `SessionEnd` hook (`vouch capture finalize`); + extend the existing `SessionStart` hook to inject the review banner alongside + `vouch status --json`. hooks call the `vouch` console script, consistent with + the `vouch status --json` hook already shipped (path availability already + assumed). +- `adapters/claude-code/install.yaml`: document the expanded T4 (hooks now + cover session capture, not just SessionStart status). + +## review-gate compliance + +- the only durable artifact is a `PENDING` page proposal; a human approves it + via `vouch review` / review-ui. no `approve()` is ever called by capture. +- observations live in `.vouch/captures/` (scratch, gitignored) until the + reviewer approves the rollup — they never enter the kb on their own. +- storage.py stays pure i/o; all rollup/business logic is in `capture.py`. + +## registration / parity + +`vouch capture observe|finalize` are **cli-only plumbing for the hooks** — an +agent never calls them. they are therefore **not** `kb.*` methods: no +`@mcp.tool()`, no jsonl handler, no `capabilities.METHODS` entry. 
this sidesteps +the four-site parity burden entirely and leaves `test_capabilities` untouched. +(`test_capabilities` asserts `METHODS == HANDLERS.keys()` — mcp/jsonl parity +only; cli commands are not in that set.) + +## explicitly out of scope (yagni) + +- **no llm anywhere in the capture path** — mechanical rollup only, matching + vouch's zero-llm reflex philosophy and keeping the hook fast/offline. +- **no per-observation proposals** — one summary proposal per session, or the + queue floods. individual claims the agent files via mcp during a session + (the older "agent proposes as it works" flow) still coexist as their own + pending items; the summary can mention them but does not depend on them. +- **no auto-approve / no trusted-agent shortcut.** +- **no out-of-band notifier** (email/desktop/slack) — outside vouch's + local-first, in-process design. +- **stale-session sweep is a follow-up.** `SessionEnd` can be skipped on a hard + crash (`kill -9`), orphaning a buffer. a later `vouch capture finalize + --stale` (rolls up buffers with no recent activity) mops that up; not in the + first cut. buffers left behind are harmless scratch. + +## testing (`tests/test_capture.py`) + +- `observe` appends an observation line; dedups within the window. +- `finalize` with ≥ `min_observations` files exactly one `PENDING` page + proposal with `proposed_by="vouch-capture"` and `page_type="session"`. +- `finalize` with `< min_observations` files nothing. +- `capture.enabled=false` → both commands no-op. +- summary body contains the files-modified section and activity counts. +- the proposal stays `PENDING` (capture never approves). +- the buffer file is deleted after a successful `finalize`. +- fully offline: no llm, no network, git calls stubbed or run in a temp repo. + +## files touched + +- new `src/vouch/capture.py` — buffer i/o, mechanical rollup, summary-body + builder, `propose_page` call. +- `src/vouch/cli.py` — `vouch capture` group (`observe`, `finalize`). 
+- `src/vouch/storage.py` — `capture.*` in `_starter_config`; ensure + `.vouch/captures/` is gitignored. +- `adapters/claude-code/.claude/settings.json` — PostToolUse + SessionEnd hooks; + SessionStart banner. +- `adapters/claude-code/install.yaml` — document expanded T4. +- new `tests/test_capture.py`. +- `CHANGELOG.md` `[Unreleased]` entry (follow-up at ship time). + +## open questions / risks + +1. **notification adjustment** (section 4) — confirm the next-session banner as + the primary channel, replacing the "agent relays a return value" idea that + the hook-driven lifecycle makes unworkable. +2. **secret hygiene** — `observe` stores one-line summaries + file names, not + full command output, to avoid buffering credentials. confirm that's + conservative enough, or add a redaction pass. +3. **`SessionEnd` vs `Stop`** — this design uses `SessionEnd` (fires once per + session) rather than memvid's `Stop` (fires per turn) to keep one summary + per session. confirm that's the desired granularity. +4. **cross-tool reach** — the hooks are claude-code-specific. cursor/codex would + need their own adapter wiring later; the `capture.py` core is host-agnostic + and reusable. +5. **per-tool-call process cost** — see the `observe` startup-cost constraint. + if a minimal python fast path still adds noticeable per-tool latency, a + fallback is a tiny standalone appender script shipped with the adapter + (à la memvid's node hooks) instead of routing through the `vouch` console + script. 
diff --git a/docs/superpowers/specs/2026-07-01-vscode-session-autoproposal-design.md b/docs/superpowers/specs/2026-07-01-vscode-session-autoproposal-design.md new file mode 100644 index 00000000..74f0aa6c --- /dev/null +++ b/docs/superpowers/specs/2026-07-01-vscode-session-autoproposal-design.md @@ -0,0 +1,228 @@ +# VS Code Session Auto-Proposal Design + +**Date:** 2026-07-01 +**Status:** Design phase +**Goal:** When a Claude Code session closes in VS Code, automatically finalize and propose its captured knowledge to vouch, without user action. + +## Problem + +Today, vouch captures observations from VS Code Claude Code sessions into `.vouch/captures/.jsonl`. The `vouch capture finalize` command works correctly and creates proposals. However: + +1. The `SessionEnd` hook (configured in `adapters/claude-code/.claude/settings.json`) is **not firing** when the user closes the session window +2. Users must manually run `vouch capture finalize` to turn captured observations into a proposal +3. Sessions that crash or are force-closed leave orphaned capture buffers with no way to finalize them + +## Solution Overview + +Implement automatic finalization through two mechanisms: + +### **Mechanism 1: SessionStart cleanup (old sessions)** +When a new Claude Code session starts, scan `.vouch/captures/` and finalize any buffers that don't belong to the current session. This catches: +- Previous sessions that closed but weren't finalized +- Crashed sessions with stale buffers +- Sessions from other projects (optional: filter by cwd) + +### **Mechanism 2: Window close handler (current session)** +When the VS Code window closes or the extension unloads, finalize the current session's buffer. + +**Implementation strategy:** Use Approach 1 (preferred) if the extension exposes a close event; fall back to Approach 2 (lazy finalization at next SessionStart) if not. + +## Architecture Changes + +### Files to modify/create + +1. 
**`src/vouch/capture.py`** + - Add `finalize_all_except(session_id)` — finalize buffers for sessions other than the one given + - Add `is_stale_buffer(path, age_seconds)` — check if a buffer is older than a threshold (e.g., 1 hour) + +2. **`src/vouch/cli.py`** + - Add `vouch capture finalize-all` command — public entry point for SessionStart hook + - Modify existing `capture_finalize_cmd` to handle the current session explicitly + +3. **`adapters/claude-code/.claude/settings.json`** + - Update `SessionStart` hook to run `vouch capture finalize-all` *before* the banner/recall (to clean up old sessions first) + - Try to wire a proper close event if the extension supports it; document fallback behavior + +4. **`tests/test_capture.py`** + - Add tests for `finalize_all_except()` with multiple buffers + - Add tests for stale buffer detection + - Add end-to-end test: create multiple buffers, start new session, verify old ones finalize + +## Behavior Specification + +### SessionStart: clean up old buffers + +**Trigger:** `SessionStart` hook (fires when new session begins) + +**Command:** `vouch capture finalize-all [--session-id ] [--max-age ]` + +**Behavior:** +1. Read `.vouch/captures/` for all `.jsonl` files +2. For each buffer: + - If the session ID matches the current session, skip it + - If the buffer is older than `--max-age` (default: 3600s / 1 hour), finalize it + - Otherwise, skip it (not old enough) +3. Finalize each stale buffer into a proposal +4. Log summary (e.g., "finalized 2 old buffers") + +**Exit behavior:** Never fail the session start. Silently succeed if no buffers to finalize, or if finalization encounters errors. 
+ +### Window close: finalize current session + +**Trigger:** Window close / extension unload (if available); fallback to lazy finalization + +**Desired behavior:** +- Hook fires when VS Code window closes +- Command: `vouch capture finalize --session-id ` (existing command) +- Result: Proposal is created and appears in vouch immediately + +**Fallback (if close hook unavailable):** +- Current session buffer is finalized on *next* SessionStart (caught by cleanup mechanism) +- Proposal appears in the next session, not immediately +- Document this limitation in the adapter README + +## API Design + +### New: `capture.finalize_all_except()` + +```python +def finalize_all_except( + store: KBStore, + current_session_id: str, + *, + max_age_seconds: float = 3600.0, + cwd: Path | None = None, +) -> dict[str, Any]: + """ + Finalize all buffers except current_session_id, if they're older than max_age. + + Returns: + { + "finalized": [id1, id2, ...], + "skipped_recent": [id3, id4, ...], + "skipped_current": [id5], + } + """ +``` + +### Modified: `vouch capture finalize-all` command + +``` +vouch capture finalize-all [--session-id ] [--max-age-seconds ] + +Finalize all capture buffers except the given session (current session). +Falls back to reading from env: VOUCH_SESSION_ID, VOUCH_SESSION_MAX_AGE. 
+``` + +### Error handling + +- If `_capture_store()` returns None (KB not initialized), silently succeed +- If finalize fails for one buffer, continue with the rest (don't cascade) +- Never raise exceptions that break the SessionStart hook + +## Hook Configuration (adapter settings) + +```json +{ + "hooks": { + "SessionStart": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch capture finalize-all || true" + }, + { + "type": "command", + "command": "vouch status --json || true" + }, + { + "type": "command", + "command": "vouch capture banner || true" + }, + { + "type": "command", + "command": "vouch recall || true" + } + ] + } + ], + "PostToolUse": [ ... ], + "WindowClose": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "vouch capture finalize || true" + } + ] + } + ] + } +} +``` + +If `WindowClose` event is not available in the extension, this section is removed and a note is added to the README documenting the fallback behavior. + +## Testing Strategy + +1. **Unit tests** (`tests/test_capture.py`): + - `test_finalize_all_except_with_multiple_buffers()` + - `test_finalize_all_except_skips_current_session()` + - `test_finalize_all_except_skips_recent_buffers()` + - `test_finalize_all_except_removes_stale_buffers()` + +2. **CLI tests**: + - `test_capture_finalize_all_command()` + +3. 
**End-to-end smoke test** (make target): + - Create mock session with observations + - Simulate SessionStart + - Verify old buffers are finalized + - Verify pending count increases + +## Scope & Constraints + +**In scope:** +- SessionStart cleanup of old buffers +- Public `finalize-all` command +- Tests +- Adapter hook wiring (conditional on extension support) + +**Out of scope:** +- Changing the capture observation mechanism +- Storage format changes +- Cross-project buffer isolation (all buffers in one KB) +- Compression or archiving of old proposals + +**Constraints:** +- Never break SessionStart (all errors are caught) +- Backward compatible with existing `finalize` command +- Works with current KB structure (no schema changes) +- Max age defaults to 1 hour (configurable) + +## Success Criteria + +1. ✅ Old capture buffers from previous sessions are finalized when a new session starts +2. ✅ Current session buffer is finalized when the window closes (if hook available) +3. ✅ Proposals appear in vouch `pending` list without user action +4. ✅ All tests pass, no regressions +5. ✅ Adapter settings are updated with hook configuration +6. ✅ README documents the behavior and fallback + +## Open Questions + +1. Does the VS Code Claude Code extension fire a `WindowClose` event? If not, document the fallback clearly. +2. Should max-age be configurable per project in `config.yaml`? (Defer to future if not critical) +3. Should we filter buffers by `cwd` to avoid finalizing sessions from other projects? (Future enhancement) + +--- + +**Next steps:** +1. User reviews this spec +2. Implement `finalize_all_except()` and CLI command +3. Update adapter hooks +4. Write tests +5. 
Test end-to-end diff --git a/docs/transports.md b/docs/transports.md index fb3b0133..64105fa3 100644 --- a/docs/transports.md +++ b/docs/transports.md @@ -48,6 +48,53 @@ Two named prompts you might wire up: - `vouch.crystallize_session` — "summarise this session's proposals into a page" +### How a host talks to vouch (end to end) + +What actually happens when an MCP host — Claude Code, Cursor, Codex — uses +vouch over stdio: + +``` +host (e.g. Claude Code) vouch serve (child process) + │ │ + 1. spawn ── command: vouch, args: [serve] ───────▶ │ one process per session; + │ env: VOUCH_AGENT=claude-code │ stdin/stdout pipe stays open + │ │ + 2. handshake ── initialize / tools/list ─────────▶ │ (owned by the MCP SDK — + │ ◀── kb_* tool list ─────────────────│ vouch writes no framing code) + │ │ + 3. tools/call kb_propose_claim ───────────────────▶ │ server.py (thin) → proposals.py + │ ◀── result + _meta.vouch_trust ──────│ → proposed/.yaml + audit event + │ │ + 4. tools/call kb_approve ─────────────────────────▶ │ review gate: blocks self-approval + │ ◀── claim now durable ──────────────│ → claims/.yaml + decided/.yaml +``` + +The wire is **JSON-RPC 2.0, one message per line** over the child's +stdin/stdout — no socket, no HTTP. The host owns the subprocess; the MCP SDK +owns the `initialize → tools/list → tools/call` handshake. vouch only supplies +the `kb_*` tool implementations. + +A few things worth knowing: + +- **The gate is on approve, not propose.** `kb_propose_claim` writes a *pending* + proposal under `proposed/` (gitignored); the claim becomes durable only when + `kb_approve` runs **and** the approver isn't the proposer (unless + `review.approver_role: trusted-agent`). Same gate on every transport. +- **Identity rides in on `VOUCH_AGENT`** (see [Identity](#identity)). The adapter + sets it in the child's env; `claude mcp add vouch -- vouch serve` doesn't, so + the actor falls back to `unknown-agent` — add `-e VOUCH_AGENT=…` if you want + attribution. 
+- **Reads carry provenance.** Every dict-shaped result gets a `_meta.vouch_trust` + block (`{remote, caller_kind, auth_subject}`) so the host can tell a local + stdio call from a remote one; `kb_context` also attaches a session-gated + `_meta.vouch_salience` sidebar. +- **vouch can talk back.** `kb_session_start` registers a server→client push + channel, so vouch can proactively offer context during a session rather than + only answering calls. + +Tool errors come back through MCP's native error mechanism — the host surfaces +them to the model as a failed tool call, not as a `kb.*` error envelope. + ## JSONL (stdin/stdout) ```bash diff --git a/docs/tutorials/README.md b/docs/tutorials/README.md new file mode 100644 index 00000000..addaa377 --- /dev/null +++ b/docs/tutorials/README.md @@ -0,0 +1,82 @@ +# Tutorials + +Step-by-step walkthroughs that take you from zero to a working outcome. +Concrete commands, real output, no abstraction-first jargon. Each tutorial +assumes no prior vouch knowledge and follows the +[Diátaxis](https://diataxis.fr/) tutorial pattern: learning-oriented, walks +you from nothing to a working result in one session, every step produces a +visible change. + +Every command shown here was run end-to-end against the current build before +it went into the docs. If a feature isn't verified yet, it's in the +[On the roadmap](#on-the-roadmap) list below — not in a tutorial. + +## Shipped + +- [**Build your first knowledge base**](first-knowledge-base.md) — the + canonical solo walkthrough. Initialise a KB in any git repo, register a + source, propose a claim, hit the review gate (the moment vouch refuses to + let you approve your own proposal), approve it as a reviewer, then recall it + with `search`, `context`, and `synthesize`. Ends with the provenance trail: + `why`, `audit`, `cite`. About 15 minutes, no API keys. 
+ +- [**Give your coding agent a reviewed memory**](connect-coding-agent.md) — + wire vouch into Claude Code, Codex, Cursor, or any of nine MCP hosts with one + command. Walks the `.mcp.json` + `CLAUDE.md` that `install-mcp` writes, the + brain-first protocol the agent follows, ambient capture (the agent proposes + while it works), and the review loop where you approve what lands. About 10 + minutes; needs a KB from the first tutorial. + +- [**Share a knowledge base across machines and teammates**](share-a-knowledge-base.md) + — bundle a reviewed KB into a portable `.tar.gz`, preview the diff before you + apply it, and import it into another KB with conflict-safe merging. Shows why + the review gate is a *team* safety property: no contributor can rubber-stamp + their own writes. Ends with `metrics` and `stats` for observability. About 20 + minutes, no API keys. + +- [**Edit your KB as markdown in Obsidian**](edit-in-obsidian.md) — mirror + approved pages and claims into an Obsidian-compatible vault (wikilinks and + all, so the graph view works), edit a page in your editor of choice, and + watch the edit come back as a review-gated proposal. The bytes you changed + cite themselves so the reviewer sees exactly what triggered the change. About + 15 minutes. + +## On the roadmap + +These features exist in the CLI today but don't have a full tutorial yet. Run +`vouch --help` for the current surface. Open an issue if one of them +is the walkthrough you need most — that's how the order gets decided. + +- **Hybrid + semantic retrieval** — `search` and `context` upgrade from FTS5 + to embedding-backed hybrid ranking when vouch is installed with the + embeddings extra (`pip install -e '.[embeddings]'`). See + [`../embeddings.md`](../embeddings.md) and `vouch embeddings stats`. + +- **Per-project / per-agent scoping** — `VOUCH_PROJECT` and `VOUCH_AGENT` + scope what a viewer sees, configured under `retrieval.scope` in + `config.yaml`. 
See the scoping block in `vouch capabilities`. + +- **Ground a code change in the KB** — `vouch dual-solve`, `vouch auto-pr`, + and `vouch pr-cache` use the reviewed KB to drive and de-duplicate fixes + against a GitHub repo. Advanced; start from each command's `--help`. + +## Want to write one? + +The shipped [`first-knowledge-base.md`](first-knowledge-base.md) is the model: +a concrete scenario, numbered steps that each produce a visible change, real +command output, and a troubleshooting table. If you've used vouch for something +worth walking through, open a PR. Keep every command runnable and verified. + +## Related documentation + +- **Quickstart:** [`../getting-started.md`](../getting-started.md) — the + ten-minute version of the first tutorial. +- **Worked example:** [`../example-session.md`](../example-session.md) — a full + automatic session capture → review → commit → recall loop. +- **Reference:** [`../object-model.md`](../object-model.md) — claims, pages, + entities, relations, sources. [`../review-gate.md`](../review-gate.md) — the + gate in depth. +- **Protocol:** [`../../SPEC.md`](../../SPEC.md) — the contract if you're + writing an alternative server. +- **Per-host setup:** [`../../adapters/`](../../adapters/) — what each MCP host + adapter writes. diff --git a/docs/tutorials/connect-coding-agent.md b/docs/tutorials/connect-coding-agent.md new file mode 100644 index 00000000..fa87bbf1 --- /dev/null +++ b/docs/tutorials/connect-coding-agent.md @@ -0,0 +1,225 @@ +# Give your coding agent a reviewed memory + +Coding agents got very good at code. They're still amnesiac about everything +else — every time the context window closes, the agent forgets why the project +chose JWTs, which approach was already tried and rejected, and what the team +agreed last month. By the end of this tutorial your agent will read the +project's reviewed knowledge before it answers, and propose new claims as it +works — with you on the approve button. 
+ +- **Time:** about 10 minutes +- **You'll need:** a vouch KB (do the + [first tutorial](first-knowledge-base.md) if you don't have one) and an + MCP-capable agent — Claude Code, Codex, or Cursor + +## 1. Wire it in one command + +The fastest path is the agent's own MCP registration. `vouch serve` is a plain +stdio MCP server, so Claude Code (or Codex) can register it directly — no +vouch-specific installer needed: + +```bash +claude mcp add vouch -- vouch serve # or: codex mcp add vouch -- vouch serve +``` + +``` +Added stdio MCP server vouch with command: vouch serve to project config +``` + +Confirm it connected: + +```bash +claude mcp list +``` + +``` +vouch: vouch serve - ✓ Connected +``` + +That's enough to give the agent vouch's `kb_*` tools. Add +`-e VOUCH_AGENT=claude-code` to the `add` command if you want the agent's +proposals attributed to it rather than your shell user. + +The rest of this tutorial uses `vouch install-mcp` instead, which does the same +wiring **and** drops in the brain-first protocol, slash commands, and hooks. See +what's supported: + +```bash +vouch install-mcp --list +``` + +``` +Available MCP host adapters: + - claude-code + - claude-desktop + - cline + - codex + - continue + - cursor + - openclaw + - windsurf + - zed +``` + +Install for your host (Claude Code shown; swap in `codex`, `cursor`, …): + +```bash +vouch install-mcp claude-code +``` + +``` + + .mcp.json + + CLAUDE.md +Done — 2 written, 0 appended, 0 skipped under /your/project +``` + +It's idempotent — re-running skips anything already in place. Tiers control how +much it writes: + +| Tier | Writes | +|---|---| +| `T1` | the MCP wire only (`.mcp.json`) | +| `T2` | `+ CLAUDE.md` / `AGENTS.md` — the brain-first protocol | +| `T3` | `+` slash commands | +| `T4` (default) | `+` host hooks / settings | + +```bash +vouch install-mcp claude-code --tier T2 # stop at the protocol, skip hooks +``` + +## 2. 
What it wrote + +The `.mcp.json` points the host at vouch's MCP server: + +```json +{ + "mcpServers": { + "vouch": { + "command": "vouch", + "args": ["serve"], + "env": { "VOUCH_AGENT": "claude-code" } + } + } +} +``` + +`vouch serve` is the MCP server over stdio; `VOUCH_AGENT` is the identity +recorded as `proposed_by` and as the actor on every audit event — so "which +agent claimed what" is always answerable, and (per the +[review gate](first-knowledge-base.md#5-review-it--the-gate)) the agent can't +approve its own proposals. + +The `CLAUDE.md` adds the **brain-first protocol** — the habits that make the +memory worth having: + +1. **Search before answering.** Call `kb_search` / `kb_context` before + reasoning from scratch, so you reuse the agreed answer instead of re-deriving + it. +2. **Capture decisions as you make them.** When the work settles a question, + file it with `kb_propose_claim`, citing the evidence. +3. **Cite what you recall.** Answers built from the KB carry their claim ids, so + a human can trace any fact back to its source. + +## 3. Restart and confirm the agent can read + +Restart the agent so it picks up `.mcp.json`. vouch's tools are now live: +`kb_search`, `kb_context`, `kb_read_claim`, `kb_propose_claim`, and the rest of +the `kb_*` surface (run `vouch capabilities` for the full list). + +Ask it something the KB knows — "how does auth work in this project?" — and it +will call `kb_search` / `kb_context` and answer from the reviewed claim instead +of guessing. + +## 4. Ambient capture — the agent proposes + +Now the half that makes the memory grow. As the agent works and settles a +decision, it files a proposal. That's the agent calling `kb_propose_claim`; from +the CLI the same thing looks like: + +```bash +VOUCH_AGENT=claude-code vouch propose-claim \ + --text "Refresh tokens rotate on every use; reuse is treated as theft." 
\ + --source <source-id> \ + --type decision \ + --confidence 0.9 +``` + +The proposal lands in the pending queue, attributed to the agent — **not** in +the KB yet: + +```bash +vouch pending +``` + +``` +• 20260630-074020-4037420c [claim] by claude-code + Refresh tokens rotate on every use; reuse is treated as theft. +``` + +## 5. You review — the gate, again + +The agent proposed; a human approves. Because the proposer is `claude-code` and +you're not, the gate is satisfied by your approving: + +```bash +vouch show 20260630-074020-4037420c # inspect what the agent claimed +vouch approve 20260630-074020-4037420c --reason "confirmed against the auth ADR" +# or, if it's wrong: +vouch reject 20260630-074020-4037420c --reason "we don't rotate on every use" +``` + +Only after approval does the claim become durable and show up in the next +session's `kb_search`. A wrong fact an agent hallucinated never silently +propagates — it sits in `pending` until a human accepts or rejects it. Both +decisions are recorded in the audit log. + +## 6. Scope work into a session (optional) + +For a longer task, open a session so the agent's proposals are grouped and you +can approve the batch at once: + +```bash +vouch session start --task "harden auth" +``` + +``` +sess-20260630-073951-ad7971 +``` + +The agent proposes against that session as it works; when you're happy with the +whole run: + +```bash +vouch crystallize sess-20260630-073951-ad7971 # approve every pending proposal in the session +vouch session end sess-20260630-073951-ad7971 +``` + +## The four habits that make it worth it + +- **Brain-first lookup** — the agent retrieves the agreed answer instead of + asking you or re-deriving it. +- **Ambient capture** — decisions get logged as proposals while the work + happens, not in a doc nobody updates. +- **One shared brain** — Claude Code, Codex, and Cursor read and write the + *same* `.vouch/`, so knowledge captured in one is instantly there in the + others.
+- **Reviewed before trusted** — every recalled fact passed the gate, which is + what makes an agent memory safe to rely on. + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Agent doesn't see the `kb_*` tools | Host didn't reload `.mcp.json`. | Fully restart the agent / reload the window. | +| `kb_search` returns nothing | KB is empty or the query misses. | Confirm with `vouch status`; seed a claim via the [first tutorial](first-knowledge-base.md). | +| Agent's proposal won't approve as itself | Self-approval is blocked by design. | Approve from a human identity (a different `VOUCH_AGENT`). | +| Wrong host config written | Wrong adapter name. | `vouch install-mcp --list`, then re-run with the right host. | + +## Next steps + +- [Share a knowledge base across machines and teammates](share-a-knowledge-base.md) + — give every agent on the team the same reviewed brain. +- [Edit your KB as markdown in Obsidian](edit-in-obsidian.md) — review and + extend the KB outside the terminal. +- [Per-host adapter details](../../adapters/) — exactly what each host's + `install-mcp` writes. diff --git a/docs/tutorials/edit-in-obsidian.md b/docs/tutorials/edit-in-obsidian.md new file mode 100644 index 00000000..26851a1b --- /dev/null +++ b/docs/tutorials/edit-in-obsidian.md @@ -0,0 +1,140 @@ +# Edit your KB as markdown in Obsidian + +Not every edit wants to happen in a terminal. By the end of this tutorial your +reviewed KB will mirror into an Obsidian-compatible vault — approved pages and +claims as linked markdown, so the graph view works — and edits you make in +Obsidian will come back as review-gated proposals, citing the exact bytes you +changed. The gate stays in the loop the whole time: editing a file in your +vault never writes straight into the KB. + +- **Time:** about 15 minutes +- **You'll need:** a vouch KB with at least one approved page or claim (the + [first tutorial](first-knowledge-base.md) gets you there). 
Obsidian itself is + optional — any markdown editor works; Obsidian just renders the wikilinks and + graph. + +Sync is bidirectional, and the two directions do very different things: + +- **backward (KB → vault):** approved pages and claims mirror *out* into the + vault as readable, linked markdown. +- **forward (vault → KB):** edits you make in the vault come *back* as + page-edit proposals in `proposed/`, which still have to clear the review gate. + +## 1. Mirror the KB into a vault + +Pick a folder for the vault and mirror approved artifacts into it: + +```bash +mkdir -p ~/my-vault +vouch sync --vault ~/my-vault --direction backward +``` + +``` + ↓ pages/edit-in-obsidian.md (mirrored) + ↓ claims/vouch-starter-reviewed-knowledge.md (mirrored) +Done — 1 pages and 1 claims mirrored, 0 proposals filed. +``` + +Everything lands under `<vault>/vouch/`: + +``` +~/my-vault/vouch/ + pages/edit-in-obsidian.md + claims/vouch-starter-reviewed-knowledge.md + .sync-state.json +``` + +Approved pages mirror as full markdown. Approved claims get a markdown stub +under `claims/` with Obsidian wikilink backlinks to the pages that cite them — +so when you open the vault in Obsidian, the graph view connects claims to the +pages they support. + +## 2. Open it in Obsidian + +Point Obsidian at `~/my-vault` (Open folder as vault). Browse `vouch/pages/`, +open the graph view, and you'll see the reviewed knowledge as a navigable web of +linked notes. This is a read-friendly window onto the same `.vouch/` you've been +driving from the CLI — nothing here is a second copy of the truth; it's a +projection of it. + +## 3. Edit a page — the change becomes a proposal + +Edit a mirrored page in Obsidian (or any editor). Add a line to a page under +`<vault>/vouch/pages/`: + +```bash +echo "Added a new line while editing in Obsidian."
>> ~/my-vault/vouch/pages/edit-in-obsidian.md +``` + +Now run sync in the other direction: + +```bash +vouch sync --vault ~/my-vault --direction forward +``` + +``` + ↑ pages/edit-in-obsidian (proposal filed) +Done — 0 pages and 0 claims mirrored, 1 proposals filed. +``` + +Your edit did **not** write into the KB. It became a proposal: + +```bash +vouch pending +``` + +``` +• 20260630-074121-a44eb574 [page] by vault-sync + Edit in Obsidian +``` + +The proposal cites a `vault:<path>` source pointing at the file you changed, +so the reviewer can see exactly which bytes triggered it — the gate isn't +reviewing a vague "something changed," it's reviewing your specific edit. + +## 4. Review the edit — the gate, as always + +The proposal sits in the queue under the `vault-sync` actor until a human +accepts it. Because the proposer is `vault-sync` and not you, your approval +satisfies the gate: + +```bash +vouch show 20260630-074121-a44eb574 # see the diff your edit produced +vouch approve 20260630-074121-a44eb574 --reason "good clarification" +# or: +vouch reject 20260630-074121-a44eb574 --reason "not accurate" +``` + +Only after approval does the page change land in the durable KB — and then a +backward sync mirrors the now-canonical version back out to the vault. Edit +freely; nothing is true until it's reviewed. + +## 5. Keep them in sync continuously + +Run both directions at once, and add `--watch` to keep a polling loop alive +while you work in Obsidian: + +```bash +vouch sync --vault ~/my-vault --direction both --watch +``` + +Re-runs are idempotent: only real edits become proposals, and only genuinely +changed approved artifacts re-mirror. A no-op sync writes nothing. + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| `Missing option '--vault'` | `sync` needs an explicit vault path. | Pass `--vault <path>`. | +| Edited a vault file, no proposal | Ran backward, not forward. | `vouch sync --vault <path> --direction forward`.
| +| Edit went straight in with no review | You edited `.vouch/` directly, not the vault. | Edit under `<vault>/vouch/`; the KB files are the reviewed output, not the input. | +| Vault page reverted after approve | A backward sync re-mirrored the canonical version. | Working as intended — the approved KB is the source of truth. | + +## Next steps + +- [Share a knowledge base across machines and teammates](share-a-knowledge-base.md) + — move the reviewed KB beyond one machine. +- [The object model](../object-model.md) — pages vs. claims, and how citations + link them. +- [The review gate in depth](../review-gate.md) — every policy the gate + enforces, including on vault-sourced proposals. diff --git a/docs/tutorials/first-knowledge-base.md b/docs/tutorials/first-knowledge-base.md new file mode 100644 index 00000000..4a098781 --- /dev/null +++ b/docs/tutorials/first-knowledge-base.md @@ -0,0 +1,287 @@ +# Build your first knowledge base + +By the end of this tutorial you'll have a vouch knowledge base in a git repo, +one reviewed-and-approved claim backed by a real source, and the ability to +recall it three ways — ranked search, a task-shaped context pack, and a cited +synthesis. You'll also see the review gate refuse to let you approve your own +proposal, which is the single most important thing vouch does. + +- **Time:** about 15 minutes +- **Cost:** none — no API keys, no network +- **You'll need:** Python 3.11+, git, and a terminal + +The scenario: your project decided to use JWTs for auth, and you're tired of +your agent (and your teammates) re-litigating that decision every few weeks. +You want one reviewed, cited answer that any future session can read. + +## 1. Install and verify + +```bash +pipx install vouch-kb # the command stays `vouch` +vouch --version +``` + +(From a clone instead: `python -m venv .venv && . .venv/bin/activate && pip +install -e '.[dev]'`.) + +## 2. Initialise a KB + +vouch lives inside a git repo.
From your project root: + +```bash +git init -q # skip if the repo already exists +vouch init +``` + +``` +Initialised KB at /your/project/.vouch +Seeded starter claim: vouch-starter-reviewed-knowledge +Next steps: + vouch status + vouch search agent + vouch serve +``` + +`vouch init` creates `.vouch/` with `config.yaml`, an append-only +`audit.log.jsonl`, the directory skeleton (claims, pages, entities, relations, +sources, sessions, proposed, decided), and a `.gitignore` that keeps +`proposed/` and the derived `state.db` out of version control. It also seeds +one starter claim so the KB isn't empty. + +```bash +vouch status +``` + +``` +KB at /your/project/.vouch + durable: 1 claims • 1 pages • 1 sources • 0 entities • 0 relations + pending: 0 proposals + audit: 1 events • index: present +``` + +## 3. Register a source + +Claims have to cite something. A source is any content-addressable +evidence — a file, a URL, a transcript. Register the decision record: + +```bash +mkdir -p docs/decisions +cat > docs/decisions/auth.md <<'EOF' +# Auth decision meeting, April 2026 + +We agreed auth uses JWTs in the Authorization header. +EOF + +vouch source add docs/decisions/auth.md --title "Auth decision meeting, April 2026" +``` + +``` +532e2b4d95c9dd17aa1c0feed366746ca47e40e4cb8e8582f3677b0931b55d70 +``` + +That long hex string is the source id — a sha256 of the content. Re-running +`source add` on the same bytes is a no-op; sources dedupe by content hash, so +you never end up with two copies of the same evidence. + +## 4. Propose a claim + +A claim is a single reviewable statement. Propose one, citing the source you +just registered: + +```bash +vouch propose-claim \ + --text "Auth uses JWTs in the Authorization header." \ + --source 532e2b4d95c9dd17aa1c0feed366746ca47e40e4cb8e8582f3677b0931b55d70 \ + --type decision \ + --confidence 0.95 +``` + +``` +20260630-073841-d722ac24 +``` + +That's the proposal id (a timestamp plus a short hash). 
The proposal lives in +`.vouch/proposed/` — local-only, gitignored, **not yet part of the KB**. Look +at the queue: + +```bash +vouch pending +``` + +``` +• 20260630-073841-d722ac24 [claim] by a + Auth uses JWTs in the Authorization header. +``` + +```bash +vouch show 20260630-073841-d722ac24 +``` + +``` +id: 20260630-073841-d722ac24 +kind: claim +proposed_by: a +payload: + id: auth-uses-jwts-in-the-authorization-header + text: Auth uses JWTs in the Authorization header. + type: decision + confidence: 0.95 + evidence: + - 532e2b4d95c9dd17aa1c0feed366746ca47e40e4cb8e8582f3677b0931b55d70 +status: pending +``` + +## 5. Review it — the gate + +This is the step that makes vouch vouch. Try to approve your own proposal: + +```bash +vouch approve 20260630-073841-d722ac24 --reason "matches the meeting notes" +``` + +``` +✗ 20260630-073841-d722ac24: forbidden_self_approval: a cannot approve their + own proposal (set review.approver_role: trusted-agent in config.yaml to opt out) +Error: refusing to approve: 1 of 1 not approvable — nothing was approved +``` + +vouch refused. The actor who proposed a claim cannot be the actor who approves +it — that's the whole point of a review gate. Approval has to come from a +*different* identity. vouch resolves the actor from `VOUCH_AGENT` (falling back +to your system user), so a human reviewer approves like this: + +```bash +VOUCH_AGENT=alice-reviewer vouch approve 20260630-073841-d722ac24 \ + --reason "matches the meeting notes" +``` + +``` +Approved → claim/auth-uses-jwts-in-the-authorization-header +``` + +The claim is now durable: a plain YAML file at +`.vouch/claims/auth-uses-jwts-in-the-authorization-header.yaml`, and the +proposal record moves to `.vouch/decided/` for audit. + +> Working solo and don't want two identities? Set `review.approver_role: +> trusted-agent` in `.vouch/config.yaml` to opt out of the self-approval check. 
+> The gate still records who approved what — you've just told it you trust the +> proposer to review themselves. For anything shared, leave it on. + +## 6. Recall it — three ways + +**Ranked search** — "does the KB know X?": + +```bash +vouch search "JWT" +``` + +``` +claim/auth-uses-jwts-in-the-authorization-header Auth uses JWTs in the Authorization header. (substring) +``` + +**A context pack** — a task-shaped working set, ready to drop into an agent +prompt, with citations and a quality gate: + +```bash +vouch context "how does auth work" +``` + +```json +{ + "backend": "fts5", + "items": [ + { + "id": "auth-uses-jwts-in-the-authorization-header", + "summary": "Auth uses JWTs in the Authorization header.", + "citations": ["532e2b4d95c9dd17aa1c0feed366746ca47e40e4cb8e8582f3677b0931b55d70"] + } + ] +} +``` + +**A cited synthesis** — a direct answer built only from approved claims, with +inline citation markers: + +```bash +vouch synthesize "how does auth work" +``` + +```json +{ + "answer": "Auth uses JWTs in the Authorization header [auth-uses-jwts-in-the-authorization-header].", + "claims": ["auth-uses-jwts-in-the-authorization-header"] +} +``` + +`synthesize` never invents — if the KB doesn't have it, the answer says so. +Every sentence traces back to a claim that went through the gate. + +## 7. Trace the provenance + +Because every write is an event, you can always answer "why does this claim +exist, and who put it there?" 
+ +```bash +vouch why auth-uses-jwts-in-the-authorization-header +``` + +``` +why auth-uses-jwts-in-the-authorization-header (claim) + approvedBy -> … (event) [2026-06-30T07:39:17Z] + cites -> 532e2b4d… (source) [2026-06-30T07:39:17Z] +``` + +```bash +vouch audit +``` + +``` +… proposal.claim.create by a objects=['20260630-073841-d722ac24'] +… proposal.claim.approve by alice-reviewer objects=['20260630-073841-d722ac24', 'auth-uses-jwts-…'] +``` + +The audit log is the authoritative history: who proposed, who approved, when, +and citing what. It's append-only and committed alongside the claims. + +## 8. Commit it + +Approved artifacts are plain files, so git is your backup, sync, and second +audit log: + +```bash +git add .vouch && git commit -m "kb: approve auth-uses-jwt" +``` + +From here the claim diffs cleanly in PRs, travels with the repo, and any future +session — yours or an agent's — reads the agreed answer instead of guessing. + +## Keeping it true over time + +Decisions change. vouch has lifecycle commands so the record stays honest +instead of silently drifting (each is recorded as an audit event): + +```bash +vouch confirm # re-affirm a still-true claim, bumps last_confirmed_at +vouch supersede # mark an old claim replaced by a newer one +vouch contradict # record that two claims conflict +vouch archive # retire a claim, kept for history +``` + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| `forbidden_self_approval` | The proposer is trying to approve their own claim. | Approve under a different `VOUCH_AGENT`, or set `review.approver_role: trusted-agent` for solo use. | +| `error: … run vouch init in your project root` | No `.vouch/` found walking up from the cwd. | Run `vouch init`, or `cd` into the project that has the KB. | +| `source add` printed the same id twice | Sources dedupe by content hash. | Working as intended — identical bytes are one source. 
| +| `vouch show <id>` says "proposal not found" | The proposal was already approved or rejected. | Check `vouch pending` for live ids, or `vouch audit` for decided ones. | + +## Next steps + +- [Give your coding agent a reviewed memory](connect-coding-agent.md) — wire + this KB into Claude Code, Codex, or Cursor so the agent reads and proposes. +- [Share a knowledge base across machines and teammates](share-a-knowledge-base.md) + — bundle this KB and import it elsewhere. +- [The object model](../object-model.md) — claims, pages, entities, relations, + sources, and how they link. diff --git a/docs/tutorials/share-a-knowledge-base.md b/docs/tutorials/share-a-knowledge-base.md new file mode 100644 index 00000000..ed972bb1 --- /dev/null +++ b/docs/tutorials/share-a-knowledge-base.md @@ -0,0 +1,167 @@ +# Share a knowledge base across machines and teammates + +A reviewed KB is only as useful as the people and agents that can read it. By +the end of this tutorial you'll bundle a KB into a single portable file, preview +exactly what importing it would change, merge it into a second KB without +clobbering anything, and read the team-level observability that tells you who's +proposing and who's approving. The review gate does the rest: on a shared KB, no +contributor can rubber-stamp their own writes. + +- **Time:** about 20 minutes +- **Cost:** none — no API keys, no server +- **You'll need:** a vouch KB with at least one approved claim (the + [first tutorial](first-knowledge-base.md) gets you there) + +vouch is local-first by design — there's no hosted service. Sharing is just +moving reviewed, plain-text files around, and git or a bundle is the transport. + +## 1.
Export a portable bundle + +Bundle the durable KB — claims, pages, sources, config, audit trail — into one +`.tar.gz`: + +```bash +vouch export --out kb.tar.gz +``` + +```json +{ + "bundle_id": "101a22c84e802e5238bb9e4618de65632c830f57162e9842d8d749bd06299fd3", + "files": 9, + "out": "kb.tar.gz" +} +``` + +The `bundle_id` is a content hash of everything inside, so two bundles are +identical iff their ids match. Nothing from `proposed/` (local, gitignored) +goes in — only reviewed artifacts travel. + +## 2. Verify the bundle is intact + +Before you hand a bundle to anyone, confirm every file matches its recorded +hash: + +```bash +vouch export-check kb.tar.gz +``` + +A clean check means the bundle wasn't truncated or tampered with in transit. + +## 3. Preview the import — no writes + +On the receiving machine (or a teammate's clone), see what importing *would* +change before touching anything: + +```bash +vouch import-check kb.tar.gz +``` + +`import-check` diffs the bundle against the destination KB and reports what's +new, what's identical, and what conflicts — without writing a byte. This is the +"read the PR before you merge it" step. + +## 4. Apply it — conflict-safe + +```bash +vouch import-apply kb.tar.gz +``` + +```json +{ + "bundle_id": "101a22c84e802e5238bb9e4618de65632c830f57162e9842d8d749bd06299fd3", + "identical": ["config.yaml", "sources/…/content"], + "on_conflict": "skip", + "skipped_conflicts": ["claims/vouch-starter-reviewed-knowledge.yaml", "…"] +} +``` + +By default, files that already exist with different content are **skipped**, not +overwritten — importing never silently clobbers a teammate's reviewed claim. +Everything new from the bundle lands; everything identical is a no-op; conflicts +are reported for a human to resolve. After import, the claims are immediately +recallable: + +```bash +vouch search "JWT" +``` + +``` +claim/auth-uses-jwts-in-the-authorization-header Auth uses JWTs in the Authorization header. 
(substring) +``` + +> **git is the other transport.** Because approved artifacts are plain +> YAML/markdown, a teammate can just `git pull` the repo and the `.vouch/` +> claims come with it — reviewed in the same PR as the code. Bundles are for +> moving a KB *between* repos or onto a machine that isn't a git clone. + +## 5. Why the gate is a team property + +On a solo KB, the self-approval check feels like a formality. On a shared KB +it's the safety property: it guarantees that every durable claim was seen by a +*second* identity. An agent can't approve its own output; a teammate can't +merge their own claim unreviewed. The actor breakdown makes the separation +visible: + +```bash +vouch metrics +``` + +``` + review gate + proposals created 3 + approved / rejected 1 / 0 + approval rate 100.0% + pending now 2 + + actors (proposed / approved / rejected / confirmed) + a 2 / 0 / 0 / 0 + alice-reviewer 0 / 1 / 0 / 0 + vault-sync 1 / 0 / 0 / 0 +``` + +Proposers and approvers are different rows — that's the gate working. `metrics` +reads purely from the audit log and the artifact files (no new state), and +`--json` / `--prometheus` give you a stable shape for a dashboard or a +textfile-collector sidecar. + +For the day-to-day queue, `stats` is the lighter view: + +```bash +vouch stats +``` + +``` + pending: 2 proposal(s) + review (last 30d): 1 approved, 0 rejected, 0 expired + approval rate: 100.0% + citations: 2/2 claims with valid citations (100.0%) +``` + +## 6. Keep shared copies honest + +A bundle is a snapshot. To keep two KBs converging: + +- **Re-export** after a review session and re-share — the `bundle_id` tells + recipients instantly whether they already have this exact state. +- **Commit `.vouch/`** so the canonical copy lives in git and bundles are only + for out-of-band transfer. +- **Run `vouch doctor`** on the receiving side after a big import — it verifies + source hashes, finds dangling references, and reports drift. 
+ +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| `import-apply` skipped claims | Same id, different content — a real conflict. | Inspect both with `vouch show` / `vouch diff`, decide, then `supersede` the loser. | +| `export-check` fails | Bundle truncated or modified in transit. | Re-export and re-transfer; never edit a bundle by hand. | +| Imported claims don't appear in search | Derived index is stale. | `vouch index` (or `vouch reindex`) to rebuild `state.db`. | +| Teammate can approve their own claim | `review.approver_role: trusted-agent` is set. | Remove it from `config.yaml` for shared KBs so the gate enforces two identities. | + +## Next steps + +- [Give your coding agent a reviewed memory](connect-coding-agent.md) — point + every teammate's agent at the shared KB. +- [Edit your KB as markdown in Obsidian](edit-in-obsidian.md) — review and + extend the KB outside the terminal. +- [The review gate in depth](../review-gate.md) — the full set of policies the + gate enforces. diff --git a/openclaw.plugin.json b/openclaw.plugin.json index 97322fba..cc4aff03 100644 --- a/openclaw.plugin.json +++ b/openclaw.plugin.json @@ -1,6 +1,6 @@ { "name": "vouch", - "version": "0.1.0", + "version": "1.0.0", "description": "Git-native, review-gated knowledge base for LLM agents. MCP server + JSONL tool server + CLI.", "family": "bundle-plugin", "homepage": "https://github.com/vouchdev/vouch", diff --git a/pyproject.toml b/pyproject.toml index b82bb865..516cb65d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "vouch" -version = "0.0.1" +version = "1.0.0" description = "Git-native, review-gated knowledge base for LLM agents. MCP server + CLI." 
readme = "README.md" requires-python = ">=3.11" @@ -39,6 +39,7 @@ web = [ ] dev = [ "pytest>=9.0.3,<10", + "pytest-benchmark>=5,<6", "pytest-cov>=5,<6", "mypy>=2.1.0", "ruff>=0.15.13", diff --git a/scripts/smoke-capture.sh b/scripts/smoke-capture.sh new file mode 100755 index 00000000..d8707e43 --- /dev/null +++ b/scripts/smoke-capture.sh @@ -0,0 +1,132 @@ +#!/usr/bin/env bash +# +# End-to-end smoke test for auto-capture (docs/superpowers/specs/ +# 2026-07-01-vouch-session-autocapture-design.md). +# +# Drives the real `vouch` CLI through the three hook payloads Claude Code would +# send — PostToolUse (observe), SessionEnd (finalize), SessionStart (banner) — +# in a throwaway KB, and asserts the review-gated outcome. No network, no LLM. +# +# Usage: +# scripts/smoke-capture.sh # uses `vouch` on PATH +# VOUCH=.venv/bin/vouch scripts/smoke-capture.sh # a specific binary +# VOUCH="python -m vouch" scripts/smoke-capture.sh +# make smoke-capture +# +# Exits 0 if every check passes, 1 otherwise. + +set -uo pipefail + +VOUCH="${VOUCH:-vouch}" +# Absolutize a path-form first token (e.g. .venv/bin/vouch) so the checks work +# no matter the caller's cwd. A bare name on PATH or a module form +# ("python -m vouch") is left as-is; multi-word commands run via `V()` below. +set -- $VOUCH +_first="$1"; shift +case "$_first" in + */*) _first="$(cd "$(dirname "$_first")" && pwd)/$(basename "$_first")" ;; +esac +VOUCH="$_first${*:+ $*}" + +# Run the vouch CLI (unquoted expansion supports a multi-word command). 
+V() { $VOUCH "$@"; } + +PASS=0 +FAIL=0 +green() { printf '\033[32m%s\033[0m\n' "$1"; } +red() { printf '\033[31m%s\033[0m\n' "$1"; } + +check() { # check "description" "expected" "actual" + if [ "$2" = "$3" ]; then + green "PASS $1"; PASS=$((PASS + 1)) + else + red "FAIL $1"; red " expected: $2"; red " actual: $3"; FAIL=$((FAIL + 1)) + fi +} + +contains() { # contains "description" "haystack" "needle" + case "$2" in + *"$3"*) green "PASS $1"; PASS=$((PASS + 1)) ;; + *) red "FAIL $1 (missing: $3)"; FAIL=$((FAIL + 1)) ;; + esac +} + +if ! V --version >/dev/null 2>&1; then + red "cannot run vouch (set VOUCH=/path/to/vouch or 'python -m vouch'). tried: $VOUCH" + exit 1 +fi + +WORK="$(mktemp -d)" +WORK2="$(mktemp -d)" +cleanup() { rm -rf "$WORK" "$WORK2"; } +trap cleanup EXIT + +SID="cc-smoke-$$" +echo "workspace: $WORK" +echo "session: $SID" +echo + +# --- init a throwaway KB (no cd needed: --path + VOUCH_KB_PATH) ------------- +V init --path "$WORK" >/dev/null 2>&1 +export VOUCH_KB_PATH="$WORK/.vouch" +check "vouch init created .vouch/" "yes" "$([ -d "$WORK/.vouch" ] && echo yes || echo no)" +contains "captures/ is gitignored" "$(cat "$WORK/.vouch/.gitignore")" "captures/" + +# --- PostToolUse x3 (observe) --------------------------------------------- +obs() { echo "$1" | V capture observe; } +obs '{"session_id":"'"$SID"'","tool_name":"Edit","tool_input":{"file_path":"/p/auth.py"},"tool_response":"ok"}' +obs '{"session_id":"'"$SID"'","tool_name":"Bash","tool_input":{"command":"pytest -q"},"tool_response":"1 failed, error"}' +obs '{"session_id":"'"$SID"'","tool_name":"Write","tool_input":{"file_path":"/p/README.md"},"tool_response":"done"}' + +BUF="$WORK/.vouch/captures/$SID.jsonl" +check "buffer has 3 observations" "3" "$([ -f "$BUF" ] && wc -l < "$BUF" | tr -d ' ' || echo 0)" +contains "observation summarizes the edit" "$(cat "$BUF" 2>/dev/null)" "Edited auth.py" +contains "failed command is flagged" "$(cat "$BUF" 2>/dev/null)" "Command failed" + +# an unobserved 
(mcp) tool must NOT be captured +obs '{"session_id":"'"$SID"'","tool_name":"mcp__vouch__kb_search","tool_input":{},"tool_response":"x"}' +check "mcp tool is ignored (still 3 lines)" "3" "$(wc -l < "$BUF" | tr -d ' ')" + +# garbage stdin must never error (a hook must not break the tool call) +echo 'not json' | V capture observe; rc=$? +check "observe survives garbage stdin (exit 0)" "0" "$rc" + +# --- SessionEnd (finalize) ------------------------------------------------- +FIN="$(echo '{"session_id":"'"$SID"'","cwd":"'"$WORK"'"}' | V capture finalize)" +contains "finalize reports captured:3" "$FIN" '"captured": 3' +contains "finalize returns a summary_proposal_id" "$FIN" '"summary_proposal_id":' +check "buffer file removed after finalize" "gone" "$([ -f "$BUF" ] && echo present || echo gone)" + +# --- review gate: PENDING, authored by vouch-capture, not auto-approved ----- +PEND="$(V pending 2>/dev/null)" +contains "summary is in the pending queue" "$PEND" "by vouch-capture" +contains "summary is a page proposal" "$PEND" "[page]" + +# our summary (body carries the session id) must live in proposed/, not pages/. +# (vouch init seeds an unrelated starter page, so we key on our session id.) 
+OUR_APPROVED="$(grep -rl "$SID" "$WORK/.vouch/pages" 2>/dev/null | wc -l | tr -d ' ')" +check "our summary is NOT auto-approved (review gate intact)" "0" "$OUR_APPROVED" +N_PROPOSED="$(find "$WORK/.vouch/proposed" -name '*.yaml' 2>/dev/null | wc -l | tr -d ' ')" +check "summary sits in proposed/ awaiting review" "1" "$N_PROPOSED" + +# --- SessionStart (banner) ------------------------------------------------- +BANNER="$(V capture banner)" +contains "banner nudges to review" "$BANNER" "awaiting review" + +# --- disabled mode is a no-op --------------------------------------------- +V init --path "$WORK2" >/dev/null 2>&1 +printf 'capture:\n enabled: false\n' >> "$WORK2/.vouch/config.yaml" +export VOUCH_KB_PATH="$WORK2/.vouch" +echo '{"session_id":"off","tool_name":"Edit","tool_input":{"file_path":"/p/x.py"},"tool_response":"ok"}' \ + | V capture observe +check "capture.enabled:false writes no buffer" "gone" \ + "$([ -f "$WORK2/.vouch/captures/off.jsonl" ] && echo present || echo gone)" + +# --- report ---------------------------------------------------------------- +echo +echo "-----------------------------------------" +if [ "$FAIL" -eq 0 ]; then + green "ALL $PASS CHECKS PASSED"; exit 0 +else + red "$FAIL FAILED, $PASS passed"; exit 1 +fi diff --git a/scripts/smoke-recall.sh b/scripts/smoke-recall.sh new file mode 100755 index 00000000..02230e05 --- /dev/null +++ b/scripts/smoke-recall.sh @@ -0,0 +1,104 @@ +#!/usr/bin/env bash +# +# End-to-end smoke test for session-start recall (docs/superpowers/specs/ +# 2026-07-01-vouch-session-autocapture-design.md). +# +# Seeds a throwaway KB with approved knowledge via the real CLI, then checks +# that `vouch recall` — the command the SessionStart hook runs — emits a digest +# a new Claude session can consume: every live approved claim + page title, +# archived claims excluded, opt-out honoured. No network, no LLM. 
+# +# Usage: +# scripts/smoke-recall.sh # uses `vouch` on PATH +# VOUCH=.venv/bin/vouch scripts/smoke-recall.sh +# make smoke-recall +# +# Exits 0 if every check passes, 1 otherwise. + +set -uo pipefail + +VOUCH="${VOUCH:-vouch}" +set -- $VOUCH +_first="$1"; shift +case "$_first" in + */*) _first="$(cd "$(dirname "$_first")" && pwd)/$(basename "$_first")" ;; +esac +VOUCH="$_first${*:+ $*}" +V() { $VOUCH "$@"; } + +PASS=0 +FAIL=0 +green() { printf '\033[32m%s\033[0m\n' "$1"; } +red() { printf '\033[31m%s\033[0m\n' "$1"; } + +check() { # check "desc" "expected" "actual" + if [ "$2" = "$3" ]; then green "PASS $1"; PASS=$((PASS + 1)) + else red "FAIL $1"; red " expected: $2"; red " actual: $3"; FAIL=$((FAIL + 1)); fi +} +contains() { # contains "desc" "haystack" "needle" + case "$2" in *"$3"*) green "PASS $1"; PASS=$((PASS + 1)) ;; + *) red "FAIL $1 (missing: $3)"; FAIL=$((FAIL + 1)) ;; esac +} +absent() { # absent "desc" "haystack" "needle" + case "$2" in *"$3"*) red "FAIL $1 (should be absent: $3)"; FAIL=$((FAIL + 1)) ;; + *) green "PASS $1"; PASS=$((PASS + 1)) ;; esac +} + +if ! V --version >/dev/null 2>&1; then + red "cannot run vouch (set VOUCH=/path/to/vouch or 'python -m vouch'). tried: $VOUCH" + exit 1 +fi + +WORK="$(mktemp -d)" +cleanup() { rm -rf "$WORK"; } +trap cleanup EXIT + +V init --path "$WORK" >/dev/null 2>&1 +export VOUCH_KB_PATH="$WORK/.vouch" + +# --- seed approved knowledge via the real CLI ------------------------------ +printf 'team decision doc\n' > "$WORK/evidence.txt" +SRC="$(V source add "$WORK/evidence.txt")" +check "registered a source" "yes" "$([ -n "$SRC" ] && echo yes || echo no)" + +# propose as an agent, approve as the human — self-approval is forbidden. 
# approve_claim "text" -> prints claim id
#
# Proposes the claim with VOUCH_AGENT=claude-code, then approves the returned
# proposal without that override, and prints whatever follows the last
# "claim/" on each line of the approve output. stderr of both vouch calls is
# discarded.
approve_claim() {
  local proposal_id approved_id
  proposal_id="$(VOUCH_AGENT=claude-code V propose-claim --text "$1" --source "$SRC" 2>/dev/null)"
  approved_id="$(V approve "$proposal_id" 2>/dev/null | sed -n 's#.*claim/##p')"
  echo "$approved_id"
}
&& pwd)" +HOOKED="$(python3 -c "import json;h=json.load(open('$REPO/adapters/claude-code/.claude/settings.json'))['hooks']['SessionStart'];print(any('vouch recall' in c.get('command','') for g in h for c in g['hooks']))" 2>/dev/null)" +check "SessionStart hook runs 'vouch recall'" "True" "$HOOKED" + +# --- report ---------------------------------------------------------------- +echo +echo "-----------------------------------------" +if [ "$FAIL" -eq 0 ]; then green "ALL $PASS CHECKS PASSED"; exit 0 +else red "$FAIL FAILED, $PASS passed"; exit 1; fi diff --git a/src/vouch/__init__.py b/src/vouch/__init__.py index 449b629c..4e2a86c7 100644 --- a/src/vouch/__init__.py +++ b/src/vouch/__init__.py @@ -1,3 +1,3 @@ """vouch — git-native, review-gated knowledge base for LLM agents.""" -__version__ = "0.0.1" +__version__ = "1.0.0" diff --git a/src/vouch/__main__.py b/src/vouch/__main__.py new file mode 100644 index 00000000..ba3ed6f2 --- /dev/null +++ b/src/vouch/__main__.py @@ -0,0 +1,6 @@ +"""Enable ``python -m vouch`` as an alias for the ``vouch`` console script.""" + +from vouch.cli import cli + +if __name__ == "__main__": + cli() diff --git a/src/vouch/auto_pr.py b/src/vouch/auto_pr.py index c9edd2ac..943fbb5a 100644 --- a/src/vouch/auto_pr.py +++ b/src/vouch/auto_pr.py @@ -319,10 +319,10 @@ def detect_or_bootstrap_guidance(ctx: RepoCtx, engine: Engine, "---\nname: auto-pr-contributing\ndescription: synthesized contribution " f"guide for {ctx.repo}, derived from merged PRs.\n---\n\n" ) - skill.write_text(front + guide) + skill.write_text(front + guide, encoding="utf-8") codex_mirror = ctx.clone_dir / ".codex" / "auto-pr-contributing.md" codex_mirror.parent.mkdir(parents=True, exist_ok=True) - codex_mirror.write_text(guide) + codex_mirror.write_text(guide, encoding="utf-8") return guide diff --git a/src/vouch/capabilities.py b/src/vouch/capabilities.py index 39872ade..2efc39a3 100644 --- a/src/vouch/capabilities.py +++ b/src/vouch/capabilities.py @@ -69,6 +69,8 @@ 
def load_config(store: KBStore) -> CaptureConfig:
    """Read ``capture:`` from config.yaml; fall back to defaults.

    Defaults are returned when the file cannot be read or decoded, is not
    valid YAML, is not a mapping, or has no ``capture`` mapping. A numeric
    setting that cannot be converted falls back to its own default instead
    of raising, so a typo in the config cannot make a hook fail.

    Args:
        store: KB store whose ``config_path`` points at config.yaml.

    Returns:
        The effective capture configuration.
    """
    try:
        # Decode explicitly as UTF-8 rather than with the platform default,
        # matching how the other config readers in this package open the file.
        loaded = yaml.safe_load(store.config_path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, yaml.YAMLError):
        return CaptureConfig()
    raw = loaded.get("capture") if isinstance(loaded, dict) else None
    if not isinstance(raw, dict):
        return CaptureConfig()

    # A null, a non-numeric string or an infinity must not escape as an
    # exception: keep the default for that one field.
    try:
        min_observations = int(raw.get("min_observations", DEFAULT_MIN_OBSERVATIONS))
    except (TypeError, ValueError, OverflowError):
        min_observations = DEFAULT_MIN_OBSERVATIONS
    try:
        dedup_window_seconds = float(
            raw.get("dedup_window_seconds", DEFAULT_DEDUP_WINDOW_SECONDS)
        )
    except (TypeError, ValueError):
        dedup_window_seconds = DEFAULT_DEDUP_WINDOW_SECONDS

    return CaptureConfig(
        enabled=bool(raw.get("enabled", DEFAULT_ENABLED)),
        min_observations=min_observations,
        dedup_window_seconds=dedup_window_seconds,
    )
Returns True if written.""" + cfg = config or load_config(store) + if not cfg.enabled: + return False + ts = time.time() if now is None else now + path = buffer_path(store, session_id) + key = _dedup_key(tool, summary) + for obs in reversed(_read_observations(path)): + if ts - float(obs.get("ts", 0.0)) > cfg.dedup_window_seconds: + break + if _dedup_key(str(obs.get("tool", "")), str(obs.get("summary", ""))) == key: + return False + record: dict[str, Any] = {"ts": ts, "tool": tool, "summary": summary} + if files: + record["files"] = files + if cmd: + record["cmd"] = cmd + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("a", encoding="utf-8") as fh: + fh.write(json.dumps(record, sort_keys=True) + "\n") + return True + + +def _basename(path: str) -> str: + return path.rsplit("/", 1)[-1] or path + + +def summarize_tool( + tool_name: str | None, + tool_input: dict[str, Any] | None, + tool_response: object, +) -> dict[str, Any] | None: + """Turn a PostToolUse payload into a compact observation, or None to skip.""" + if not tool_name or tool_name not in _OBSERVED_TOOLS: + return None + ti = tool_input or {} + out: dict[str, Any] = {"tool": tool_name} + fp = ti.get("file_path") + if isinstance(fp, str) and fp: + out["files"] = [fp] + if tool_name in {"Read", "Edit", "Write", "Update", "NotebookEdit"}: + name = _basename(fp) if isinstance(fp, str) and fp else "file" + verb = {"Read": "Read", "Write": "Created"}.get(tool_name, "Edited") + out["summary"] = f"{verb} {name}" + elif tool_name == "Bash": + cmd = ti.get("command") + short = str(cmd).splitlines()[0][:60] if cmd else "command" + if cmd: + out["cmd"] = str(cmd)[:200] + text = str(tool_response).lower() + failed = "error" in text or "failed" in text + out["summary"] = f"Command failed: {short}" if failed else f"Ran: {short}" + elif tool_name in {"Grep", "Glob"}: + out["summary"] = f"{tool_name} {str(ti.get('pattern', ''))[:40]}" + elif tool_name in {"WebFetch", "WebSearch"}: + target = ti.get("url") or 
def build_summary_body(
    session_id: str,
    observations: list[dict[str, Any]],
    changed_files: list[str],
    git_stat: str,
    *,
    project: str | None = None,
    generated_at: str | None = None,
) -> tuple[str, str]:
    """Render a session's observations as a markdown summary page.

    Returns ``(title, body)``. The body starts with the title heading and a
    short header (generated time if given, session id, observation count),
    followed by whichever of these sections have content, in this order:
    files (observed files plus *changed_files*, sorted, first 20), the git
    stat in a code fence, per-tool counts sorted by tool name, the first 10
    recorded commands, and the first 30 observation summaries. The body
    always ends with exactly one newline.
    """
    per_tool: dict[str, int] = {}
    touched: set[str] = set(changed_files)
    ran: list[str] = []
    for entry in observations:
        tool_name = str(entry.get("tool", ""))
        per_tool[tool_name] = per_tool.get(tool_name, 0) + 1
        touched.update(str(item) for item in (entry.get("files") or []))
        command = entry.get("cmd")
        if command:
            ran.append(str(command))

    heading = f"session summary: {project or 'workspace'} ({session_id})"
    out: list[str] = [f"# {heading}", ""]
    if generated_at:
        out.append(f"- generated: {generated_at}")
    out.append(f"- session: `{session_id}`")
    out.append(f"- observations: {len(observations)}")
    out.append("")

    def section(header: str, rows: list[str]) -> None:
        # Every section is: header, blank line, rows, blank line.
        out.extend([header, "", *rows, ""])

    if touched:
        section(
            "## files modified this session",
            [f"- {name}" for name in sorted(touched)[:20]],
        )
    if git_stat:
        section("## git changes", ["```", git_stat, "```"])
    if per_tool:
        section(
            "## activity",
            [f"- {name}: {per_tool[name]}" for name in sorted(per_tool)],
        )
    if ran:
        section("## notable commands", [f"- `{c}`" for c in ran[:10]])
    if observations:
        section(
            "## observations",
            [f"- {entry.get('summary', '')}" for entry in observations[:30]],
        )

    return heading, "\n".join(out).rstrip() + "\n"
def is_stale_buffer(
    path: Path,
    *,
    max_age_seconds: float = 3600.0,
    now_timestamp: float | None = None,
) -> bool:
    """Return True if the buffer file's mtime is older than *max_age_seconds*.

    Args:
        path: Buffer file to inspect.
        max_age_seconds: Age threshold; the file is stale only when its age
            is strictly greater than this.
        now_timestamp: Reference "now" as a POSIX timestamp; defaults to
            ``time.time()``.

    Returns:
        False when the file does not exist or cannot be stat'ed, otherwise
        whether its age exceeds the threshold.
    """
    # Stat directly instead of exists()-then-stat(): the file can be removed
    # between the two calls, and this function must report "not stale"
    # rather than raise in that case.
    try:
        mtime = path.stat().st_mtime
    except OSError:
        return False
    now = time.time() if now_timestamp is None else now_timestamp
    return now - mtime > max_age_seconds
@capture.command("observe")
def capture_observe_cmd() -> None:
    """Append one observation from a PostToolUse hook payload (stdin JSON).

    Does nothing when stdin is a terminal, when the payload is not a JSON
    object, when it carries no ``session_id``, when the tool is not one that
    gets summarized, or when no KB can be located. Any exception raised
    while handling the payload is swallowed on purpose.
    """
    if sys.stdin.isatty():
        return
    try:
        text = sys.stdin.read()
        event = json.loads(text) if text.strip() else {}
        if not isinstance(event, dict):
            return

        sid = str(event.get("session_id") or "")
        if not sid:
            return

        tool_args = event.get("tool_input")
        if not isinstance(tool_args, dict):
            tool_args = {}
        observation = capture_mod.summarize_tool(
            event.get("tool_name"),
            tool_args,
            event.get("tool_response"),
        )
        if observation is None:
            return

        kb = _capture_store()
        if kb is None:
            return

        capture_mod.observe(
            kb,
            sid,
            tool=observation["tool"],
            summary=observation["summary"],
            files=observation.get("files"),
            cmd=observation.get("cmd"),
        )
    except Exception:
        # a capture failure must never break the user's tool call.
        return
@cli.command(name="detect-themes")
@click.option("--min-sessions", default=None, type=int, help="Minimum sessions for a cluster.")
@click.option("--min-claims", default=None, type=int, help="Minimum claims for a cluster.")
@click.option("--top-k", default=None, type=int, help="Max clusters to return.")
@click.option("--json", "as_json", is_flag=True, help="Emit JSON.")
@click.option("--propose", is_flag=True, help="Propose theme pages for each cluster.")
@click.option("--agent", default=None, help="Agent name for proposals.")
def detect_themes_cmd(
    min_sessions: int | None,
    min_claims: int | None,
    top_k: int | None,
    as_json: bool,
    propose: bool,
    agent: str | None,
) -> None:
    """Detect recurring entity clusters across completed sessions.

    Output modes:

    * default: one numbered line per cluster, or "no themes detected".
    * ``--json``: the clusters and the config used, as one JSON document.
    * ``--propose``: one theme proposal per cluster, one line each; a
      cluster whose proposal fails is reported on stderr and skipped.
    * ``--json --propose``: a single ``{"proposed": [...]}`` JSON document
      on stdout, with an empty list when there are no clusters.
    """
    from . import themes

    store = _load_store()
    result = themes.detect_themes(
        store,
        min_sessions=min_sessions,
        min_claims=min_claims,
        top_k=top_k,
    )
    if as_json and not propose:
        _emit_json({
            "clusters": [
                {
                    "entities": c.entities,
                    "claim_ids": c.claim_ids,
                    "session_ids": c.session_ids,
                    "score": c.score,
                    "session_count": c.session_count,
                    "claim_count": c.claim_count,
                }
                for c in result.clusters
            ],
            "config": result.config_used,
        })
        return
    if not result.clusters:
        if as_json:
            # Only reachable with --json --propose: keep stdout valid JSON
            # for callers that parse it, instead of a plain-text message.
            _emit_json({"proposed": []})
        else:
            click.echo("no themes detected")
        return
    if propose:
        actor = agent or _whoami()
        proposed: list[dict] = []
        for cluster in result.clusters:
            try:
                p = themes.propose_theme(store, cluster, proposed_by=actor)
                proposed.append(p)
                if not as_json:
                    click.echo(
                        f"proposed: {p['theme_page_id']} "
                        f"({p['claim_count']} claims, "
                        f"{p['session_count']} sessions)"
                    )
            except Exception as e:
                # One failing cluster must not stop the remaining proposals;
                # the reason goes to stderr so stdout stays clean.
                click.echo(
                    f"skip: {', '.join(cluster.entities)} — {e}",
                    err=True,
                )
        if as_json:
            _emit_json({"proposed": proposed})
        return
    for i, c in enumerate(result.clusters, 1):
        click.echo(
            f"{i}. {', '.join(c.entities)} "
            f"score={c.score} sessions={c.session_count} claims={c.claim_count}"
        )
""" try: - loaded = yaml.safe_load(store.config_path.read_text()) + loaded = yaml.safe_load(store.config_path.read_text(encoding="utf-8")) except (OSError, yaml.YAMLError): return "auto" if not isinstance(loaded, dict): diff --git a/src/vouch/embeddings/scorer.py b/src/vouch/embeddings/scorer.py index 48dcd782..c408d74d 100644 --- a/src/vouch/embeddings/scorer.py +++ b/src/vouch/embeddings/scorer.py @@ -57,7 +57,7 @@ def evaluate( raise ValueError(f"unknown metric(s): {sorted(unknown)}; known: {sorted(known)}") totals = {m: 0.0 for m in metrics} n = 0 - with queries_file.open() as f: + with queries_file.open(encoding="utf-8") as f: for line in f: if not line.strip(): continue @@ -78,4 +78,4 @@ def evaluate( def write_report(out: dict[str, float], path: Path) -> None: - path.write_text(json.dumps(out, indent=2)) + path.write_text(json.dumps(out, indent=2), encoding="utf-8") diff --git a/src/vouch/embeddings/similarity.py b/src/vouch/embeddings/similarity.py index 69cf9ade..911ea621 100644 --- a/src/vouch/embeddings/similarity.py +++ b/src/vouch/embeddings/similarity.py @@ -23,7 +23,7 @@ def similarity_threshold(store: KBStore) -> float: """Resolve `review.similarity_threshold` from config, else dedup default.""" try: - loaded = yaml.safe_load(store.config_path.read_text()) + loaded = yaml.safe_load(store.config_path.read_text(encoding="utf-8")) if isinstance(loaded, dict): review = loaded.get("review") if isinstance(review, dict) and review.get("similarity_threshold") is not None: diff --git a/src/vouch/health.py b/src/vouch/health.py index db2f1aa8..a22cef7e 100644 --- a/src/vouch/health.py +++ b/src/vouch/health.py @@ -97,7 +97,7 @@ def _load_claims_for_lint(store: KBStore) -> tuple[list[Claim], list[Finding]]: for p in sorted(cdir.glob("*.yaml")): cid = p.stem try: - valid.append(Claim.model_validate(_yaml_load(p.read_text()))) + valid.append(Claim.model_validate(_yaml_load(p.read_text(encoding="utf-8")))) except ValidationError as e: tail = 
str(e).splitlines()[-1].strip() if str(e) else "validation failed" findings.append( diff --git a/src/vouch/http_server.py b/src/vouch/http_server.py index 19ffdfcb..081cda24 100644 --- a/src/vouch/http_server.py +++ b/src/vouch/http_server.py @@ -122,7 +122,7 @@ def load_serve_config(path: Path) -> ServeConfig: if not path.exists(): return ServeConfig() try: - raw = yaml.safe_load(path.read_text()) or {} + raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {} except yaml.YAMLError as e: raise ServeConfigError(f"could not parse {path}: {e}") from e if not isinstance(raw, dict): diff --git a/src/vouch/index_db.py b/src/vouch/index_db.py index 24bfe9f1..146d9f52 100644 --- a/src/vouch/index_db.py +++ b/src/vouch/index_db.py @@ -173,7 +173,7 @@ def _snippet_for(kb_dir: Path, kind: str, eid: str) -> str: path = kb_dir / kind / f"{eid}.md" if not path.exists(): return eid - text = path.read_text() + text = path.read_text(encoding="utf-8") return text[:200].replace("\n", " ") diff --git a/src/vouch/install_adapter.py b/src/vouch/install_adapter.py index cdf4b6d2..c0835e3b 100644 --- a/src/vouch/install_adapter.py +++ b/src/vouch/install_adapter.py @@ -27,6 +27,7 @@ from __future__ import annotations +import json import shutil from dataclasses import dataclass, field from pathlib import Path @@ -62,6 +63,7 @@ class InstallResult: written: list[str] = field(default_factory=list) appended: list[str] = field(default_factory=list) skipped: list[str] = field(default_factory=list) + merged: list[str] = field(default_factory=list) @dataclass(frozen=True) @@ -69,6 +71,7 @@ class _FileEntry: src: str # path relative to the adapter directory dst: str # path relative to the target directory fenced_append: bool = False # CLAUDE.md-style: append inside our fence + json_merge: bool = False # settings.json-style: deep-merge into existing @dataclass(frozen=True) @@ -130,7 +133,10 @@ def _load_manifest(host: str) -> _Manifest: f"{host}: install.yaml tier {tier_name}: every entry 
def _dict_hooks(group: Any) -> list[dict[str, Any]]:
    """Return the dict-shaped hook entries of one hooks group (empty if malformed)."""
    if not isinstance(group, dict):
        return []
    hooks = group.get("hooks")
    if not isinstance(hooks, list):
        return []
    return [hook for hook in hooks if isinstance(hook, dict)]


def _event_commands(groups: Any) -> set[str]:
    """Return every hook ``command`` string already present under one hooks-event.

    ``groups`` comes from a user-edited settings file, so any shape is
    tolerated: a value that is not a list, groups that are not dicts, groups
    whose ``hooks`` is not a list, and hooks without a string ``command`` are
    all ignored instead of raising.
    """
    found: set[str] = set()
    if not isinstance(groups, list):
        return found
    for group in groups:
        for hook in _dict_hooks(group):
            command = hook.get("command")
            if isinstance(command, str):
                found.add(command)
    return found


def _merge_settings(src: dict[str, Any], dst: dict[str, Any]) -> bool:
    """Merge our ``permissions.allow`` + ``hooks`` into ``dst`` in place.

    Args:
        src: the settings dict shipped with the adapter.
        dst: the user's existing settings dict; mutated in place.

    Returns:
        True if at least one permission or hook was added to ``dst``.

    Idempotent: re-merging the same ``src`` adds nothing. Hooks with a string
    ``command`` are deduplicated by that command across the whole event;
    hooks without one are deduplicated by equality with a hook already
    present under the event.
    """
    changed = False

    # permissions.allow — union, preserving the user's order.
    src_perms = src.get("permissions")
    if isinstance(src_perms, dict) and isinstance(src_perms.get("allow"), list):
        dst_perms = dst.get("permissions")
        if not isinstance(dst_perms, dict):
            dst_perms = {}
            dst["permissions"] = dst_perms
        dst_allow = dst_perms.get("allow")
        if not isinstance(dst_allow, list):
            dst_allow = []
            dst_perms["allow"] = dst_allow
        for item in src_perms["allow"]:
            # List membership (==) rather than a set: a hand-edited allow-list
            # may hold unhashable entries, which must not abort the merge.
            if item not in dst_allow:
                dst_allow.append(item)
                changed = True

    # hooks — per event, add only hooks not already present. Prefer folding
    # into an existing group with the same matcher so we don't fan out groups.
    src_hooks = src.get("hooks")
    if isinstance(src_hooks, dict):
        dst_hooks = dst.get("hooks")
        if not isinstance(dst_hooks, dict):
            dst_hooks = {}
            dst["hooks"] = dst_hooks
        for event, src_groups in src_hooks.items():
            if not isinstance(src_groups, list):
                continue
            dst_groups = dst_hooks.get(event)
            if not isinstance(dst_groups, list):
                dst_groups = []
                dst_hooks[event] = dst_groups
            present = _event_commands(dst_groups)
            existing = [hook for group in dst_groups for hook in _dict_hooks(group)]
            for group in src_groups:
                if not isinstance(group, dict):
                    continue
                fresh: list[dict[str, Any]] = []
                for hook in _dict_hooks(group):
                    command = hook.get("command")
                    if isinstance(command, str):
                        if command in present:
                            continue
                        present.add(command)
                    elif hook in existing:
                        # No command to key on: fall back to equality so a
                        # re-merge does not append the same hook again.
                        continue
                    fresh.append(hook)
                    existing.append(hook)
                if not fresh:
                    continue
                matcher = group.get("matcher")
                # Only fold into a group whose ``hooks`` is absent or a list;
                # a group with ``hooks: null`` (or any other shape) is left
                # exactly as the user wrote it and a new group is appended.
                target_group = next(
                    (
                        g for g in dst_groups
                        if isinstance(g, dict)
                        and g.get("matcher") == matcher
                        and isinstance(g.get("hooks", []), list)
                    ),
                    None,
                )
                if target_group is None:
                    new_group = {k: v for k, v in group.items() if k != "hooks"}
                    new_group["hooks"] = fresh
                    dst_groups.append(new_group)
                else:
                    target_group.setdefault("hooks", []).extend(fresh)
                changed = True

    return changed


def _install_json_merge(
    src: Path, dst: Path, result: "InstallResult", rel_dst: str
) -> None:
    """settings.json-style: deep-merge our hooks + permissions into a
    pre-existing JSON file instead of skipping it.

    The outcome is recorded by appending ``rel_dst`` to one list on ``result``:

    * dst missing                 -> copy fresh (``written``)
    * dst exists, merge adds keys -> merge + rewrite (``merged``)
    * dst exists, nothing to add  -> skip (``skipped``); already installed
    * dst exists, unparseable     -> skip (``skipped``); never clobber the user
    """
    if not dst.exists():
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)
        result.written.append(rel_dst)
        return

    try:
        dst_data = json.loads(dst.read_text(encoding="utf-8"))
        src_data = json.loads(src.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
        # (a settings file saved in a non-UTF-8 encoding). Malformed or
        # unreadable user file — leave it untouched.
        result.skipped.append(rel_dst)
        return
    if not isinstance(dst_data, dict) or not isinstance(src_data, dict):
        result.skipped.append(rel_dst)
        return

    if _merge_settings(src_data, dst_data):
        dst.write_text(json.dumps(dst_data, indent=2) + "\n", encoding="utf-8")
        result.merged.append(rel_dst)
    else:
        result.skipped.append(rel_dst)
import themes + + result = themes.detect_themes( + _store(), + min_sessions=p.get("min_sessions"), + min_claims=p.get("min_claims"), + top_k=p.get("top_k"), + ) + return { + "clusters": [ + { + "entities": c.entities, + "claim_ids": c.claim_ids, + "session_ids": c.session_ids, + "score": c.score, + "session_count": c.session_count, + "claim_count": c.claim_count, + } + for c in result.clusters + ], + "config": result.config_used, + } + + +def _h_propose_theme(p: dict) -> dict: + from . import themes + + store = _store() + actor = p.get("agent") or os.environ.get("VOUCH_AGENT", "unknown-agent") + cluster = themes.ThemeCluster( + entities=p["entities"], + claim_ids=p["claim_ids"], + session_ids=p.get("session_ids", []), + score=float(p.get("score", 0.0)), + session_count=len(p.get("session_ids", [])), + claim_count=len(p["claim_ids"]), + ) + return themes.propose_theme(store, cluster, proposed_by=actor) + + HANDLERS: dict[str, Callable[[dict], Any]] = { "kb.capabilities": _h_capabilities, "kb.status": _h_status, @@ -687,6 +728,8 @@ def _h_provenance_rebuild(_: dict) -> dict: "kb.impact": _h_impact, "kb.graph_export": _h_graph_export, "kb.provenance_rebuild": _h_provenance_rebuild, + "kb.detect_themes": _h_detect_themes, + "kb.propose_theme": _h_propose_theme, } diff --git a/src/vouch/migrations/_legacy.py b/src/vouch/migrations/_legacy.py index 426b5ac9..6b9dab10 100644 --- a/src/vouch/migrations/_legacy.py +++ b/src/vouch/migrations/_legacy.py @@ -55,7 +55,7 @@ class MigrationResult: def read_config(store: KBStore) -> dict[str, Any]: if not store.config_path.exists(): return {} - loaded = yaml.safe_load(store.config_path.read_text()) + loaded = yaml.safe_load(store.config_path.read_text(encoding="utf-8")) if loaded is None: return {} if not isinstance(loaded, dict): @@ -64,7 +64,8 @@ def read_config(store: KBStore) -> dict[str, Any]: def write_config(store: KBStore, config: dict[str, Any]) -> None: - store.config_path.write_text(yaml.safe_dump(config, 
sort_keys=False, allow_unicode=True)) + store.config_path.write_text( + yaml.safe_dump(config, sort_keys=False, allow_unicode=True), encoding="utf-8") def detect_version(store: KBStore) -> int: @@ -162,14 +163,14 @@ def _migration_0_to_1(store: KBStore, dry_run: bool) -> list[str]: required_ignores = ("proposed/", "state.db", "state.db-*") existing_ignores: list[str] = [] if gitignore_path.exists(): - existing_ignores = gitignore_path.read_text().splitlines() + existing_ignores = gitignore_path.read_text(encoding="utf-8").splitlines() missing_ignores = [line for line in required_ignores if line not in existing_ignores] if missing_ignores: changes.append("ensure .gitignore excludes proposed/ and state.db") if not dry_run: gitignore_path.parent.mkdir(parents=True, exist_ok=True) lines = [*existing_ignores, *missing_ignores] - gitignore_path.write_text("\n".join(lines).rstrip() + "\n") + gitignore_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8") return changes diff --git a/src/vouch/migrations/journal.py b/src/vouch/migrations/journal.py index 13d28e53..7a152da2 100644 --- a/src/vouch/migrations/journal.py +++ b/src/vouch/migrations/journal.py @@ -60,7 +60,7 @@ def latest_journal(store: KBStore) -> Path | None: def read_journal(path: Path) -> tuple[dict[str, object], list[JournalEntry]]: header: dict[str, object] = {} entries: list[JournalEntry] = [] - for line in path.read_text().splitlines(): + for line in path.read_text(encoding="utf-8").splitlines(): if not line.strip(): continue rec = json.loads(line) diff --git a/src/vouch/migrations/manifest.py b/src/vouch/migrations/manifest.py index b67ddda5..171a4a4b 100644 --- a/src/vouch/migrations/manifest.py +++ b/src/vouch/migrations/manifest.py @@ -70,7 +70,7 @@ def _validate_transforms(transforms: Any, where: str) -> list[dict[str, Any]]: def parse_manifest(path: Path) -> Manifest: try: - data = yaml.safe_load(path.read_text()) + data = yaml.safe_load(path.read_text(encoding="utf-8")) except 
yaml.YAMLError as e: raise ManifestError(f"{path.name}: invalid yaml: {e}") from e if not isinstance(data, dict): diff --git a/src/vouch/migrations/runner.py b/src/vouch/migrations/runner.py index 1192fead..6db98357 100644 --- a/src/vouch/migrations/runner.py +++ b/src/vouch/migrations/runner.py @@ -88,7 +88,7 @@ def _changed_files(store: KBStore, manifest: Manifest) -> list[tuple[Path, str, """Files whose content the manifest would actually change: (path, old, new).""" out: list[tuple[Path, str, str]] = [] for path in artifact_files(store.kb_dir, manifest.artifact): - old = path.read_text() + old = path.read_text(encoding="utf-8") new = transform_text(old, manifest.artifact, manifest.transforms) if new != old: out.append((path, old, new)) @@ -311,13 +311,13 @@ def verify(store: KBStore) -> dict[str, Any]: for path in artifact_files(store.kb_dir, kind): checked += 1 try: - model.model_validate(_yaml_load(path.read_text())) + model.model_validate(_yaml_load(path.read_text(encoding="utf-8"))) except Exception as e: errors.append({"path": str(path.relative_to(store.kb_dir)), "error": str(e)}) for path in artifact_files(store.kb_dir, "pages"): checked += 1 try: - match = _FRONTMATTER_RE.match(path.read_text()) + match = _FRONTMATTER_RE.match(path.read_text(encoding="utf-8")) front = _yaml_load(match.group(1)) if match else {} Page.model_validate({**(front or {}), "body": match.group(2) if match else ""}) except Exception as e: @@ -325,7 +325,7 @@ def verify(store: KBStore) -> dict[str, Any]: for meta in sorted((store.kb_dir / "sources").glob("*/meta.yaml")): checked += 1 try: - Source.model_validate(_yaml_load(meta.read_text())) + Source.model_validate(_yaml_load(meta.read_text(encoding="utf-8"))) except Exception as e: errors.append({"path": str(meta.relative_to(store.kb_dir)), "error": str(e)}) return { diff --git a/src/vouch/migrations/schema.py b/src/vouch/migrations/schema.py index 7ef9681c..4660d941 100644 --- a/src/vouch/migrations/schema.py +++ 
b/src/vouch/migrations/schema.py @@ -25,7 +25,7 @@ def read_schema_version(store: KBStore) -> str: p = schema_version_path(store) if not p.exists(): return BASELINE_SCHEMA_VERSION - raw = p.read_text().strip() + raw = p.read_text(encoding="utf-8").strip() if not raw: return BASELINE_SCHEMA_VERSION semver.parse(raw) # validate; raises ValueError on garbage diff --git a/src/vouch/models.py b/src/vouch/models.py index 23f94aa4..dccf4453 100644 --- a/src/vouch/models.py +++ b/src/vouch/models.py @@ -135,6 +135,7 @@ class PageType(StrEnum): LOG = "log" REPORT = "report" SOURCE_SUMMARY = "source-summary" + THEME = "theme" class PageStatus(StrEnum): diff --git a/src/vouch/openclaw/context_engine.py b/src/vouch/openclaw/context_engine.py index 6647cd1b..323bfaa8 100644 --- a/src/vouch/openclaw/context_engine.py +++ b/src/vouch/openclaw/context_engine.py @@ -99,7 +99,7 @@ def resolve_kb_root(*, workspace_dir: Path | None, kb_path: str | None) -> Path: def load_cfg(store: KBStore) -> dict[str, Any]: try: - loaded = yaml.safe_load(store.config_path.read_text()) + loaded = yaml.safe_load(store.config_path.read_text(encoding="utf-8")) except Exception: return {} return loaded if isinstance(loaded, dict) else {} diff --git a/src/vouch/page_kinds.py b/src/vouch/page_kinds.py index c675ccde..2f51c506 100644 --- a/src/vouch/page_kinds.py +++ b/src/vouch/page_kinds.py @@ -224,7 +224,7 @@ def _read_page_kinds(store: KBStore) -> dict[str, Any]: if not path.exists(): return {} try: - loaded = yaml.safe_load(path.read_text()) + loaded = yaml.safe_load(path.read_text(encoding="utf-8")) except yaml.YAMLError: return {} if not isinstance(loaded, dict): diff --git a/src/vouch/pr_cache.py b/src/vouch/pr_cache.py index 9958221c..3f499eab 100644 --- a/src/vouch/pr_cache.py +++ b/src/vouch/pr_cache.py @@ -460,7 +460,7 @@ def _record_from_json(d: dict[str, Any]) -> PRRecord: def load_cache(path: Path) -> dict[str, PRRecord]: if not path.exists(): return {} - raw = json.loads(path.read_text()) + 
raw = json.loads(path.read_text(encoding="utf-8")) return {str(d["number"]): _record_from_json(d) for d in raw.get("prs", [])} @@ -475,7 +475,7 @@ def save_cache(path: Path, repo: RepoRef, records: dict[str, PRRecord]) -> None: "prs": [_record_to_json(r) for r in prs_sorted], } tmp = path.with_suffix(path.suffix + ".tmp") - tmp.write_text(json.dumps(payload, indent=2, sort_keys=False)) + tmp.write_text(json.dumps(payload, indent=2, sort_keys=False), encoding="utf-8") tmp.replace(path) diff --git a/src/vouch/proposals.py b/src/vouch/proposals.py index 22d71d4f..6f1dd25a 100644 --- a/src/vouch/proposals.py +++ b/src/vouch/proposals.py @@ -333,7 +333,7 @@ def _approval_block_reason( if approved_by == proposal.proposed_by: cfg: dict[str, Any] = {} try: - loaded = yaml.safe_load((store.kb_dir / "config.yaml").read_text()) + loaded = yaml.safe_load((store.kb_dir / "config.yaml").read_text(encoding="utf-8")) if isinstance(loaded, dict): cfg = loaded except Exception: @@ -577,7 +577,7 @@ def expire_pending_after_days(store: KBStore, *, override: int | None = None) -> if override is not None: return override try: - loaded = yaml.safe_load(store.config_path.read_text()) + loaded = yaml.safe_load(store.config_path.read_text(encoding="utf-8")) except Exception: return _DEFAULT_EXPIRE_PENDING_DAYS if not isinstance(loaded, dict): diff --git a/src/vouch/recall.py b/src/vouch/recall.py new file mode 100644 index 00000000..911d512c --- /dev/null +++ b/src/vouch/recall.py @@ -0,0 +1,87 @@ +"""Session-start recall digest — inject all approved knowledge into a new +Claude session's context (memvid-style), via the SessionStart hook. + +Unlike ``kb.context`` (task-scoped retrieval), this emits EVERY live approved +claim as a compact ``[id] text`` line plus every approved page title, so a +fresh session is aware of the whole reviewed KB from the first turn. Page +bodies are fetched on demand with ``kb_read_page`` / ``kb_search``. 
DEFAULT_ENABLED = True
DEFAULT_MAX_CHARS = 12000

# NOTE(review): both wrapper tags are empty strings in the text under review —
# confirm that is intended (an empty tag contributes only a blank line).
_OPEN_TAG = ""
_CLOSE_TAG = ""


@dataclass(frozen=True)
class RecallConfig:
    """Settings read from the ``recall:`` section of config.yaml."""

    # Whether the session-start digest is produced at all.
    enabled: bool = DEFAULT_ENABLED
    # Upper bound, in characters, passed to ``build_digest``.
    max_chars: int = DEFAULT_MAX_CHARS


def _coerce_max_chars(value: Any) -> int:
    """Return ``value`` as an int, or the default when it cannot be one."""
    if isinstance(value, bool):
        # ``max_chars: true`` is a config mistake, not a budget of 1.
        return DEFAULT_MAX_CHARS
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return DEFAULT_MAX_CHARS


def _recall_config_from(loaded: Any) -> RecallConfig:
    """Build a ``RecallConfig`` from an already-parsed config.yaml document."""
    if not isinstance(loaded, dict):
        return RecallConfig()
    raw = loaded.get("recall")
    if not isinstance(raw, dict):
        return RecallConfig()
    return RecallConfig(
        enabled=bool(raw.get("enabled", DEFAULT_ENABLED)),
        max_chars=_coerce_max_chars(raw.get("max_chars", DEFAULT_MAX_CHARS)),
    )


def load_config(store: "KBStore") -> RecallConfig:
    """Read ``recall:`` from config.yaml; fall back to defaults.

    Never raises on a missing, unreadable, non-UTF-8 or malformed config file,
    nor on a ``max_chars`` value that cannot be converted to an int — each of
    those yields the default for the affected setting.
    """
    try:
        loaded = yaml.safe_load(store.config_path.read_text(encoding="utf-8"))
    except (OSError, ValueError, yaml.YAMLError):
        # ValueError covers UnicodeDecodeError from a non-UTF-8 file.
        return RecallConfig()
    return _recall_config_from(loaded)


def build_digest(store: "KBStore", *, max_chars: int = DEFAULT_MAX_CHARS) -> str:
    """Return an injectable digest of every live approved claim + page title.

    Claims whose status is in ``_RETRACTED_CLAIM_STATUSES`` are left out.
    Returns the empty string when the KB has neither claims nor pages
    (nothing to inject).

    When the digest is longer than ``max_chars`` it is cut back to whole
    lines and an explicit truncation notice is appended. The result fits in
    ``max_chars`` whenever the notice itself does.
    """
    claims = [
        c for c in store.list_claims()
        if c.status not in _RETRACTED_CLAIM_STATUSES
    ]
    pages = store.list_pages()
    if not claims and not pages:
        return ""

    lines: list[str] = [
        _OPEN_TAG,
        f"# approved KB knowledge for this repo — {len(claims)} claim(s), "
        f"{len(pages)} page(s). reviewed, cited, durable. use kb_read_page / "
        "kb_search for detail; kb_propose_* (human-approved) to add more.",
    ]
    if claims:
        lines += ["", "## claims"]
        lines += [f"- [{c.id}] {c.text}" for c in claims]
    if pages:
        lines += ["", "## pages"]
        lines += [f"- [{p.id}] {p.title}" for p in pages]
    lines.append(_CLOSE_TAG)
    body = "\n".join(lines)

    if len(body) <= max_chars:
        return body

    notice = (
        f"\n… [truncated: approved KB exceeds {max_chars} chars; "
        "run `vouch search` / kb_context for the rest]\n" + _CLOSE_TAG
    )
    # Reserve exactly what the notice needs instead of a guessed constant.
    keep = max(0, max_chars - len(notice))
    head = body[:keep]
    if body[keep:keep + 1] != "\n":
        # The cut fell inside a line. Drop the partial line so a truncated
        # claim is never presented as approved knowledge.
        cut = head.rfind("\n")
        head = head[:cut] if cut != -1 else ""
    return head.rstrip() + notice
clusters across completed sessions. + + Read-only — returns ranked clusters without persisting anything. + Scoring is deterministic (entity co-occurrence, no LLM). + + min_sessions: minimum sessions an entity pair must span (default from config or 2). + min_claims: minimum claims supporting the cluster (default from config or 3). + top_k: maximum clusters to return (default from config or 10). + """ + from . import themes + store = _store() + result = themes.detect_themes( + store, + min_sessions=min_sessions, + min_claims=min_claims, + top_k=top_k, + ) + return { + "clusters": [ + { + "entities": c.entities, + "claim_ids": c.claim_ids, + "session_ids": c.session_ids, + "score": c.score, + "session_count": c.session_count, + "claim_count": c.claim_count, + } + for c in result.clusters + ], + "config": result.config_used, + } + + +@mcp.tool() +def kb_propose_theme( + *, + entities: list[str], + claim_ids: list[str], + session_ids: list[str], + score: float = 0.0, + agent: str | None = None, +) -> dict[str, Any]: + """Propose a theme synthesis page from a detected cluster. + + Routes through the review gate — appears in kb.list_pending. + Pass a cluster from kb.detect_themes directly. + """ + from . 
import themes + store = _store() + actor = agent or os.environ.get("VOUCH_AGENT", "unknown-agent") + cluster = themes.ThemeCluster( + entities=entities, + claim_ids=claim_ids, + session_ids=session_ids, + score=score, + session_count=len(session_ids), + claim_count=len(claim_ids), + ) + return themes.propose_theme(store, cluster, proposed_by=actor) + + def _current_model_name() -> str: try: from .embeddings import get_embedder diff --git a/src/vouch/stats.py b/src/vouch/stats.py index 8f382e45..5e5ca35c 100644 --- a/src/vouch/stats.py +++ b/src/vouch/stats.py @@ -50,7 +50,7 @@ def _list_decided(store: KBStore) -> list[Proposal]: return [] out: list[Proposal] = [] for path in sorted(ddir.glob("*.yaml")): - out.append(Proposal.model_validate(_yaml_load(path.read_text()))) + out.append(Proposal.model_validate(_yaml_load(path.read_text(encoding="utf-8")))) return out diff --git a/src/vouch/storage.py b/src/vouch/storage.py index af21656c..653ea806 100644 --- a/src/vouch/storage.py +++ b/src/vouch/storage.py @@ -30,9 +30,10 @@ import sqlite3 import stat from pathlib import Path -from typing import Any +from typing import Any, TypeVar import yaml +from pydantic import BaseModel, ValidationError from .models import ( Claim, @@ -79,6 +80,16 @@ def _starter_config() -> dict[str, Any]: "require_human_approval": True, "expire_pending_after_days": 90, }, + "capture": { + # auto-capture claude code sessions into pending summaries. + "enabled": True, + "min_observations": 3, + }, + "recall": { + # inject a digest of all approved knowledge at session start. + "enabled": True, + "max_chars": 12000, + }, "retrieval": { # auto = embedding -> fts5 -> substring; or pin one of # embedding | fts5 | substring. See context._retrieve. 
@@ -144,6 +155,26 @@ def _yaml_load(text: str) -> Any: return yaml.safe_load(text) +_log = logging.getLogger("vouch.storage") + +_ModelT = TypeVar("_ModelT", bound=BaseModel) + + +def _load_or_skip(path: Path, model: type[_ModelT], kind: str) -> _ModelT | None: + """Parse one durable artifact file into ``model``. + + On a corrupt or unreadable file — e.g. a hand-edited yaml or mojibake + carrying a control character that pyyaml's loader rejects — log a warning + and return ``None`` instead of raising, so a single bad file cannot take + down a whole bulk listing (``vouch pending`` and friends). + """ + try: + return model.model_validate(_yaml_load(path.read_text(encoding="utf-8"))) + except (yaml.YAMLError, ValidationError, UnicodeDecodeError, OSError) as e: + _log.warning("skipping unreadable %s %s: %s", kind, path.name, e) + return None + + _FRONTMATTER_RE = re.compile(r"^---\n(.*?)\n---\n?(.*)$", re.DOTALL) @@ -215,14 +246,14 @@ def init(cls, root: Path) -> KBStore: for sub in SUBDIRS: (kb.kb_dir / sub).mkdir(exist_ok=True) if not kb.config_path.exists(): - kb.config_path.write_text(_yaml_dump(_starter_config())) + kb.config_path.write_text(_yaml_dump(_starter_config()), encoding="utf-8") schema_version_file = kb.kb_dir / SCHEMA_VERSION_FILENAME if not schema_version_file.exists(): - schema_version_file.write_text(SCHEMA_VERSION + "\n") + schema_version_file.write_text(SCHEMA_VERSION + "\n", encoding="utf-8") gi = kb.kb_dir / ".gitignore" if not gi.exists(): # state.db is derived; proposed/ is the agent's scratch space. 
- gi.write_text("proposed/\nstate.db\nstate.db-*\n") + gi.write_text("proposed/\ncaptures/\nstate.db\nstate.db-*\n", encoding="utf-8") return kb # --- paths ------------------------------------------------------------- @@ -283,7 +314,7 @@ def put_source( content_path.write_bytes(content) meta_path = sdir / "meta.yaml" if meta_path.exists(): - return Source.model_validate(_yaml_load(meta_path.read_text())) + return Source.model_validate(_yaml_load(meta_path.read_text(encoding="utf-8"))) src = Source( id=sid, type=source_type, # type: ignore[arg-type] @@ -295,7 +326,7 @@ def put_source( tags=tags or [], metadata=metadata or {}, ) - meta_path.write_text(_yaml_dump(src.model_dump(mode="json"))) + meta_path.write_text(_yaml_dump(src.model_dump(mode="json")), encoding="utf-8") self._embed_and_store(kind="source", id=src.id, text=src.title or src.locator or "") return src @@ -303,7 +334,7 @@ def get_source(self, source_id: str) -> Source: meta_path = self._source_dir(source_id) / "meta.yaml" if not meta_path.exists(): raise ArtifactNotFoundError(f"source {source_id}") - return Source.model_validate(_yaml_load(meta_path.read_text())) + return Source.model_validate(_yaml_load(meta_path.read_text(encoding="utf-8"))) def read_source_content(self, source_id: str) -> bytes: p = self._source_dir(source_id) / "content" @@ -319,7 +350,9 @@ def list_sources(self) -> list[Source]: for sdir in sorted(sources_dir.iterdir()): meta = sdir / "meta.yaml" if meta.exists(): - out.append(Source.model_validate(_yaml_load(meta.read_text()))) + src = _load_or_skip(meta, Source, "source") + if src is not None: + out.append(src) return out # --- graph-integrity helpers ------------------------------------------ @@ -400,7 +433,7 @@ def put_claim(self, claim: Claim) -> Claim: ) self._validate_claim_refs(claim) try: - with self._claim_path(claim.id).open("x") as f: + with self._claim_path(claim.id).open("x", encoding="utf-8") as f: f.write(_yaml_dump(claim.model_dump(mode="json"))) except 
FileExistsError as e: raise ValueError( @@ -413,15 +446,16 @@ def get_claim(self, claim_id: str) -> Claim: p = self._claim_path(claim_id) if not p.exists(): raise ArtifactNotFoundError(f"claim {claim_id}") - return Claim.model_validate(_yaml_load(p.read_text())) + return Claim.model_validate(_yaml_load(p.read_text(encoding="utf-8"))) def list_claims(self) -> list[Claim]: cdir = self.kb_dir / "claims" if not cdir.is_dir(): return [] return [ - Claim.model_validate(_yaml_load(p.read_text())) + c for p in sorted(cdir.glob("*.yaml")) + if (c := _load_or_skip(p, Claim, "claim")) is not None ] def update_claim(self, claim: Claim) -> Claim: @@ -438,7 +472,8 @@ def update_claim(self, claim: Claim) -> Claim: # model validator can't catch (it has no KB access). Mirrors the # put_claim guard so the update path can't reintroduce the gap. self._validate_claim_refs(claim) - self._claim_path(claim.id).write_text(_yaml_dump(claim.model_dump(mode="json"))) + self._claim_path(claim.id).write_text( + _yaml_dump(claim.model_dump(mode="json")), encoding="utf-8") self._embed_and_store(kind="claim", id=claim.id, text=claim.text) # Keep the FTS5 row in sync with the on-disk claim so lifecycle # mutations (archive, supersede, contradict, confirm) are reflected @@ -523,7 +558,7 @@ def list_pages(self) -> list[Page]: def put_entity(self, entity: Entity) -> Entity: try: - with self._entity_path(entity.id).open("x") as f: + with self._entity_path(entity.id).open("x", encoding="utf-8") as f: f.write(_yaml_dump(entity.model_dump(mode="json"))) except FileExistsError as e: raise ValueError( @@ -539,14 +574,14 @@ def get_entity(self, eid: str) -> Entity: p = self._entity_path(eid) if not p.exists(): raise ArtifactNotFoundError(f"entity {eid}") - return Entity.model_validate(_yaml_load(p.read_text())) + return Entity.model_validate(_yaml_load(p.read_text(encoding="utf-8"))) def list_entities(self) -> list[Entity]: d = self.kb_dir / "entities" if not d.is_dir(): return [] - return 
[Entity.model_validate(_yaml_load(p.read_text())) - for p in sorted(d.glob("*.yaml"))] + return [e for p in sorted(d.glob("*.yaml")) + if (e := _load_or_skip(p, Entity, "entity")) is not None] # --- relations --------------------------------------------------------- @@ -572,7 +607,7 @@ def _validate_relation_refs(self, rel: Relation) -> None: def put_relation(self, rel: Relation) -> Relation: self._validate_relation_refs(rel) try: - with self._relation_path(rel.id).open("x") as f: + with self._relation_path(rel.id).open("x", encoding="utf-8") as f: f.write(_yaml_dump(rel.model_dump(mode="json"))) except FileExistsError as e: raise ValueError( @@ -605,7 +640,7 @@ def put_relation_idempotent(self, rel: Relation) -> Relation: # the linked claim was subsequently archived or retracted. self._validate_relation_refs(rel) try: - with path.open("x") as f: + with path.open("x", encoding="utf-8") as f: f.write(_yaml_dump(rel.model_dump(mode="json"))) except FileExistsError: self._embed_and_store( @@ -623,14 +658,14 @@ def get_relation(self, rid: str) -> Relation: p = self._relation_path(rid) if not p.exists(): raise ArtifactNotFoundError(f"relation {rid}") - return Relation.model_validate(_yaml_load(p.read_text())) + return Relation.model_validate(_yaml_load(p.read_text(encoding="utf-8"))) def list_relations(self) -> list[Relation]: d = self.kb_dir / "relations" if not d.is_dir(): return [] - return [Relation.model_validate(_yaml_load(p.read_text())) - for p in sorted(d.glob("*.yaml"))] + return [r for p in sorted(d.glob("*.yaml")) + if (r := _load_or_skip(p, Relation, "relation")) is not None] def relations_from(self, node_id: str) -> list[Relation]: return [r for r in self.list_relations() if r.source == node_id] @@ -644,7 +679,7 @@ def put_evidence(self, ev: Evidence) -> Evidence: if not (self._source_dir(ev.source_id) / "meta.yaml").exists(): raise ValueError(f"evidence {ev.id} cites unknown source {ev.source_id}") try: - with self._evidence_path(ev.id).open("x") as f: + 
with self._evidence_path(ev.id).open("x", encoding="utf-8") as f: f.write(_yaml_dump(ev.model_dump(mode="json"))) except FileExistsError as e: raise ValueError( @@ -657,20 +692,20 @@ def get_evidence(self, eid: str) -> Evidence: p = self._evidence_path(eid) if not p.exists(): raise ArtifactNotFoundError(f"evidence {eid}") - return Evidence.model_validate(_yaml_load(p.read_text())) + return Evidence.model_validate(_yaml_load(p.read_text(encoding="utf-8"))) def list_evidence(self) -> list[Evidence]: d = self.kb_dir / "evidence" if not d.is_dir(): return [] - return [Evidence.model_validate(_yaml_load(p.read_text())) - for p in sorted(d.glob("*.yaml"))] + return [ev for p in sorted(d.glob("*.yaml")) + if (ev := _load_or_skip(p, Evidence, "evidence")) is not None] # --- sessions ---------------------------------------------------------- def put_session(self, sess: Session) -> Session: try: - with self._session_path(sess.id).open("x") as f: + with self._session_path(sess.id).open("x", encoding="utf-8") as f: f.write(_yaml_dump(sess.model_dump(mode="json"))) except FileExistsError as e: raise ValueError( @@ -684,21 +719,22 @@ def update_session(self, sess: Session) -> Session: # guard against duplicate ids, so updates need a separate path. 
if not self._session_path(sess.id).exists(): raise ArtifactNotFoundError(f"session {sess.id}") - self._session_path(sess.id).write_text(_yaml_dump(sess.model_dump(mode="json"))) + self._session_path(sess.id).write_text( + _yaml_dump(sess.model_dump(mode="json")), encoding="utf-8") return sess def get_session(self, sid: str) -> Session: p = self._session_path(sid) if not p.exists(): raise ArtifactNotFoundError(f"session {sid}") - return Session.model_validate(_yaml_load(p.read_text())) + return Session.model_validate(_yaml_load(p.read_text(encoding="utf-8"))) def list_sessions(self) -> list[Session]: d = self.kb_dir / "sessions" if not d.is_dir(): return [] - return [Session.model_validate(_yaml_load(p.read_text())) - for p in sorted(d.glob("*.yaml"))] + return [s for p in sorted(d.glob("*.yaml")) + if (s := _load_or_skip(p, Session, "session")) is not None] # --- embedding hook ------------------------------------------------------ @@ -771,7 +807,7 @@ def _embed_and_store( def put_proposal(self, proposal: Proposal) -> Proposal: try: - with self._proposal_path(proposal.id).open("x") as f: + with self._proposal_path(proposal.id).open("x", encoding="utf-8") as f: f.write(_yaml_dump(proposal.model_dump(mode="json"))) except FileExistsError as e: raise ValueError( @@ -782,14 +818,16 @@ def put_proposal(self, proposal: Proposal) -> Proposal: def get_proposal(self, proposal_id: str) -> Proposal: for path in (self._proposal_path(proposal_id), self._decided_path(proposal_id)): if path.exists(): - return Proposal.model_validate(_yaml_load(path.read_text())) + return Proposal.model_validate(_yaml_load(path.read_text(encoding="utf-8"))) raise ArtifactNotFoundError(f"proposal {proposal_id}") def list_proposals(self, status: ProposalStatus | None = None) -> list[Proposal]: out: list[Proposal] = [] for sub in ("proposed", "decided"): for p in sorted((self.kb_dir / sub).glob("*.yaml")): - pr = Proposal.model_validate(_yaml_load(p.read_text())) + pr = _load_or_skip(p, Proposal, 
"proposal") + if pr is None: + continue if status is None or pr.status == status: out.append(pr) return out @@ -797,7 +835,7 @@ def list_proposals(self, status: ProposalStatus | None = None) -> list[Proposal] def move_proposal_to_decided(self, proposal: Proposal) -> None: src = self._proposal_path(proposal.id) dst = self._decided_path(proposal.id) - dst.write_text(_yaml_dump(proposal.model_dump(mode="json"))) + dst.write_text(_yaml_dump(proposal.model_dump(mode="json")), encoding="utf-8") if src.exists(): src.unlink() diff --git a/src/vouch/sync.py b/src/vouch/sync.py index de9abdc3..1a6636b5 100644 --- a/src/vouch/sync.py +++ b/src/vouch/sync.py @@ -282,7 +282,7 @@ def _write_conflict_report( report_path = report_dir / f"{check.source_id}.json" report = asdict(check) report["on_conflict"] = on_conflict - report_path.write_text(json.dumps(report, indent=2, sort_keys=True)) + report_path.write_text(json.dumps(report, indent=2, sort_keys=True), encoding="utf-8") return str(report_path.relative_to(kb_dir)) diff --git a/src/vouch/themes.py b/src/vouch/themes.py new file mode 100644 index 00000000..45980235 --- /dev/null +++ b/src/vouch/themes.py @@ -0,0 +1,374 @@ +"""Cross-session pattern detection — recurring entity clusters. + +Scans approved claims across completed sessions, finds entity co-occurrence +clusters, and optionally proposes "theme" synthesis pages through the +review gate. All scoring is deterministic (no LLM). The detector never +writes directly — it only reads or proposes. +""" + +from __future__ import annotations + +import logging +import math +from collections import defaultdict +from dataclasses import dataclass, field +from typing import Any + +import yaml + +from .models import ClaimStatus, ProposalStatus +from .proposals import ProposalError, propose_page +from .storage import KBStore + +logger = logging.getLogger(__name__) + +# Statuses that disqualify a claim from theme support. 
+_EXCLUDED_STATUSES = frozenset({ + ClaimStatus.ARCHIVED, + ClaimStatus.SUPERSEDED, + ClaimStatus.REDACTED, +}) + + +@dataclass +class ThemeCluster: + """A detected entity co-occurrence cluster.""" + + entities: list[str] + claim_ids: list[str] + session_ids: list[str] + score: float + session_count: int + claim_count: int + + +@dataclass +class DetectResult: + """Outcome of detect_themes.""" + + clusters: list[ThemeCluster] = field(default_factory=list) + config_used: dict[str, Any] = field(default_factory=dict) + + +_DEFAULT_MIN_SESSIONS = 2 +_DEFAULT_MIN_CLAIMS = 3 +_DEFAULT_TOP_K = 10 + + +def _load_theme_config(store: KBStore) -> dict[str, Any]: + """Read theme-detection config with defensive defaults. + + Mirrors the salience.reflex_cfg pattern: every value is type-checked + and falls back to its default rather than crashing on malformed input. + """ + try: + raw = yaml.safe_load(store.config_path.read_text()) + cfg = raw if isinstance(raw, dict) else {} + except Exception: + cfg = {} + themes_cfg = cfg.get("themes") if isinstance(cfg, dict) else None + if not isinstance(themes_cfg, dict): + themes_cfg = {} + + enabled = themes_cfg.get("enabled", True) + enabled = bool(enabled) if isinstance(enabled, bool) else True + + ms = themes_cfg.get("min_sessions", _DEFAULT_MIN_SESSIONS) + ms = ms if isinstance(ms, int) and ms > 0 else _DEFAULT_MIN_SESSIONS + + mc = themes_cfg.get("min_claims", _DEFAULT_MIN_CLAIMS) + mc = mc if isinstance(mc, int) and mc > 0 else _DEFAULT_MIN_CLAIMS + + tk = themes_cfg.get("top_k", _DEFAULT_TOP_K) + tk = tk if isinstance(tk, int) and tk > 0 else _DEFAULT_TOP_K + + return { + "enabled": enabled, + "min_sessions": ms, + "min_claims": mc, + "top_k": tk, + } + + +def detect_themes( + store: KBStore, + *, + min_sessions: int | None = None, + min_claims: int | None = None, + top_k: int | None = None, +) -> DetectResult: + """Detect recurring entity clusters across sessions. + + Pure read-only operation. 
Returns ranked clusters without persisting + anything. Excludes archived, superseded, redacted, and pending claims. + """ + cfg = _load_theme_config(store) + if not cfg["enabled"]: + return DetectResult(clusters=[], config_used=cfg) + + ms = min_sessions if min_sessions is not None else cfg["min_sessions"] + mc = min_claims if min_claims is not None else cfg["min_claims"] + tk = top_k if top_k is not None else cfg["top_k"] + + # Collect approved claims that reference entities and belong to sessions. + claims = store.list_claims() + # Also exclude pending (working) — only look at review-gated claims. + eligible = [ + c for c in claims + if c.status not in _EXCLUDED_STATUSES + and c.entities + and c.approved_by is not None + ] + + # Map each claim to its session(s) via decided proposals. + claim_session: dict[str, str] = {} + for prop in store.list_proposals(ProposalStatus.APPROVED): + if prop.kind.value == "claim" and prop.session_id: + claim_id = prop.payload.get("id", "") + if claim_id: + claim_session[claim_id] = prop.session_id + + # Build entity pair co-occurrence across sessions. + # Key: frozenset of two entity ids → {session_id: [claim_ids]} + pair_evidence: dict[frozenset[str], dict[str, list[str]]] = defaultdict( + lambda: defaultdict(list) + ) + + for claim in eligible: + sid = claim_session.get(claim.id) + if not sid: + continue + ents = sorted(set(claim.entities)) + for i, e1 in enumerate(ents): + for e2 in ents[i + 1:]: + pair_evidence[frozenset({e1, e2})][sid].append(claim.id) + + # Score each pair: session_count * log(1 + claim_count). 
+ raw_clusters: list[ThemeCluster] = [] + for pair, sessions_map in pair_evidence.items(): + session_count = len(sessions_map) + if session_count < ms: + continue + all_claim_ids = sorted({ + cid for cids in sessions_map.values() for cid in cids + }) + if len(all_claim_ids) < mc: + continue + score = session_count * math.log(1 + len(all_claim_ids)) + raw_clusters.append(ThemeCluster( + entities=sorted(pair), + claim_ids=all_claim_ids, + session_ids=sorted(sessions_map.keys()), + score=round(score, 4), + session_count=session_count, + claim_count=len(all_claim_ids), + )) + + # Merge overlapping pairs into larger clusters. + clusters = _merge_clusters(raw_clusters, min_sessions=ms, min_claims=mc) + + # Deduplicate against existing theme pages. Compare on the resolvable + # entity subset (the set that propose_theme would actually store) so + # dedup stays consistent even when some cluster entities don't resolve. + existing_themes = _existing_theme_entity_sets(store) + resolvable = _resolvable_entities(store) + clusters = [ + c for c in clusters + if frozenset(e for e in c.entities if e in resolvable) + not in existing_themes + ] + + # Rank by score descending, take top_k. + clusters.sort(key=lambda c: c.score, reverse=True) + clusters = clusters[:tk] + + return DetectResult(clusters=clusters, config_used={ + "min_sessions": ms, "min_claims": mc, "top_k": tk, "enabled": True, + }) + + +def _merge_clusters( + pairs: list[ThemeCluster], + *, + min_sessions: int, + min_claims: int, +) -> list[ThemeCluster]: + """Merge entity pairs that share entities into larger clusters.""" + if not pairs: + return [] + + # Union-find over entities. 
+ parent: dict[str, str] = {} + + def find(x: str) -> str: + while parent.get(x, x) != x: + parent[x] = parent.get(parent[x], parent[x]) + x = parent[x] + return x + + def union(a: str, b: str) -> None: + ra, rb = find(a), find(b) + if ra != rb: + parent[ra] = rb + + for cluster in pairs: + ents = cluster.entities + for i in range(len(ents) - 1): + union(ents[i], ents[i + 1]) + + # Group pairs by their root entity. + groups: dict[str, list[ThemeCluster]] = defaultdict(list) + for cluster in pairs: + root = find(cluster.entities[0]) + groups[root].append(cluster) + + merged: list[ThemeCluster] = [] + for group in groups.values(): + all_entities: set[str] = set() + all_claims: set[str] = set() + all_sessions: set[str] = set() + for c in group: + all_entities.update(c.entities) + all_claims.update(c.claim_ids) + all_sessions.update(c.session_ids) + + if len(all_sessions) < min_sessions or len(all_claims) < min_claims: + continue + + score = len(all_sessions) * math.log(1 + len(all_claims)) + merged.append(ThemeCluster( + entities=sorted(all_entities), + claim_ids=sorted(all_claims), + session_ids=sorted(all_sessions), + score=round(score, 4), + session_count=len(all_sessions), + claim_count=len(all_claims), + )) + return merged + + +def _resolvable_entities(store: KBStore) -> set[str]: + """Return the set of entity ids that exist in the store.""" + return {e.id for e in store.list_entities()} + + +def _existing_theme_entity_sets(store: KBStore) -> set[frozenset[str]]: + """Return entity sets of existing theme pages and pending theme proposals.""" + result: set[frozenset[str]] = set() + for page in store.list_pages(): + if page.type == "theme" and page.entities: + result.add(frozenset(page.entities)) + for prop in store.list_proposals(ProposalStatus.PENDING): + if (prop.kind.value == "page" + and prop.payload.get("type") == "theme" + and prop.payload.get("entities")): + result.add(frozenset(prop.payload["entities"])) + return result + + +def propose_theme( + store: 
KBStore, + cluster: ThemeCluster, + *, + proposed_by: str, + session_id: str | None = None, +) -> dict[str, Any]: + """File a theme synthesis page through the review gate. + + The page body is deterministic (no LLM). It lists the entities, the + supporting claims, and the sessions that contribute to the cluster. + """ + # Guard: must have entities and claims. + if not cluster.entities: + raise ProposalError("cluster has no entities") + if not cluster.claim_ids: + raise ProposalError("cluster has no supporting claims") + + # Verify all referenced claims still exist and are eligible. + valid_claims: list[str] = [] + for cid in cluster.claim_ids: + try: + claim = store.get_claim(cid) + if claim.status not in _EXCLUDED_STATUSES: + valid_claims.append(cid) + except Exception: + pass + if not valid_claims: + raise ProposalError("no eligible claims remain in cluster") + + # Verify entities exist. + valid_entities: list[str] = [] + for eid in cluster.entities: + try: + store.get_entity(eid) + valid_entities.append(eid) + except Exception: + pass + if not valid_entities: + raise ProposalError("no valid entities in cluster") + + slug = "theme-" + "-".join(valid_entities[:4]) + title = f"theme: {', '.join(valid_entities)}" + body = _build_theme_body(cluster, valid_claims, valid_entities) + + proposal = propose_page( + store, + title=title, + body=body, + page_type="theme", + claim_ids=valid_claims, + entity_ids=valid_entities, + proposed_by=proposed_by, + tags=["theme", "auto-detected"], + slug_hint=slug, + session_id=session_id, + rationale=( + f"recurring pattern across {cluster.session_count} sessions, " + f"{cluster.claim_count} claims (score {cluster.score})" + ), + ) + return { + "proposal_id": proposal.id, + "theme_page_id": slug, + "entities": valid_entities, + "claim_count": len(valid_claims), + "session_count": cluster.session_count, + "score": cluster.score, + } + + +def _build_theme_body( + cluster: ThemeCluster, + claim_ids: list[str], + entity_ids: list[str], +) 
-> str: + lines = [ + f"# theme: {', '.join(entity_ids)}", + "", + f"recurring pattern detected across {cluster.session_count} sessions " + f"with {len(claim_ids)} supporting claims.", + "", + "## entities", + "", + ] + for eid in entity_ids: + lines.append(f"- `{eid}`") + lines.extend([ + "", + "## supporting claims", + "", + ]) + for cid in claim_ids: + lines.append(f"- `{cid}`") + lines.extend([ + "", + "## sessions", + "", + ]) + for sid in cluster.session_ids: + lines.append(f"- `{sid}`") + lines.extend([ + "", + f"**score:** {cluster.score}", + ]) + return "\n".join(lines) diff --git a/src/vouch/volunteer_context.py b/src/vouch/volunteer_context.py index eef2f6bc..b1036e0f 100644 --- a/src/vouch/volunteer_context.py +++ b/src/vouch/volunteer_context.py @@ -66,7 +66,7 @@ def to_dict(self) -> dict[str, Any]: def load_config(store: KBStore) -> VolunteerConfig: """Read ``volunteer:`` from config.yaml; fall back to defaults.""" try: - loaded = yaml.safe_load(store.config_path.read_text()) + loaded = yaml.safe_load(store.config_path.read_text(encoding="utf-8")) except (OSError, yaml.YAMLError): return VolunteerConfig() if not isinstance(loaded, dict): diff --git a/src/vouch/web/server.py b/src/vouch/web/server.py index 2473fadb..ed51af1b 100644 --- a/src/vouch/web/server.py +++ b/src/vouch/web/server.py @@ -195,7 +195,7 @@ def _pending_page(store: KBStore, page: int, page_size: int proposals: list[Proposal] = [] for p in paths[lo:hi]: try: - proposals.append(Proposal.model_validate(_yaml_load(p.read_text()))) + proposals.append(Proposal.model_validate(_yaml_load(p.read_text(encoding="utf-8")))) except Exception as e: _log.warning("skipping unreadable proposal %s: %s", p.name, e) return proposals, page, pages, total diff --git a/tests/test_capture.py b/tests/test_capture.py new file mode 100644 index 00000000..9c4f2a2e --- /dev/null +++ b/tests/test_capture.py @@ -0,0 +1,653 @@ +"""Auto-capture: config, buffer, observe, finalize.""" + +from __future__ import 
annotations + +from pathlib import Path + +import pytest + +from vouch import capture as cap +from vouch.storage import KBStore, _starter_config + + +@pytest.fixture +def store(tmp_path: Path) -> KBStore: + return KBStore.init(tmp_path) + + +def test_load_config_defaults(store: KBStore) -> None: + cfg = cap.load_config(store) + assert cfg.enabled is True + assert cfg.min_observations == 3 + assert cfg.dedup_window_seconds == 60.0 + + +def test_load_config_reads_override(store: KBStore) -> None: + store.config_path.write_text( + "capture:\n enabled: false\n min_observations: 5\n" + ) + cfg = cap.load_config(store) + assert cfg.enabled is False + assert cfg.min_observations == 5 + + +def test_buffer_path_under_captures_dir(store: KBStore) -> None: + p = cap.buffer_path(store, "sess-123") + assert p == store.kb_dir / "captures" / "sess-123.jsonl" + + +def test_starter_config_has_capture_namespace() -> None: + assert _starter_config()["capture"]["enabled"] is True + + +def test_init_gitignores_captures(tmp_path: Path) -> None: + kb = KBStore.init(tmp_path) + assert "captures/" in (kb.kb_dir / ".gitignore").read_text() + + +def test_observe_appends_line(store: KBStore) -> None: + wrote = cap.observe(store, "s1", tool="Edit", summary="Edited a.py", now=100.0) + assert wrote is True + lines = cap.buffer_path(store, "s1").read_text().splitlines() + assert len(lines) == 1 + assert "Edited a.py" in lines[0] + + +def test_observe_dedups_within_window(store: KBStore) -> None: + assert cap.observe(store, "s1", tool="Read", summary="Read a.py", now=100.0) + # identical within 60s window -> skipped + assert cap.observe(store, "s1", tool="Read", summary="Read a.py", now=130.0) is False + # same key past the window -> written again + assert cap.observe(store, "s1", tool="Read", summary="Read a.py", now=200.0) + assert len(cap.buffer_path(store, "s1").read_text().splitlines()) == 2 + + +def test_observe_noop_when_disabled(store: KBStore) -> None: + 
store.config_path.write_text("capture:\n enabled: false\n") + assert cap.observe(store, "s1", tool="Edit", summary="x") is False + assert not cap.buffer_path(store, "s1").exists() + + +def test_summarize_tool_skips_unobserved() -> None: + assert cap.summarize_tool("mcp__vouch__kb_search", {}, "") is None + + +def test_summarize_tool_edit() -> None: + obs = cap.summarize_tool("Edit", {"file_path": "/repo/src/a.py"}, "ok") + assert obs is not None + assert obs["tool"] == "Edit" + assert obs["files"] == ["/repo/src/a.py"] + assert "a.py" in obs["summary"] + + +def test_summarize_tool_bash_flags_error() -> None: + obs = cap.summarize_tool("Bash", {"command": "pytest"}, "1 failed, error") + assert obs is not None + assert obs["cmd"] == "pytest" + assert "failed" in obs["summary"].lower() + + +def test_summarize_tool_read_grep_web_task() -> None: + assert "a.py" in cap.summarize_tool("Read", {"file_path": "/x/a.py"}, "")["summary"] + assert "TODO" in cap.summarize_tool("Grep", {"pattern": "TODO"}, "")["summary"] + web = cap.summarize_tool("WebFetch", {"url": "https://example.com"}, "") + assert "example.com" in web["summary"] + assert cap.summarize_tool("Task", {}, "")["summary"] == "Task completed" + + +def test_observe_stores_cmd_field(store: KBStore) -> None: + cap.observe(store, "s1", tool="Bash", summary="Ran: ls", cmd="ls -la", now=1.0) + line = cap.buffer_path(store, "s1").read_text() + assert "ls -la" in line + + +def test_load_config_malformed_yaml_falls_back(store: KBStore) -> None: + store.config_path.write_text("capture: [unclosed\n") + assert cap.load_config(store).enabled is True # default, not a crash + + +def test_load_config_non_dict_yaml_falls_back(store: KBStore) -> None: + store.config_path.write_text("just a string\n") + assert cap.load_config(store).min_observations == 3 + + +def test_load_config_capture_not_a_mapping(store: KBStore) -> None: + store.config_path.write_text("capture: 42\n") + assert cap.load_config(store).enabled is True + + +def 
test_read_observations_skips_blank_and_bad_lines(store: KBStore) -> None: + p = cap.buffer_path(store, "s1") + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text('\n{"ts": 1, "tool": "Edit", "summary": "ok"}\nnot-json\n') + obs = cap._read_observations(p) + assert len(obs) == 1 + assert obs[0]["summary"] == "ok" + + +def test_git_changes_in_a_real_repo(tmp_path: Path) -> None: + import subprocess + + def git(*args: str) -> None: + subprocess.run( + ["git", "-c", "user.email=t@t", "-c", "user.name=t", *args], + cwd=tmp_path, check=True, capture_output=True, text=True, + ) + + try: + git("init") + except (OSError, subprocess.CalledProcessError): + import pytest + pytest.skip("git not available") + (tmp_path / "a.py").write_text("x = 1\n") + git("add", "a.py") + git("commit", "-m", "init") + (tmp_path / "a.py").write_text("x = 2\n") # modify tracked file + files, stat = cap._git_changes(tmp_path) + assert "a.py" in files + assert "a.py" in stat + + +def test_git_changes_swallows_subprocess_error(tmp_path: Path, monkeypatch) -> None: + def boom(*a, **k): + raise OSError("git missing") + + monkeypatch.setattr(cap.subprocess, "run", boom) + assert cap._git_changes(tmp_path) == ([], "") + + +def test_git_changes_stat_error_returns_files_without_stat(tmp_path: Path, monkeypatch) -> None: + calls = {"n": 0} + + class _R: + stdout = "a.py\n" + + def run(*a, **k): + calls["n"] += 1 + if calls["n"] == 1: + return _R() # first call (names) succeeds + raise OSError("stat failed") # second call (stat) blows up + + monkeypatch.setattr(cap.subprocess, "run", run) + files, stat = cap._git_changes(tmp_path) + assert files == ["a.py"] + assert stat == "" + + +def test_build_summary_body_renders_git_and_commands() -> None: + obs = [{"ts": 1.0, "tool": "Bash", "summary": "Ran: pytest", "cmd": "pytest -q"}] + _, body = cap.build_summary_body( + "s1", obs, ["a.py"], "a.py | 2 +-", project="proj", generated_at="2026-07-01" + ) + assert "## git changes" in body + assert "pytest -q" 
in body + assert "## notable commands" in body + assert "proj" in body + + +def _seed(store: KBStore, sid: str, n: int) -> None: + for i in range(n): + cap.observe(store, sid, tool="Edit", summary=f"Edited f{i}.py", now=float(i)) + + +def test_finalize_files_one_pending_page(store: KBStore, tmp_path: Path) -> None: + from vouch.models import ProposalKind, ProposalStatus + + _seed(store, "s1", 3) + result = cap.finalize(store, "s1", cwd=tmp_path) + pid = result["summary_proposal_id"] + assert pid is not None + pend = store.list_proposals(ProposalStatus.PENDING) + match = [p for p in pend if p.id == pid] + assert len(match) == 1 + pr = match[0] + assert pr.kind == ProposalKind.PAGE + assert pr.proposed_by == cap.CAPTURE_ACTOR + assert pr.payload["type"] == cap.CAPTURE_PAGE_TYPE + assert pr.status == ProposalStatus.PENDING + + +def test_finalize_below_min_files_nothing(store: KBStore, tmp_path: Path) -> None: + from vouch.models import ProposalStatus + + _seed(store, "s1", 2) # below default min_observations=3, non-git cwd + result = cap.finalize(store, "s1", cwd=tmp_path) + assert result["summary_proposal_id"] is None + assert store.list_proposals(ProposalStatus.PENDING) == [] + + +def test_finalize_deletes_buffer(store: KBStore, tmp_path: Path) -> None: + _seed(store, "s1", 3) + cap.finalize(store, "s1", cwd=tmp_path) + assert not cap.buffer_path(store, "s1").exists() + + +def test_finalize_noop_when_disabled(store: KBStore, tmp_path: Path) -> None: + from vouch.models import ProposalStatus + + _seed(store, "s1", 5) + store.config_path.write_text("capture:\n enabled: false\n") + result = cap.finalize(store, "s1", cwd=tmp_path) + assert result["summary_proposal_id"] is None + assert store.list_proposals(ProposalStatus.PENDING) == [] + + +def test_build_summary_body_has_sections() -> None: + obs = [ + {"ts": 1.0, "tool": "Edit", "summary": "Edited a.py", "files": ["a.py"]}, + {"ts": 2.0, "tool": "Bash", "summary": "Ran: pytest", "cmd": "pytest"}, + ] + title, body = 
cap.build_summary_body("s1", obs, ["a.py"], "a.py | 2 +-") + assert "s1" in title + assert "files modified this session" in body.lower() + assert "## activity" in body.lower() + assert "a.py" in body + + +def test_pending_count_counts_capture_actor(store: KBStore, tmp_path: Path) -> None: + _seed(store, "s1", 3) + cap.finalize(store, "s1", cwd=tmp_path) + assert cap.pending_count(store) == 1 + + +import json as _json # noqa: E402 + +from click.testing import CliRunner # noqa: E402 + +from vouch.cli import cli # noqa: E402 +from vouch.models import ProposalStatus # noqa: E402 + + +def _run(store: KBStore, args: list[str], stdin: str = "") -> object: + runner = CliRunner() + return runner.invoke( + cli, args, input=stdin, + env={"VOUCH_KB_PATH": str(store.kb_dir)}, + ) + + +def test_cli_observe_appends(store: KBStore) -> None: + payload = _json.dumps({ + "session_id": "cc-1", + "tool_name": "Edit", + "tool_input": {"file_path": "/r/a.py"}, + "tool_response": "ok", + }) + res = _run(store, ["capture", "observe"], stdin=payload) + assert res.exit_code == 0 + assert cap.buffer_path(store, "cc-1").exists() + + +def test_cli_observe_never_errors_on_garbage(store: KBStore) -> None: + res = _run(store, ["capture", "observe"], stdin="not json") + assert res.exit_code == 0 + + +def test_cli_finalize_files_proposal(store: KBStore) -> None: + for i in range(3): + cap.observe(store, "cc-2", tool="Edit", summary=f"Edited f{i}.py", now=float(i)) + payload = _json.dumps({"session_id": "cc-2", "cwd": str(store.kb_dir.parent)}) + res = _run(store, ["capture", "finalize"], stdin=payload) + assert res.exit_code == 0 + pend = store.list_proposals(ProposalStatus.PENDING) + assert any(p.proposed_by == cap.CAPTURE_ACTOR for p in pend) + + +def test_cli_banner_emits_when_pending(store: KBStore) -> None: + for i in range(3): + cap.observe(store, "cc-3", tool="Edit", summary=f"Edited f{i}.py", now=float(i)) + cap.finalize(store, "cc-3", cwd=store.kb_dir.parent) + res = _run(store, ["capture", 
"banner"]) + assert res.exit_code == 0 + assert "awaiting review" in res.output + + +def test_cli_banner_silent_when_none(store: KBStore) -> None: + res = _run(store, ["capture", "banner"]) + assert res.exit_code == 0 + assert res.output.strip() == "" + + +def test_adapter_settings_wires_capture_hooks() -> None: + root = Path(__file__).resolve().parents[1] + settings = _json.loads( + (root / "adapters/claude-code/.claude/settings.json").read_text() + ) + hooks = settings["hooks"] + + def commands(event: str) -> list[str]: + out: list[str] = [] + for group in hooks.get(event, []): + for h in group.get("hooks", []): + out.append(h.get("command", "")) + return out + + assert any("capture observe" in c for c in commands("PostToolUse")) + assert any("capture finalize" in c for c in commands("SessionEnd")) + assert any("capture banner" in c for c in commands("SessionStart")) + + +def test_capture_finalize_all_cmd_with_old_buffers(tmp_path: Path, monkeypatch) -> None: + """CLI command should finalize old buffers and emit JSON.""" + import os + import time as time_mod + + store = _make_store(tmp_path) + current_sess = "current" + old_sess = "old-session" + + # Create old buffer + old_path = cap.buffer_path(store, old_sess) + old_path.parent.mkdir(parents=True, exist_ok=True) + observations = [ + '{"ts": 1.0, "tool": "Read", "summary": "test1"}', + '{"ts": 2.0, "tool": "Read", "summary": "test2"}', + '{"ts": 3.0, "tool": "Read", "summary": "test3"}', + ] + old_path.write_text("\n".join(observations) + "\n") + old_mtime = time_mod.time() - 7200 + os.utime(old_path, (old_mtime, old_mtime)) + + # Create current buffer + curr_path = cap.buffer_path(store, current_sess) + curr_path.write_text('{"ts": 1.0, "tool": "Read", "summary": "test"}\n') + + # Run the CLI command + runner = CliRunner() + result = runner.invoke(cli, [ + "capture", "finalize-all", + "--session-id", current_sess, + "--max-age-seconds", "3600", + ], env={"VOUCH_KB_PATH": str(store.kb_dir)}) + + assert 
result.exit_code == 0 + output = _json.loads(result.output) + assert old_sess in output["finalized"] + assert current_sess in output["skipped_current"] + + +def test_capture_finalize_all_cmd_reads_session_from_env(tmp_path: Path, monkeypatch) -> None: + """CLI command should fall back to VOUCH_SESSION_ID env var.""" + store = _make_store(tmp_path) + current_sess = "from-env" + + # Create current session buffer + curr_path = cap.buffer_path(store, current_sess) + curr_path.parent.mkdir(parents=True, exist_ok=True) + curr_path.write_text('{"ts": 1.0, "tool": "Read", "summary": "test"}\n') + + runner = CliRunner() + result = runner.invoke(cli, [ + "capture", "finalize-all" + ], env={ + "VOUCH_KB_PATH": str(store.kb_dir), + "VOUCH_SESSION_ID": current_sess, + }) + + assert result.exit_code == 0 + output = _json.loads(result.output) + assert current_sess in output["skipped_current"] + + +def test_capture_finalize_all_cmd_silent_on_no_kb(tmp_path: Path, monkeypatch) -> None: + """CLI command should silently succeed if KB not found.""" + runner = CliRunner() + result = runner.invoke(cli, [ + "capture", "finalize-all", + "--session-id", "test", + ], env={"VOUCH_KB_PATH": str(tmp_path / "nonexistent")}) + + # Should exit 0, not fail + assert result.exit_code == 0 + + +def test_is_stale_buffer_with_recent_file(tmp_path): + """Recent file should not be stale.""" + import time as time_mod + f = tmp_path / "recent.jsonl" + f.write_text("test") + now = time_mod.time() + # File created 30 seconds ago; max_age=3600 + assert not cap.is_stale_buffer(f, max_age_seconds=3600, now_timestamp=now) + + +def test_is_stale_buffer_with_old_file(tmp_path): + """File older than max_age should be stale.""" + import os + import time as time_mod + f = tmp_path / "old.jsonl" + f.write_text("test") + old_time = time_mod.time() - 7200 # 2 hours ago + os.utime(f, (old_time, old_time)) # Set mtime to 2 hours ago + now = time_mod.time() + assert cap.is_stale_buffer(f, max_age_seconds=3600, 
now_timestamp=now) + + +def test_is_stale_buffer_with_exact_boundary(tmp_path): + """File at exact max_age boundary should not be stale (>=).""" + import os + import time as time_mod + f = tmp_path / "boundary.jsonl" + f.write_text("test") + exact_time = time_mod.time() - 3600 # Exactly 1 hour ago + os.utime(f, (exact_time, exact_time)) + now = exact_time + 3600 + assert not cap.is_stale_buffer(f, max_age_seconds=3600, now_timestamp=now) + + +def _make_store(tmp_path: Path) -> KBStore: + """Helper to create a KBStore for testing.""" + return KBStore.init(tmp_path) + + +def test_finalize_all_except_skips_current_session(tmp_path): + """Should not finalize the current session buffer.""" + store = _make_store(tmp_path) + sess_id = "current-session" + + # Create a current session buffer with observations + path = cap.buffer_path(store, sess_id) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text('{"ts": 1.0, "tool": "Read", "summary": "test"}\n') + + result = cap.finalize_all_except( + store, sess_id, max_age_seconds=3600.0 + ) + + assert result["skipped_current"] == [sess_id] + assert path.exists() # Not removed + + +def test_finalize_all_except_finalizes_old_buffer(tmp_path): + """Should finalize buffers older than max_age, except current session.""" + import os + import time as time_mod + store = _make_store(tmp_path) + current_sess = "current" + old_sess = "old-session" + + # Create old buffer (2 hours old) + old_path = cap.buffer_path(store, old_sess) + old_path.parent.mkdir(parents=True, exist_ok=True) + old_path.write_text('{"ts": 1.0, "tool": "Read", "summary": "test"}\n') + old_mtime = time_mod.time() - 7200 + os.utime(old_path, (old_mtime, old_mtime)) + + # Create current buffer (recent) + curr_path = cap.buffer_path(store, current_sess) + curr_path.write_text('{"ts": 2.0, "tool": "Write", "summary": "test2"}\n') + + result = cap.finalize_all_except( + store, current_sess, max_age_seconds=3600.0 + ) + + assert old_sess in result["finalized"] + 
assert current_sess in result["skipped_current"] + assert not old_path.exists() # Removed after finalize + assert curr_path.exists() # Current session untouched + + +def test_finalize_all_except_skips_recent_buffers(tmp_path): + """Should not finalize buffers younger than max_age.""" + import os + import time as time_mod + store = _make_store(tmp_path) + current_sess = "current" + recent_sess = "recent-other" + + # Create recent buffer (30 minutes old) + recent_path = cap.buffer_path(store, recent_sess) + recent_path.parent.mkdir(parents=True, exist_ok=True) + recent_path.write_text('{"ts": 1.0, "tool": "Read", "summary": "test"}\n') + recent_mtime = time_mod.time() - 1800 + os.utime(recent_path, (recent_mtime, recent_mtime)) + + result = cap.finalize_all_except( + store, current_sess, max_age_seconds=3600.0 + ) + + assert recent_sess in result["skipped_recent"] + assert recent_path.exists() # Not removed + + +def test_finalize_all_except_multiple_buffers(tmp_path): + """Should handle multiple old and recent buffers correctly.""" + import os + import time as time_mod + store = _make_store(tmp_path) + current_sess = "current" + + # Create 3 old buffers, 2 recent buffers + old_sesses = ["old1", "old2", "old3"] + recent_sesses = ["recent1", "recent2"] + + now = time_mod.time() + for sid in old_sesses: + path = cap.buffer_path(store, sid) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text('{"ts": 1.0, "tool": "Read", "summary": "test"}\n') + old_mtime = now - 7200 # 2 hours ago + os.utime(path, (old_mtime, old_mtime)) + + for sid in recent_sesses: + path = cap.buffer_path(store, sid) + path.write_text('{"ts": 2.0, "tool": "Read", "summary": "test"}\n') + recent_mtime = now - 600 # 10 minutes ago + os.utime(path, (recent_mtime, recent_mtime)) + + # Create current session buffer + curr_path = cap.buffer_path(store, current_sess) + curr_path.write_text('{"ts": 3.0, "tool": "Write", "summary": "test"}\n') + + result = cap.finalize_all_except( + store, 
current_sess, max_age_seconds=3600.0, now_timestamp=now + ) + + assert set(result["finalized"]) == set(old_sesses) + assert set(result["skipped_recent"]) == set(recent_sesses) + assert result["skipped_current"] == [current_sess] + + # Verify old buffers are removed, others exist + for sid in old_sesses: + assert not cap.buffer_path(store, sid).exists() + for sid in [*recent_sesses, current_sess]: + assert cap.buffer_path(store, sid).exists() + + +def test_finalize_all_except_empty_captures_dir(tmp_path): + """Should handle empty or missing captures directory gracefully.""" + store = _make_store(tmp_path) + result = cap.finalize_all_except( + store, "current-session", max_age_seconds=3600.0 + ) + + assert result["finalized"] == [] + assert result["skipped_recent"] == [] + assert result["skipped_current"] == [] + + +def test_finalize_all_except_returns_proposal_ids(tmp_path): + """finalize_all_except should return proposal IDs of finalized buffers.""" + import os + import time as time_mod + store = _make_store(tmp_path) + old_sess = "old-session" + current_sess = "current" + + # Create old buffer with enough observations + old_path = cap.buffer_path(store, old_sess) + old_path.parent.mkdir(parents=True, exist_ok=True) + observations = [ + '{"ts": 1.0, "tool": "Read", "summary": "test1"}', + '{"ts": 2.0, "tool": "Read", "summary": "test2"}', + '{"ts": 3.0, "tool": "Read", "summary": "test3"}', + ] + old_path.write_text("\n".join(observations) + "\n") + old_mtime = time_mod.time() - 7200 + os.utime(old_path, (old_mtime, old_mtime)) + + # Create current session buffer + curr_path = cap.buffer_path(store, current_sess) + curr_path.write_text('{"ts": 4.0, "tool": "Write", "summary": "test"}\n') + + result = cap.finalize_all_except( + store, current_sess, max_age_seconds=3600.0 + ) + + assert old_sess in result["finalized"] + # Verify a proposal was created + from vouch.models import ProposalStatus + pending = store.list_proposals(ProposalStatus.PENDING) + assert 
len(pending) > 0 + + +def test_capture_e2e_sessionstart_cleanup_then_finalize(tmp_path): + """End-to-end: old buffers cleaned up on sessionstart, current session on finalize.""" + import os + import time as time_mod + + store = _make_store(tmp_path) + + # Simulate a previous session that crashed/closed without finalize + old_sess = "crashed-session" + old_path = cap.buffer_path(store, old_sess) + old_path.parent.mkdir(parents=True, exist_ok=True) + observations = [ + '{"ts": 1.0, "tool": "Read", "summary": "test1"}', + '{"ts": 2.0, "tool": "Read", "summary": "test2"}', + '{"ts": 3.0, "tool": "Read", "summary": "test3"}', + ] + old_path.write_text("\n".join(observations) + "\n") + old_mtime = time_mod.time() - 7200 # 2 hours ago + os.utime(old_path, (old_mtime, old_mtime)) + + # Simulate a new session starting + new_sess = "new-session" + + # 1. SessionStart cleanup (finalize old buffers) + cleanup_result = cap.finalize_all_except( + store, new_sess, max_age_seconds=3600.0 + ) + assert old_sess in cleanup_result["finalized"] + assert not old_path.exists() + + # Verify old session was proposed + pending_before = store.list_proposals(ProposalStatus.PENDING) + old_proposals = [p for p in pending_before if p.session_id == old_sess] + assert len(old_proposals) == 1 + + # 2. 
SessionEnd finalize (current session) + new_path = cap.buffer_path(store, new_sess) + new_path.write_text("\n".join(observations) + "\n") + + finalize_result = cap.finalize(store, new_sess) + assert finalize_result["summary_proposal_id"] is not None + assert not new_path.exists() + + # Verify new session was proposed + pending_after = store.list_proposals(ProposalStatus.PENDING) + new_proposals = [p for p in pending_after if p.session_id == new_sess] + assert len(new_proposals) == 1 + + # Total proposals: old + new + assert len(pending_after) >= 2 diff --git a/tests/test_install_adapter.py b/tests/test_install_adapter.py index 882428dd..e299e396 100644 --- a/tests/test_install_adapter.py +++ b/tests/test_install_adapter.py @@ -113,6 +113,126 @@ def test_install_claude_code_is_idempotent(tmp_path: Path) -> None: } +def test_settings_json_merges_into_existing(tmp_path: Path) -> None: + """User already has .claude/settings.json — vouch merges its hooks and + permission allowlist in without clobbering the user's content.""" + settings_dir = tmp_path / ".claude" + settings_dir.mkdir() + (settings_dir / "settings.json").write_text(json.dumps({ + "permissions": {"allow": ["Bash(ls:*)"]}, + "hooks": { + "SessionStart": [ + {"matcher": "*", "hooks": [{"type": "command", "command": "my-own-hook"}]} + ] + }, + })) + result = install("claude-code", target=tmp_path, tier="T4") + merged = json.loads((settings_dir / "settings.json").read_text()) + + # user content preserved + assert "Bash(ls:*)" in merged["permissions"]["allow"] + start_cmds = [h["command"] for g in merged["hooks"]["SessionStart"] for h in g["hooks"]] + assert "my-own-hook" in start_cmds + + # vouch content merged in + assert "mcp__vouch__kb_status" in merged["permissions"]["allow"] + assert any("capture banner" in c for c in start_cmds) + post = [h["command"] for g in merged["hooks"].get("PostToolUse", []) for h in g["hooks"]] + end = [h["command"] for g in merged["hooks"].get("SessionEnd", []) for h in 
g["hooks"]] + assert any("capture observe" in c for c in post) + assert any("capture finalize" in c for c in end) + + assert ".claude/settings.json" in result.merged + assert ".claude/settings.json" not in result.skipped + assert ".claude/settings.json" not in result.written + + +def test_settings_json_merge_is_idempotent(tmp_path: Path) -> None: + (tmp_path / ".claude").mkdir() + (tmp_path / ".claude" / "settings.json").write_text(json.dumps({"hooks": {}})) + install("claude-code", target=tmp_path, tier="T4") + first = (tmp_path / ".claude" / "settings.json").read_text() + second = install("claude-code", target=tmp_path, tier="T4") + after = (tmp_path / ".claude" / "settings.json").read_text() + + assert first == after # no change on re-run + assert ".claude/settings.json" in second.skipped + assert ".claude/settings.json" not in second.merged + + data = json.loads(after) + observe_cmds = [ + h["command"] + for g in data["hooks"]["PostToolUse"] + for h in g["hooks"] + if "capture observe" in h["command"] + ] + assert len(observe_cmds) == 1 # not duplicated + + +def test_settings_json_written_fresh_when_absent(tmp_path: Path) -> None: + result = install("claude-code", target=tmp_path, tier="T4") + assert ".claude/settings.json" in result.written + assert ".claude/settings.json" not in result.merged + + +def test_settings_json_malformed_existing_is_skipped(tmp_path: Path) -> None: + (tmp_path / ".claude").mkdir() + (tmp_path / ".claude" / "settings.json").write_text("{ not valid json ") + before = (tmp_path / ".claude" / "settings.json").read_text() + result = install("claude-code", target=tmp_path, tier="T4") + # unreadable user file is left untouched, not clobbered + assert (tmp_path / ".claude" / "settings.json").read_text() == before + assert ".claude/settings.json" in result.skipped + assert ".claude/settings.json" not in result.merged + + +def test_settings_json_non_object_existing_is_skipped(tmp_path: Path) -> None: + (tmp_path / ".claude").mkdir() + 
(tmp_path / ".claude" / "settings.json").write_text("[1, 2, 3]") + result = install("claude-code", target=tmp_path, tier="T4") + assert ".claude/settings.json" in result.skipped + + +def test_merge_settings_coerces_non_dict_fields() -> None: + from vouch.install_adapter import _merge_settings + + dst = {"permissions": "oops", "hooks": "also-oops"} + src = { + "permissions": {"allow": ["mcp__vouch__kb_status"]}, + "hooks": {"PostToolUse": [ + {"matcher": "*", "hooks": [{"type": "command", "command": "vouch capture observe"}]} + ]}, + } + changed = _merge_settings(src, dst) + assert changed is True + assert "mcp__vouch__kb_status" in dst["permissions"]["allow"] + cmds = [h["command"] for g in dst["hooks"]["PostToolUse"] for h in g["hooks"]] + assert "vouch capture observe" in cmds + + +def test_merge_settings_ignores_malformed_src_groups() -> None: + from vouch.install_adapter import _merge_settings + + dst: dict = {} + # a non-list event value and a non-dict group are both skipped defensively + src = {"hooks": {"BadEvent": "not-a-list", "PostToolUse": ["not-a-dict"]}} + assert _merge_settings(src, dst) is False # nothing addable → no change + + +def test_merge_settings_new_matcher_group_when_none_matches() -> None: + from vouch.install_adapter import _merge_settings + + dst = {"hooks": {"PostToolUse": [ + {"matcher": "Edit", "hooks": [{"type": "command", "command": "user-hook"}]} + ]}} + src = {"hooks": {"PostToolUse": [ + {"matcher": "*", "hooks": [{"type": "command", "command": "vouch capture observe"}]} + ]}} + assert _merge_settings(src, dst) is True + matchers = [g.get("matcher") for g in dst["hooks"]["PostToolUse"]] + assert "Edit" in matchers and "*" in matchers # user group kept, ours added + + def test_install_claude_md_appends_when_existing_unfenced(tmp_path: Path) -> None: """User has their own CLAUDE.md — our snippet appends inside a fence so their content is untouched and we can detect ourselves on re-install.""" diff --git a/tests/test_recall.py 
b/tests/test_recall.py new file mode 100644 index 00000000..a08a02ce --- /dev/null +++ b/tests/test_recall.py @@ -0,0 +1,136 @@ +"""Session-start recall digest — inject approved knowledge into new sessions.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from vouch import recall +from vouch.models import ClaimStatus +from vouch.proposals import approve, propose_claim, propose_page +from vouch.storage import KBStore, _starter_config + + +@pytest.fixture +def store(tmp_path: Path) -> KBStore: + return KBStore.init(tmp_path) + + +def _approve_claim(store: KBStore, text: str): + src = store.put_source(b"evidence") + pr = propose_claim(store, text=text, evidence=[src.id], proposed_by="a") + return approve(store, pr.id, approved_by="u") + + +def _approve_page(store: KBStore, title: str): + pr = propose_page(store, title=title, body="body", proposed_by="a") + return approve(store, pr.id, approved_by="u") + + +def test_digest_includes_approved_claim_and_page(store: KBStore) -> None: + _approve_claim(store, "JWT chosen over sessions") + _approve_page(store, "auth design") + d = recall.build_digest(store) + assert "" in d + assert "JWT chosen over sessions" in d + assert "auth design" in d + + +def test_digest_excludes_retracted_claims(store: KBStore) -> None: + _approve_claim(store, "live fact") + archived = _approve_claim(store, "archived fact") + archived.status = ClaimStatus.ARCHIVED + store.update_claim(archived) + d = recall.build_digest(store) + assert "live fact" in d + assert "archived fact" not in d + + +def test_empty_kb_digest_is_empty(store: KBStore) -> None: + assert recall.build_digest(store) == "" + + +def test_digest_truncates_with_notice(store: KBStore) -> None: + for i in range(40): + _approve_claim(store, f"fact number {i} " + "x" * 80) + d = recall.build_digest(store, max_chars=600) + assert len(d) <= 800 + assert "truncated" in d.lower() + + +def test_load_config_defaults(store: KBStore) -> None: + cfg = 
recall.load_config(store) + assert cfg.enabled is True + assert cfg.max_chars == recall.DEFAULT_MAX_CHARS + + +def test_load_config_override(store: KBStore) -> None: + store.config_path.write_text( + "recall:\n enabled: false\n max_chars: 500\n", encoding="utf-8" + ) + cfg = recall.load_config(store) + assert cfg.enabled is False + assert cfg.max_chars == 500 + + +def test_starter_config_has_recall_namespace() -> None: + assert _starter_config()["recall"]["enabled"] is True + + +def test_load_config_malformed_yaml_falls_back(store: KBStore) -> None: + store.config_path.write_text("recall: [unclosed\n", encoding="utf-8") + assert recall.load_config(store).enabled is True + + +def test_load_config_non_dict_yaml_falls_back(store: KBStore) -> None: + store.config_path.write_text("plain string\n", encoding="utf-8") + assert recall.load_config(store).max_chars == recall.DEFAULT_MAX_CHARS + + +def test_load_config_recall_not_a_mapping(store: KBStore) -> None: + store.config_path.write_text("recall: 7\n", encoding="utf-8") + assert recall.load_config(store).enabled is True + + +def test_cli_recall_emits_digest(store: KBStore) -> None: + from click.testing import CliRunner + + from vouch.cli import cli + + _approve_claim(store, "prefer ruff over flake8") + res = CliRunner().invoke( + cli, ["recall"], env={"VOUCH_KB_PATH": str(store.kb_dir)} + ) + assert res.exit_code == 0 + assert "prefer ruff over flake8" in res.output + assert "" in res.output + + +def test_cli_recall_silent_when_disabled(store: KBStore) -> None: + from click.testing import CliRunner + + from vouch.cli import cli + + _approve_claim(store, "some fact") + store.config_path.write_text("recall:\n enabled: false\n", encoding="utf-8") + res = CliRunner().invoke( + cli, ["recall"], env={"VOUCH_KB_PATH": str(store.kb_dir)} + ) + assert res.exit_code == 0 + assert res.output.strip() == "" + + +def test_adapter_sessionstart_runs_recall() -> None: + import json as _json + + root = Path(__file__).resolve().parents[1] 
+ settings = _json.loads( + (root / "adapters/claude-code/.claude/settings.json").read_text() + ) + cmds = [ + h.get("command", "") + for g in settings["hooks"]["SessionStart"] + for h in g.get("hooks", []) + ] + assert any("vouch recall" in c for c in cmds) diff --git a/tests/test_storage.py b/tests/test_storage.py index ce1d643f..03ce0fba 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -831,3 +831,32 @@ def test_cite_resolves_source_and_evidence(store: KBStore) -> None: for c in citations } assert "source" in kinds and "evidence" in kinds + + +# --- resilience: a single corrupt file must not break bulk listing -------- + + +def test_list_proposals_skips_unreadable_file(store: KBStore) -> None: + """One unparseable proposal file must not take down `vouch pending`.""" + src = store.put_source(b"evidence") + good = propose_claim(store, text="good", evidence=[src.id], + proposed_by="agent") + + # a raw U+0080 (C1 control byte) — pyyaml's loader rejects it even though + # its dumper would have escaped it. mirrors a hand-edited / mojibake file. 
+ corrupt = store.kb_dir / "proposed" / "20990101-000000-corrupt.yaml" + corrupt.write_bytes(b"text: bad\xc2\x80value\n") + + pending = store.list_proposals(ProposalStatus.PENDING) + assert [p.id for p in pending] == [good.id] + + +def test_list_claims_skips_unreadable_file(store: KBStore) -> None: + """Same resilience for durable claim listing (vouch search/status).""" + src = store.put_source(b"e") + store.put_claim(Claim(id="c-ok", text="x", evidence=[src.id])) + (store.kb_dir / "claims" / "c-bad.yaml").write_bytes( + b"text: bad\xc2\x80value\n") + + claims = store.list_claims() + assert [c.id for c in claims] == ["c-ok"] diff --git a/tests/test_themes.py b/tests/test_themes.py new file mode 100644 index 00000000..5035308f --- /dev/null +++ b/tests/test_themes.py @@ -0,0 +1,203 @@ +"""Cross-session pattern detection — detect_themes + propose_theme.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from vouch import sessions as sess_mod +from vouch import themes +from vouch.proposals import ProposalError, approve, propose_claim, propose_entity +from vouch.storage import KBStore + + +@pytest.fixture +def store(tmp_path: Path) -> KBStore: + return KBStore.init(tmp_path) + + +def _seed_multi_session_claims(store: KBStore) -> dict: + """Create two sessions with overlapping entity claims.""" + src = store.put_source(b"evidence-material") + + # Register entities so propose_page can reference them. + e1 = propose_entity( + store, name="auth", entity_type="concept", + proposed_by="setup", slug_hint="auth", + ) + e2 = propose_entity( + store, name="jwt", entity_type="concept", + proposed_by="setup", slug_hint="jwt", + ) + e3 = propose_entity( + store, name="session-mgmt", entity_type="concept", + proposed_by="setup", slug_hint="session-mgmt", + ) + approve(store, e1.id, approved_by="human") + approve(store, e2.id, approved_by="human") + approve(store, e3.id, approved_by="human") + + # Session 1: claims mentioning auth + jwt. 
+ s1 = sess_mod.session_start(store, agent="agent-a", task="review auth") + c1 = propose_claim( + store, text="auth uses jwt for token validation", + evidence=[src.id], proposed_by="agent-a", + entities=["auth", "jwt"], session_id=s1.id, + ) + c2 = propose_claim( + store, text="jwt tokens expire after 1 hour", + evidence=[src.id], proposed_by="agent-a", + entities=["auth", "jwt"], session_id=s1.id, + slug_hint="jwt-expiry", + ) + approve(store, c1.id, approved_by="human") + approve(store, c2.id, approved_by="human") + sess_mod.session_end(store, s1.id) + + # Session 2: claims also mentioning auth + jwt + session-mgmt. + s2 = sess_mod.session_start(store, agent="agent-b", task="review sessions") + c3 = propose_claim( + store, text="auth middleware validates jwt on every request", + evidence=[src.id], proposed_by="agent-b", + entities=["auth", "jwt"], session_id=s2.id, + slug_hint="auth-middleware-jwt", + ) + c4 = propose_claim( + store, text="session management depends on auth and jwt", + evidence=[src.id], proposed_by="agent-b", + entities=["auth", "jwt", "session-mgmt"], session_id=s2.id, + slug_hint="session-depends-auth", + ) + approve(store, c3.id, approved_by="human") + approve(store, c4.id, approved_by="human") + sess_mod.session_end(store, s2.id) + + return {"sessions": [s1.id, s2.id], "source": src.id} + + +def test_detect_themes_finds_clusters(store: KBStore) -> None: + _seed_multi_session_claims(store) + result = themes.detect_themes(store, min_sessions=2, min_claims=2) + assert len(result.clusters) > 0 + # The auth+jwt pair should be the strongest cluster. 
+ top = result.clusters[0] + assert "auth" in top.entities + assert "jwt" in top.entities + assert top.session_count >= 2 + assert top.claim_count >= 2 + assert top.score > 0 + + +def test_detect_themes_respects_min_sessions(store: KBStore) -> None: + _seed_multi_session_claims(store) + result = themes.detect_themes(store, min_sessions=10, min_claims=1) + assert len(result.clusters) == 0 + + +def test_detect_themes_respects_min_claims(store: KBStore) -> None: + _seed_multi_session_claims(store) + result = themes.detect_themes(store, min_sessions=1, min_claims=100) + assert len(result.clusters) == 0 + + +def test_detect_themes_read_only(store: KBStore) -> None: + """detect_themes must not create any proposals or pages.""" + _seed_multi_session_claims(store) + pages_before = len(store.list_pages()) + proposals_before = len(store.list_proposals()) + themes.detect_themes(store, min_sessions=2, min_claims=2) + assert len(store.list_pages()) == pages_before + assert len(store.list_proposals()) == proposals_before + + +def test_detect_themes_excludes_archived(store: KBStore) -> None: + """Archived claims should not contribute to theme detection.""" + from vouch import lifecycle as life + + _seed_multi_session_claims(store) + # Archive all claims — themes should vanish. 
+ for claim in store.list_claims(): + life.archive(store, claim_id=claim.id, actor="human") + result = themes.detect_themes(store, min_sessions=1, min_claims=1) + assert len(result.clusters) == 0 + + +def test_detect_themes_disabled_config(store: KBStore) -> None: + """When themes.enabled=false in config, returns empty.""" + import yaml + + _seed_multi_session_claims(store) + cfg = yaml.safe_load(store.config_path.read_text()) or {} + cfg["themes"] = {"enabled": False} + store.config_path.write_text(yaml.dump(cfg)) + result = themes.detect_themes(store, min_sessions=1, min_claims=1) + assert len(result.clusters) == 0 + assert result.config_used.get("enabled") is False + + +def test_propose_theme(store: KBStore) -> None: + _seed_multi_session_claims(store) + result = themes.detect_themes(store, min_sessions=2, min_claims=2) + assert len(result.clusters) > 0 + cluster = result.clusters[0] + proposal_result = themes.propose_theme( + store, cluster, proposed_by="theme-agent", + ) + assert "proposal_id" in proposal_result + assert proposal_result["claim_count"] >= 2 + # The proposal should appear in pending. + pending = store.list_proposals() + theme_proposals = [ + p for p in pending + if p.kind.value == "page" and p.payload.get("type") == "theme" + ] + assert len(theme_proposals) == 1 + + +def test_propose_theme_dedup(store: KBStore) -> None: + """Proposing the same cluster twice should deduplicate on detect.""" + _seed_multi_session_claims(store) + result = themes.detect_themes(store, min_sessions=2, min_claims=2) + cluster = result.clusters[0] + themes.propose_theme(store, cluster, proposed_by="agent") + + # Detect again — already-proposed themes should be excluded. 
+ result2 = themes.detect_themes(store, min_sessions=2, min_claims=2) + matching = [ + c for c in result2.clusters + if set(c.entities) == set(cluster.entities) + ] + assert len(matching) == 0 + + +def test_propose_theme_validates_claims(store: KBStore) -> None: + """Proposing with no valid claims should raise.""" + cluster = themes.ThemeCluster( + entities=["nonexistent-entity"], + claim_ids=["nonexistent-claim"], + session_ids=["sess-fake"], + score=1.0, + session_count=1, + claim_count=1, + ) + with pytest.raises(ProposalError): + themes.propose_theme(store, cluster, proposed_by="agent") + + +def test_detect_themes_deterministic(store: KBStore) -> None: + """Running detect_themes twice should produce identical results.""" + _seed_multi_session_claims(store) + r1 = themes.detect_themes(store, min_sessions=2, min_claims=2) + r2 = themes.detect_themes(store, min_sessions=2, min_claims=2) + assert len(r1.clusters) == len(r2.clusters) + for c1, c2 in zip(r1.clusters, r2.clusters, strict=True): + assert c1.entities == c2.entities + assert c1.score == c2.score + assert c1.claim_ids == c2.claim_ids + + +def test_detect_themes_top_k(store: KBStore) -> None: + _seed_multi_session_claims(store) + result = themes.detect_themes(store, min_sessions=1, min_claims=1, top_k=1) + assert len(result.clusters) <= 1