From a4e0c18f330200c9d54922ddccb76ed9a50fb83c Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 14 Jun 2026 17:05:48 +0000 Subject: [PATCH 1/7] feat(analysis): add runChangePack diff-scoped core Generalizes computeVerdict's internal diff->upstream fan-out but RETAINS the impacted subgraph instead of collapsing it to a scalar blastRadius, and surfaces the affected tests that runImpact classifies-then-drops today. runChangePack(store, query) composes four sections: - impacted subgraph: union of per-changed-symbol runImpact(upstream) fan-outs, deduped (nodes by id keeping min depth, edges by from/type/to), capped at 5000 nodes deterministically; production-only by default, includeTestsInSubgraph retains tests. - verdict: delegates to computeVerdict verbatim. - affected tests: upstream reachability filtered by isTestPath, each carrying reachedFromSymbol + depth, sorted (filePath, id). - cost attribution: char-heuristic token estimate (len/4; the repo ships no real tokenizer, tokenizerId is provenance-only) for the pack body vs a blind baseline summing every impacted file; self-labeled estimate:true with tokenizer-model char-heuristic-v1. Never claims model tokens. Deterministic: canonical-JSON, all collections pre-sorted, content hash over the placeholder-blanked envelope. Reads the graph only (no re-derived edges, no mutation); no LLM. 11 unit + determinism tests. --- packages/analysis/src/change-pack-types.ts | 119 +++++ packages/analysis/src/change-pack.test.ts | 454 +++++++++++++++++++ packages/analysis/src/change-pack.ts | 495 +++++++++++++++++++++ packages/analysis/src/index.ts | 16 + 4 files changed, 1084 insertions(+) create mode 100644 packages/analysis/src/change-pack-types.ts create mode 100644 packages/analysis/src/change-pack.test.ts create mode 100644 packages/analysis/src/change-pack.ts diff --git a/packages/analysis/src/change-pack-types.ts b/packages/analysis/src/change-pack-types.ts new file mode 100644 index 00000000..899e731f --- /dev/null +++ b/packages/analysis/src/change-pack-types.ts @@ -0,0 +1,119 @@ +/** + * Public types for the diff-scoped change-pack capability. + * + * A change-pack is a deterministic, diff-scoped object with four sections: + * the impacted subgraph (the union of per-symbol upstream fan-outs, retained + * rather than collapsed to a scalar), the composite verdict, the affected + * tests (which impact analysis classifies-then-drops today), and a + * cost-attribution estimate. Every field is readonly so results can cross + * serialization boundaries without defensive copying. + */ + +import type { VerdictResponse } from "./verdict-types.js"; + +/** Input options accepted by `runChangePack`. */ +export interface ChangePackQuery { + readonly repoPath: string; + /** Base git ref (default: "main"). */ + readonly base?: string; + /** Head git ref (default: "HEAD"). */ + readonly head?: string; + /** Upstream traversal depth cap (default: 4). */ + readonly depth?: number; + /** Traversal confidence floor (default: 0.7). */ + readonly minConfidence?: number; + /** + * Context budget in heuristic tokens. Recorded in the hashed envelope for + * provenance; v1 does not enforce trimming (default: 100_000). + */ + readonly budget?: number; + /** + * When false (default), the impacted subgraph reflects production code only + * — test-path nodes surface exclusively in `affectedTests`, matching the + * verdict's production-only blast radius. When true, tests are also + * retained in the subgraph. + */ + readonly includeTestsInSubgraph?: boolean; +} + +/** A changed symbol resolved from the diff. */ +export interface ChangedSymbol { + readonly id: string; + readonly name: string; + readonly filePath: string; + readonly kind: string; +} + +/** + * One node in the impacted subgraph. `minDepth` is the shallowest depth at + * which this node was reached across every per-symbol upstream fan-out. + */ +export interface ImpactedSubgraphNode { + readonly id: string; + readonly name: string; + readonly filePath: string; + readonly kind: string; + readonly minDepth: number; +} + +/** One edge in the impacted subgraph (deduplicated by `from`/`type`/`to`). */ +export interface ImpactedSubgraphEdge { + readonly fromId: string; + readonly toId: string; + readonly type: string; + readonly confidence: number; +} + +/** + * The retained impacted subgraph: the union of every per-symbol upstream + * fan-out, deduplicated. `truncated` is true when the node set was capped at + * the hard ceiling. + */ +export interface ImpactedSubgraph { + readonly nodes: readonly ImpactedSubgraphNode[]; + readonly edges: readonly ImpactedSubgraphEdge[]; + readonly nodeCount: number; + readonly edgeCount: number; + readonly truncated: boolean; +} + +/** One test reached by an upstream fan-out from a changed symbol. */ +export interface AffectedTest { + readonly id: string; + readonly name: string; + readonly filePath: string; + /** Id of the changed symbol this test was first reached from. */ + readonly reachedFromSymbol: string; + /** Shallowest depth at which the test was reached. */ + readonly depth: number; +} + +/** + * Cost attribution for the change-pack. All token figures are estimates from + * a character heuristic, never model-tokenizer counts — `estimate` is always + * true and `tokenizerModel` self-labels the basis. + */ +export interface CostAttribution { + readonly estimate: true; + readonly tokenizerModel: "char-heuristic-v1"; + /** Heuristic tokens for the change-pack context body the agent consumes. */ + readonly changePackTokens: number; + /** Heuristic tokens an agent would read by opening every impacted file blind. */ + readonly blindBaselineTokens: number; + readonly tokensSaved: number; + readonly tokensSavedPct: number; + readonly affectedTestCount: number; + readonly totalTestCount: number; + readonly ciTestsSkipped: number; +} + +/** The full diff-scoped change-pack. */ +export interface ChangePack { + readonly changedFiles: readonly string[]; + readonly changedSymbols: readonly ChangedSymbol[]; + readonly impactedSubgraph: ImpactedSubgraph; + readonly verdict: VerdictResponse; + readonly affectedTests: readonly AffectedTest[]; + readonly costAttribution: CostAttribution; + readonly changePackHash: string; +} diff --git a/packages/analysis/src/change-pack.test.ts b/packages/analysis/src/change-pack.test.ts new file mode 100644 index 00000000..107f2ca6 --- /dev/null +++ b/packages/analysis/src/change-pack.test.ts @@ -0,0 +1,454 @@ +import assert from "node:assert/strict"; +import { test } from "node:test"; +import { canonicalJson } from "@opencodehub/core-types"; +import type { IGraphStore } from "@opencodehub/storage"; +import { type ChangePackInternal, charHeuristicTokens, runChangePack } from "./change-pack.js"; +import { FakeStore } from "./test-utils.js"; +import type { DetectChangesResult } from "./types.js"; +import type { VerdictQuery, VerdictResponse } from "./verdict-types.js"; + +// --------------------------------------------------------------------------- +// Hermetic seams. runChangePack composes runDetectChanges + computeVerdict, +// both of which shell out to git. The internal seams let the suite inject a +// canned diff + verdict and an in-memory file reader so nothing spawns git or +// touches disk. +// --------------------------------------------------------------------------- + +function stubVerdict(): VerdictResponse { + return { + verdict: "single_review", + confidence: 0.85, + decisionBoundary: { distancePercent: 50, nextTier: "dual_review" }, + reasoningChain: [], + recommendedReviewers: [], + githubLabels: ["review:single"], + reviewCommentMarkdown: "", + exitCode: 0, + blastRadius: 3, + communitiesTouched: [], + changedFileCount: 1, + changedFiles: ["src/a.ts"], + affectedSymbolCount: 1, + }; +} + +function makeInternal( + changes: DetectChangesResult, + files: Readonly> = {}, + verdict: VerdictResponse = stubVerdict(), +): ChangePackInternal { + return { + detectChanges: () => Promise.resolve(changes), + computeVerdict: (_store: IGraphStore, _q: VerdictQuery) => Promise.resolve(verdict), + readFileText: (absPath: string) => { + const v = files[absPath]; + if (v === undefined) return Promise.reject(new Error(`ENOENT: ${absPath}`)); + return Promise.resolve(v); + }, + }; +} + +function detect(symbols: DetectChangesResult["affectedSymbols"]): DetectChangesResult { + const fileSet = new Set(); + for (const s of symbols) fileSet.add(s.filePath); + const changedFiles = [...fileSet].sort(); + return { + changedFiles, + affectedSymbols: symbols, + affectedProcesses: [], + summary: { + fileCount: changedFiles.length, + symbolCount: symbols.length, + processCount: 0, + risk: "LOW", + }, + }; +} + +/** + * Fixture: one changed symbol `foo` in src/a.ts, with three upstream callers: + * a production caller `bar` (src/b.ts), a test `foo.test.ts:itFoo`, and a + * deeper production caller `baz` reached via `bar`. + * + * bar --CALLS--> foo (bar is depth-1 upstream of foo) + * itFoo --CALLS--> foo (test, depth-1) + * baz --CALLS--> bar (baz is depth-2 upstream of foo) + */ +function fooFixture(): FakeStore { + const store = new FakeStore(); + store.addNode({ + id: "Function:src/a.ts:foo#0", + kind: "Function", + name: "foo", + filePath: "src/a.ts", + }); + store.addNode({ + id: "Function:src/b.ts:bar#0", + kind: "Function", + name: "bar", + filePath: "src/b.ts", + }); + store.addNode({ + id: "Function:src/c.ts:baz#0", + kind: "Function", + name: "baz", + filePath: "src/c.ts", + }); + store.addNode({ + id: "Function:src/foo.test.ts:itFoo#0", + kind: "Function", + name: "itFoo", + filePath: "src/foo.test.ts", + }); + // File nodes back the suite-size count (totalTestCount = test-path File nodes). + store.addNode({ id: "File:src/a.ts:src/a.ts", kind: "File", name: "a.ts", filePath: "src/a.ts" }); + store.addNode({ id: "File:src/b.ts:src/b.ts", kind: "File", name: "b.ts", filePath: "src/b.ts" }); + store.addNode({ id: "File:src/c.ts:src/c.ts", kind: "File", name: "c.ts", filePath: "src/c.ts" }); + store.addNode({ + id: "File:src/foo.test.ts:src/foo.test.ts", + kind: "File", + name: "foo.test.ts", + filePath: "src/foo.test.ts", + }); + store.addEdge({ + fromId: "Function:src/b.ts:bar#0", + toId: "Function:src/a.ts:foo#0", + type: "CALLS", + confidence: 1.0, + }); + store.addEdge({ + fromId: "Function:src/foo.test.ts:itFoo#0", + toId: "Function:src/a.ts:foo#0", + type: "CALLS", + confidence: 1.0, + }); + store.addEdge({ + fromId: "Function:src/c.ts:baz#0", + toId: "Function:src/b.ts:bar#0", + type: "CALLS", + confidence: 1.0, + }); + return store; +} + +const FOO_CHANGE: DetectChangesResult["affectedSymbols"] = [ + { + id: "Function:src/a.ts:foo#0", + name: "foo", + filePath: "src/a.ts", + kind: "Function", + changedLines: [1, 2], + }, +]; + +// --------------------------------------------------------------------------- +// Subgraph union + dedup + minDepth +// --------------------------------------------------------------------------- + +test("runChangePack: impacted subgraph unions upstream fan-out, excludes tests by default", async () => { + const store = fooFixture(); + const pack = await runChangePack(store, { repoPath: "/repo" }, makeInternal(detect(FOO_CHANGE))); + + const ids = pack.impactedSubgraph.nodes.map((n) => n.id); + // Production callers retained; the test node is excluded from the subgraph. + assert.deepEqual(ids, ["Function:src/b.ts:bar#0", "Function:src/c.ts:baz#0"]); + assert.equal(pack.impactedSubgraph.nodeCount, 2); + assert.equal(pack.impactedSubgraph.truncated, false); + + const bar = pack.impactedSubgraph.nodes.find((n) => n.id === "Function:src/b.ts:bar#0"); + const baz = pack.impactedSubgraph.nodes.find((n) => n.id === "Function:src/c.ts:baz#0"); + assert.equal(bar?.minDepth, 1, "bar is a direct upstream caller"); + assert.equal(baz?.minDepth, 2, "baz reaches foo through bar"); + + // Edges retained only between surviving production nodes / the root symbol. + // bar→foo (foo is the changed root) and baz→bar both survive; itFoo→foo drops. + const edgeKeys = pack.impactedSubgraph.edges.map((e) => `${e.fromId}|${e.type}|${e.toId}`); + assert.ok(edgeKeys.includes("Function:src/b.ts:bar#0|CALLS|Function:src/a.ts:foo#0")); + assert.ok(edgeKeys.includes("Function:src/c.ts:baz#0|CALLS|Function:src/b.ts:bar#0")); + assert.ok( + !edgeKeys.some((k) => k.includes("foo.test.ts")), + "test-incident edges must be dropped from the subgraph", + ); + assert.equal(pack.impactedSubgraph.edgeCount, pack.impactedSubgraph.edges.length); +}); + +test("runChangePack: includeTestsInSubgraph retains test nodes + edges", async () => { + const store = fooFixture(); + const pack = await runChangePack( + store, + { repoPath: "/repo", includeTestsInSubgraph: true }, + makeInternal(detect(FOO_CHANGE)), + ); + const ids = pack.impactedSubgraph.nodes.map((n) => n.id); + assert.ok(ids.includes("Function:src/foo.test.ts:itFoo#0"), "test node retained when opted in"); + const edgeKeys = pack.impactedSubgraph.edges.map((e) => `${e.fromId}|${e.type}|${e.toId}`); + assert.ok( + edgeKeys.includes("Function:src/foo.test.ts:itFoo#0|CALLS|Function:src/a.ts:foo#0"), + "test-incident edge retained when opted in", + ); +}); + +test("runChangePack: dedup keeps min depth across two changed symbols", async () => { + // Two changed symbols foo + bar both upstream-reach baz at different depths. + const store = new FakeStore(); + store.addNode({ + id: "Function:src/a.ts:foo#0", + kind: "Function", + name: "foo", + filePath: "src/a.ts", + }); + store.addNode({ + id: "Function:src/b.ts:bar#0", + kind: "Function", + name: "bar", + filePath: "src/b.ts", + }); + store.addNode({ + id: "Function:src/c.ts:baz#0", + kind: "Function", + name: "baz", + filePath: "src/c.ts", + }); + // baz calls bar (depth-1 from bar) and baz calls foo through bar (depth-2 from foo). + store.addEdge({ + fromId: "Function:src/c.ts:baz#0", + toId: "Function:src/b.ts:bar#0", + type: "CALLS", + confidence: 1.0, + }); + store.addEdge({ + fromId: "Function:src/b.ts:bar#0", + toId: "Function:src/a.ts:foo#0", + type: "CALLS", + confidence: 1.0, + }); + + const symbols: DetectChangesResult["affectedSymbols"] = [ + { + id: "Function:src/a.ts:foo#0", + name: "foo", + filePath: "src/a.ts", + kind: "Function", + changedLines: [1], + }, + { + id: "Function:src/b.ts:bar#0", + name: "bar", + filePath: "src/b.ts", + kind: "Function", + changedLines: [1], + }, + ]; + const pack = await runChangePack(store, { repoPath: "/repo" }, makeInternal(detect(symbols))); + const baz = pack.impactedSubgraph.nodes.find((n) => n.id === "Function:src/c.ts:baz#0"); + assert.ok(baz, "baz must be in the subgraph"); + assert.equal(baz.minDepth, 1, "baz reached at depth-1 from bar wins over depth-2 from foo"); +}); + +// --------------------------------------------------------------------------- +// Affected-test selection +// --------------------------------------------------------------------------- + +test("runChangePack: affected tests = upstream isTestPath hits with reachedFromSymbol + depth", async () => { + const store = fooFixture(); + const pack = await runChangePack(store, { repoPath: "/repo" }, makeInternal(detect(FOO_CHANGE))); + + assert.equal(pack.affectedTests.length, 1); + const t = pack.affectedTests[0]; + assert.ok(t); + assert.equal(t.id, "Function:src/foo.test.ts:itFoo#0"); + assert.equal(t.filePath, "src/foo.test.ts"); + assert.equal(t.reachedFromSymbol, "Function:src/a.ts:foo#0"); + assert.equal(t.depth, 1); +}); + +test("runChangePack: affected tests sorted by (filePath, id), deduped by id", async () => { + const store = new FakeStore(); + store.addNode({ + id: "Function:src/a.ts:foo#0", + kind: "Function", + name: "foo", + filePath: "src/a.ts", + }); + // Two tests in two files; the second sorts before the first by filePath. + store.addNode({ + id: "Function:tests/z.spec.ts:t1#0", + kind: "Function", + name: "t1", + filePath: "tests/z.spec.ts", + }); + store.addNode({ + id: "Function:tests/a.spec.ts:t2#0", + kind: "Function", + name: "t2", + filePath: "tests/a.spec.ts", + }); + store.addEdge({ + fromId: "Function:tests/z.spec.ts:t1#0", + toId: "Function:src/a.ts:foo#0", + type: "CALLS", + confidence: 1.0, + }); + store.addEdge({ + fromId: "Function:tests/a.spec.ts:t2#0", + toId: "Function:src/a.ts:foo#0", + type: "CALLS", + confidence: 1.0, + }); + + const pack = await runChangePack(store, { repoPath: "/repo" }, makeInternal(detect(FOO_CHANGE))); + const paths = pack.affectedTests.map((t) => t.filePath); + assert.deepEqual(paths, ["tests/a.spec.ts", "tests/z.spec.ts"], "sorted by filePath asc"); +}); + +// --------------------------------------------------------------------------- +// Cost attribution +// --------------------------------------------------------------------------- + +test("charHeuristicTokens: max(1, ceil(len/4))", () => { + assert.equal(charHeuristicTokens(""), 1); + assert.equal(charHeuristicTokens("abc"), 1); + assert.equal(charHeuristicTokens("abcd"), 1); + assert.equal(charHeuristicTokens("abcde"), 2); + assert.equal(charHeuristicTokens("a".repeat(40)), 10); +}); + +test("runChangePack: cost attribution computes baseline, savings, pct, ci skip", async () => { + const store = fooFixture(); + // Give the two retained production files large bodies so the blind baseline + // dwarfs the scoped pack and savings are positive. + const bigBody = "x".repeat(8000); + const files = { + "/repo/src/b.ts": bigBody, + "/repo/src/c.ts": bigBody, + }; + const pack = await runChangePack( + store, + { repoPath: "/repo" }, + makeInternal(detect(FOO_CHANGE), files), + ); + + const cost = pack.costAttribution; + assert.equal(cost.estimate, true); + assert.equal(cost.tokenizerModel, "char-heuristic-v1"); + // Baseline = sum over the two impacted files (src/b.ts, src/c.ts). + const expectedBaseline = charHeuristicTokens(bigBody) * 2; + assert.equal(cost.blindBaselineTokens, expectedBaseline); + assert.ok(cost.changePackTokens > 0); + assert.equal(cost.tokensSaved, Math.max(0, cost.blindBaselineTokens - cost.changePackTokens)); + assert.equal( + cost.tokensSavedPct, + Math.round((cost.tokensSaved / cost.blindBaselineTokens) * 100), + ); + // One test file in the graph (foo.test.ts); one affected → zero CI skip. + assert.equal(cost.totalTestCount, 1); + assert.equal(cost.affectedTestCount, 1); + assert.equal(cost.ciTestsSkipped, 0); +}); + +test("runChangePack: unreadable impacted file is skipped without breaking the baseline", async () => { + const store = fooFixture(); + // Only src/b.ts is readable; src/c.ts is missing from the fs map. + const files = { "/repo/src/b.ts": "y".repeat(400) }; + const pack = await runChangePack( + store, + { repoPath: "/repo" }, + makeInternal(detect(FOO_CHANGE), files), + ); + assert.equal(pack.costAttribution.blindBaselineTokens, charHeuristicTokens("y".repeat(400))); +}); + +// --------------------------------------------------------------------------- +// Empty-diff short-circuit +// --------------------------------------------------------------------------- + +test("runChangePack: empty diff → empty-but-valid pack, verdict still present", async () => { + const store = fooFixture(); + const pack = await runChangePack(store, { repoPath: "/repo" }, makeInternal(detect([]))); + + assert.deepEqual(pack.changedSymbols, []); + assert.deepEqual(pack.impactedSubgraph.nodes, []); + assert.deepEqual(pack.impactedSubgraph.edges, []); + assert.equal(pack.impactedSubgraph.truncated, false); + assert.deepEqual(pack.affectedTests, []); + assert.equal(pack.costAttribution.blindBaselineTokens, 0); + assert.equal(pack.costAttribution.tokensSaved, 0); + assert.equal(pack.costAttribution.tokensSavedPct, 0); + // totalTestCount still reflects the graph; affected=0 → all skipped. + assert.equal(pack.costAttribution.totalTestCount, 1); + assert.equal(pack.costAttribution.ciTestsSkipped, 1); + // Verdict is still computed. + assert.equal(pack.verdict.verdict, "single_review"); + assert.ok(pack.changePackHash.length === 64, "hash present on empty pack"); +}); + +// --------------------------------------------------------------------------- +// Truncation guard +// --------------------------------------------------------------------------- + +test("runChangePack: subgraph truncates deterministically past the hard ceiling", async () => { + const store = new FakeStore(); + store.addNode({ + id: "Function:src/a.ts:foo#0", + kind: "Function", + name: "foo", + filePath: "src/a.ts", + }); + // 5001 direct upstream callers → exceeds the 5000 cap by one. + const total = 5001; + for (let i = 0; i < total; i += 1) { + const id = `Function:src/callers.ts:c_${String(i).padStart(5, "0")}#0`; + store.addNode({ id, kind: "Function", name: `c_${i}`, filePath: "src/callers.ts" }); + store.addEdge({ fromId: id, toId: "Function:src/a.ts:foo#0", type: "CALLS", confidence: 1.0 }); + } + const pack = await runChangePack(store, { repoPath: "/repo" }, makeInternal(detect(FOO_CHANGE))); + assert.equal(pack.impactedSubgraph.truncated, true); + assert.equal(pack.impactedSubgraph.nodeCount, 5000); + assert.equal(pack.impactedSubgraph.nodes.length, 5000); +}); + +// --------------------------------------------------------------------------- +// Determinism + hash +// --------------------------------------------------------------------------- + +test("runChangePack: deterministic — two runs yield identical hash + bytes", async () => { + const big = "z".repeat(1234); + const files = { "/repo/src/b.ts": big, "/repo/src/c.ts": big }; + + const store1 = fooFixture(); + const store2 = fooFixture(); + const pack1 = await runChangePack( + store1, + { repoPath: "/repo" }, + makeInternal(detect(FOO_CHANGE), files), + ); + const pack2 = await runChangePack( + store2, + { repoPath: "/repo" }, + makeInternal(detect(FOO_CHANGE), files), + ); + + assert.equal(pack1.changePackHash, pack2.changePackHash, "hashes must match"); + assert.equal(pack1.changePackHash.length, 64); + assert.equal( + canonicalJson(pack1), + canonicalJson(pack2), + "canonical-JSON bytes must be identical", + ); +}); + +test("runChangePack: hash changes when depth changes (envelope folded into preimage)", async () => { + const store1 = fooFixture(); + const store2 = fooFixture(); + const pack1 = await runChangePack( + store1, + { repoPath: "/repo", depth: 4 }, + makeInternal(detect(FOO_CHANGE)), + ); + const pack2 = await runChangePack( + store2, + { repoPath: "/repo", depth: 2 }, + makeInternal(detect(FOO_CHANGE)), + ); + assert.notEqual(pack1.changePackHash, pack2.changePackHash, "depth is hashed"); +}); diff --git a/packages/analysis/src/change-pack.ts b/packages/analysis/src/change-pack.ts new file mode 100644 index 00000000..6eabc1c0 --- /dev/null +++ b/packages/analysis/src/change-pack.ts @@ -0,0 +1,495 @@ +/** + * Diff-scoped change-pack. + * + * `runChangePack` generalizes what `computeVerdict` already does internally + * (diff → per-symbol upstream fan-out) but RETAINS the impacted subgraph + * instead of collapsing it to a scalar blast radius, and SURFACES the tests + * that impact analysis classifies-then-drops today. + * + * The function is read-only over the graph: it composes `runDetectChanges`, + * `runImpact`, and `computeVerdict`, emits no new nodes or edges, calls no + * LLM, and produces byte-deterministic output with a content hash. It never + * throws — an empty or symbol-free diff resolves to an empty-but-valid pack, + * mirroring the verdict engine's empty-diff path. + */ + +import { readFile } from "node:fs/promises"; +import path from "node:path"; +import { canonicalJson, sha256Hex } from "@opencodehub/core-types"; +import type { IGraphStore } from "@opencodehub/storage"; +import type { + AffectedTest, + ChangedSymbol, + ChangePack, + ChangePackQuery, + CostAttribution, + ImpactedSubgraph, + ImpactedSubgraphEdge, + ImpactedSubgraphNode, +} from "./change-pack-types.js"; +import { runDetectChanges } from "./detect-changes.js"; +import { isTestPath, runImpact } from "./impact.js"; +import type { DetectChangesQuery, DetectChangesResult } from "./types.js"; +import { computeVerdict } from "./verdict.js"; +import type { VerdictQuery, VerdictResponse } from "./verdict-types.js"; + +const DEFAULT_BASE = "main"; +const DEFAULT_HEAD = "HEAD"; +const DEFAULT_DEPTH = 4; +const DEFAULT_MIN_CONFIDENCE = 0.7; +const DEFAULT_BUDGET = 100_000; +/** Hard ceiling on retained subgraph nodes; larger sets truncate deterministically. */ +const MAX_SUBGRAPH_NODES = 5000; + +/** + * Seam for reading impacted-file source so the blind-baseline computation can + * be exercised without touching disk. The default reads UTF-8 bytes from the + * filesystem; tests inject an in-memory map. + */ +export type ReadFileText = (absPath: string) => Promise; + +const defaultReadFileText: ReadFileText = (absPath) => readFile(absPath, "utf8"); + +/** + * Internal seams for hermetic testing. Production callers pass nothing — the + * defaults wire the real `runDetectChanges` / `computeVerdict` (which shell + * out to git) and a filesystem read. Tests inject deterministic stand-ins so + * the suite never spawns git or touches disk. + */ +export interface ChangePackInternal { + readonly readFileText?: ReadFileText; + readonly detectChanges?: ( + store: IGraphStore, + q: DetectChangesQuery, + ) => Promise; + readonly computeVerdict?: (store: IGraphStore, q: VerdictQuery) => Promise; +} + +/** + * Character-count token heuristic. Matches the pack's degraded counter + * (`max(1, ceil(len / 4))`). This is an estimate, not a model tokenizer. + */ +export function charHeuristicTokens(text: string): number { + return Math.max(1, Math.ceil(text.length / 4)); +} + +/** Resolved query envelope, folded into the hash so identical inputs hash alike. */ +interface ChangePackEnvelope { + readonly base: string; + readonly head: string; + readonly depth: number; + readonly minConfidence: number; + readonly budget: number; + readonly includeTestsInSubgraph: boolean; +} + +/** + * Compose a change-pack for the given git range. Never throws: git failures + * fail open to an empty diff, per-symbol traversal errors are swallowed, and + * an unreadable impacted file is skipped without breaking determinism. + */ +export async function runChangePack( + store: IGraphStore, + query: ChangePackQuery, + internal: ChangePackInternal = {}, +): Promise { + const repoPath = query.repoPath; + const base = query.base ?? DEFAULT_BASE; + const head = query.head ?? DEFAULT_HEAD; + const depth = query.depth ?? DEFAULT_DEPTH; + const minConfidence = query.minConfidence ?? DEFAULT_MIN_CONFIDENCE; + const budget = query.budget ?? DEFAULT_BUDGET; + const includeTestsInSubgraph = query.includeTestsInSubgraph ?? false; + const readFileText = internal.readFileText ?? defaultReadFileText; + const detectChanges = internal.detectChanges ?? runDetectChanges; + const computeVerdictFn = internal.computeVerdict ?? computeVerdict; + const envelope: ChangePackEnvelope = { + base, + head, + depth, + minConfidence, + budget, + includeTestsInSubgraph, + }; + + // ---- 1. diff → changed symbols (two-dot compare range, PR semantics) ---- + const compareRef = `${base}..${head}`; + const changes = await detectChanges(store, { + scope: "compare", + compareRef, + repoPath, + }); + + // The verdict section is always computed, even on an empty diff, so callers + // get a coherent 5-tier signal regardless of blast radius. + const verdict = await computeVerdictFn(store, { repoPath, base, head }); + + const changedFiles = sortStrings(changes.changedFiles); + const changedSymbols = sortChangedSymbols( + changes.affectedSymbols.map((s) => ({ + id: s.id, + name: s.name, + filePath: s.filePath, + kind: s.kind, + })), + ); + + if (changedSymbols.length === 0) { + // Empty or symbol-free diff: empty subgraph + tests, zero-savings cost. + return finalisePack({ + changedFiles, + changedSymbols, + impactedSubgraph: emptySubgraph(), + verdict, + affectedTests: [], + costAttribution: await emptyCostAttribution(store), + envelope, + }); + } + + // ---- 2. per-symbol upstream fan-out → union the subgraph + collect tests ---- + // One `includeTests:true` pass per changed symbol yields both the impacted + // subgraph nodes/edges AND the test nodes (with depth) in a single + // traversal. We post-filter: the subgraph excludes test paths by default + // (production-only, matching the verdict), while the test set keeps only + // test paths. + const subgraphNodes = new Map(); + const tests = new Map(); + // Every traversed edge, deduped by (from,type,to). We retain or drop edges + // in a single post-pass once the full node set is known, so an edge incident + // to a test node is only dropped when that node is genuinely excluded. + const allEdges = new Map(); + // Node ids that resolve to a test path — used to filter edges when tests are + // excluded from the subgraph. + const testNodeIds = new Set(); + // Ids of the changed symbols (the fan-out roots). Edges incident to a root + // are always retained: the root is a production symbol the diff touched. + const changedSymbolIds = new Set(changedSymbols.map((s) => s.id)); + + for (const sym of changedSymbols) { + let result: Awaited>; + try { + result = await runImpact(store, { + // `targetUid` is the zero-ambiguity resolver path; `target` is a + // required field used only as the fallback label. + target: sym.id, + targetUid: sym.id, + direction: "upstream", + maxDepth: depth, + minConfidence, + includeTests: true, + }); + } catch { + // Partial data is acceptable; a failed traversal contributes nothing. + continue; + } + + for (const bucket of result.byDepth) { + for (const node of bucket.nodes) { + const isTest = isTestPath(node.filePath); + if (isTest) { + testNodeIds.add(node.id); + // Affected-test selection: keep the shallowest depth. The first + // reachedFromSymbol wins by changed-symbol id order; because + // `changedSymbols` is pre-sorted by id and iterated in order, the + // earliest writer already holds the lowest id, so we only update + // reachedFromSymbol when we improve on depth from the same writer is + // not needed — keep the first one recorded. + const existing = tests.get(node.id); + if (existing === undefined) { + tests.set(node.id, { + id: node.id, + name: node.name, + filePath: node.filePath, + reachedFromSymbol: sym.id, + depth: bucket.depth, + }); + } else if (bucket.depth < existing.depth) { + tests.set(node.id, { + ...existing, + depth: bucket.depth, + }); + } + } + // Subgraph retention: exclude test paths unless the caller opts in. + if (isTest && !includeTestsInSubgraph) continue; + const prior = subgraphNodes.get(node.id); + const minDepth = + prior === undefined ? bucket.depth : Math.min(prior.minDepth, bucket.depth); + if (prior === undefined || minDepth < prior.minDepth) { + subgraphNodes.set(node.id, { + id: node.id, + name: node.name, + filePath: node.filePath, + kind: node.kind, + minDepth, + }); + } + } + } + + for (const edge of result.traversedEdges) { + const key = `${edge.fromId}|${edge.type}|${edge.toId}`; + if (!allEdges.has(key)) { + allEdges.set(key, { + fromId: edge.fromId, + toId: edge.toId, + type: edge.type, + confidence: edge.confidence, + }); + } + } + } + + const impactedSubgraph = buildSubgraph( + subgraphNodes, + allEdges, + includeTestsInSubgraph ? new Set() : testNodeIds, + changedSymbolIds, + ); + const affectedTests = sortAffectedTests([...tests.values()]); + + const costAttribution = await computeCostAttribution({ + store, + repoPath, + impactedSubgraph, + affectedTests, + body: buildHashBody({ + changedFiles, + changedSymbols, + impactedSubgraph, + verdict, + affectedTests, + }), + readFileText, + }); + + return finalisePack({ + changedFiles, + changedSymbols, + impactedSubgraph, + verdict, + affectedTests, + costAttribution, + envelope, + }); +} + +// --------------------------------------------------------------------------- +// Subgraph assembly +// --------------------------------------------------------------------------- + +function emptySubgraph(): ImpactedSubgraph { + return { nodes: [], edges: [], nodeCount: 0, edgeCount: 0, truncated: false }; +} + +/** + * Build the retained subgraph from the deduped node + edge maps. Nodes are + * sorted by `(minDepth, id)` and capped at `MAX_SUBGRAPH_NODES`; edges are + * filtered to those whose endpoints survive (both ends are either a retained + * node or a changed-symbol root) and dropped if incident to an excluded test + * node. Final ordering is `(fromId, type, toId)` for byte-identity. + */ +function buildSubgraph( + nodeMap: ReadonlyMap, + edgeMap: ReadonlyMap, + excludedNodeIds: ReadonlySet, + rootIds: ReadonlySet, +): ImpactedSubgraph { + // Deterministic node ordering: shallowest depth first, then id. Truncate + // past the hard ceiling so the subgraph is always bounded. + const orderedNodes = [...nodeMap.values()].sort((a, b) => { + if (a.minDepth !== b.minDepth) return a.minDepth - b.minDepth; + return a.id < b.id ? -1 : a.id > b.id ? 1 : 0; + }); + const truncated = orderedNodes.length > MAX_SUBGRAPH_NODES; + const keptNodes = truncated ? orderedNodes.slice(0, MAX_SUBGRAPH_NODES) : orderedNodes; + + // An edge endpoint is valid when it is a retained node or a changed-symbol + // root (the roots are diff-touched production symbols, never in byDepth). + const keptNodeIds = new Set(keptNodes.map((n) => n.id)); + const endpointValid = (id: string): boolean => keptNodeIds.has(id) || rootIds.has(id); + + const keptEdges: ImpactedSubgraphEdge[] = []; + for (const edge of edgeMap.values()) { + if (excludedNodeIds.has(edge.fromId) || excludedNodeIds.has(edge.toId)) continue; + if (!endpointValid(edge.fromId) || !endpointValid(edge.toId)) continue; + keptEdges.push(edge); + } + keptEdges.sort((a, b) => { + if (a.fromId !== b.fromId) return a.fromId < b.fromId ? -1 : 1; + if (a.type !== b.type) return a.type < b.type ? -1 : 1; + if (a.toId !== b.toId) return a.toId < b.toId ? -1 : 1; + return 0; + }); + + // Re-sort retained nodes by id for the emitted collection (deterministic + // node ordering). Depth ordering above is only the truncation key. + const emittedNodes = [...keptNodes].sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0)); + + return { + nodes: emittedNodes, + edges: keptEdges, + nodeCount: emittedNodes.length, + edgeCount: keptEdges.length, + truncated, + }; +} + +// --------------------------------------------------------------------------- +// Cost attribution +// --------------------------------------------------------------------------- + +interface CostAttributionInput { + readonly store: IGraphStore; + readonly repoPath: string; + readonly impactedSubgraph: ImpactedSubgraph; + readonly affectedTests: readonly AffectedTest[]; + /** The change-pack context body (minus costAttribution + hash) the agent consumes. */ + readonly body: unknown; + readonly readFileText: ReadFileText; +} + +async function computeCostAttribution(input: CostAttributionInput): Promise { + const { store, repoPath, impactedSubgraph, affectedTests, body, readFileText } = input; + + const changePackTokens = charHeuristicTokens(canonicalJson(body)); + + // Blind baseline: the cost an agent pays by reading every impacted file + // whole. Distinct File paths in the subgraph; read each once from disk. + const distinctFiles = new Set(); + for (const node of impactedSubgraph.nodes) { + if (node.filePath.length > 0) distinctFiles.add(node.filePath); + } + // Deterministic read order (irrelevant to the sum, but keeps any future + // logging stable). + const orderedFiles = [...distinctFiles].sort(); + let blindBaselineTokens = 0; + for (const relPath of orderedFiles) { + let text: string; + try { + text = await readFileText(path.join(repoPath, relPath)); + } catch { + // Unreadable file (deleted, permissions): skip, stay deterministic. + continue; + } + blindBaselineTokens += charHeuristicTokens(text); + } + + const tokensSaved = Math.max(0, blindBaselineTokens - changePackTokens); + const tokensSavedPct = + blindBaselineTokens > 0 ? Math.round((tokensSaved / blindBaselineTokens) * 100) : 0; + + const totalTestCount = await countTestFiles(store); + const affectedTestCount = affectedTests.length; + const ciTestsSkipped = Math.max(0, totalTestCount - affectedTestCount); + + return { + estimate: true, + tokenizerModel: "char-heuristic-v1", + changePackTokens, + blindBaselineTokens, + tokensSaved, + tokensSavedPct, + affectedTestCount, + totalTestCount, + ciTestsSkipped, + }; +} + +async function emptyCostAttribution(store: IGraphStore): Promise { + // An empty pack consumes a fixed, tiny body; the blind baseline is zero + // because there are no impacted files. Report total tests for context. + const totalTestCount = await countTestFiles(store); + return { + estimate: true, + tokenizerModel: "char-heuristic-v1", + changePackTokens: 0, + blindBaselineTokens: 0, + tokensSaved: 0, + tokensSavedPct: 0, + affectedTestCount: 0, + totalTestCount, + ciTestsSkipped: totalTestCount, + }; +} + +/** Count distinct test-path File nodes in the repo graph (approximation of suite size). */ +async function countTestFiles(store: IGraphStore): Promise { + try { + const files = await store.listNodesByKind("File"); + let count = 0; + for (const node of files) { + if (isTestPath(node.filePath)) count += 1; + } + return count; + } catch { + // File-kind enumeration unavailable on a partial index: report zero. + return 0; + } +} + +// --------------------------------------------------------------------------- +// Hashing + finalisation +// --------------------------------------------------------------------------- + +interface PackBody { + readonly changedFiles: readonly string[]; + readonly changedSymbols: readonly ChangedSymbol[]; + readonly impactedSubgraph: ImpactedSubgraph; + readonly verdict: VerdictResponse; + readonly affectedTests: readonly AffectedTest[]; +} + +/** + * The context body the agent consumes — the change-pack minus its + * cost-attribution block and content hash. Cost tokens are counted over this. + */ +function buildHashBody(body: PackBody): PackBody { + return body; +} + +interface FinaliseInput extends PackBody { + readonly costAttribution: CostAttribution; + readonly envelope: ChangePackEnvelope; +} + +/** + * Assemble the final ChangePack and compute `changePackHash` over the + * canonical-JSON form with the hash field blanked, folding the query envelope + * (base/head/depth/minConfidence/budget/includeTestsInSubgraph) into the + * preimage so identical inputs hash alike and different ones diverge. + */ +function finalisePack(input: FinaliseInput): ChangePack { + const pack: ChangePack = { + changedFiles: input.changedFiles, + changedSymbols: input.changedSymbols, + impactedSubgraph: input.impactedSubgraph, + verdict: input.verdict, + affectedTests: input.affectedTests, + costAttribution: input.costAttribution, + changePackHash: "", + }; + const preimage = canonicalJson({ ...pack, changePackHash: "", envelope: input.envelope }); + const changePackHash = sha256Hex(preimage); + return { ...pack, changePackHash }; +} + +// --------------------------------------------------------------------------- +// Deterministic sorters +// --------------------------------------------------------------------------- + +function sortStrings(values: readonly string[]): readonly string[] { + return [...values].sort((a, b) => (a < b ? -1 : a > b ? 1 : 0)); +} + +function sortChangedSymbols(symbols: readonly ChangedSymbol[]): readonly ChangedSymbol[] { + return [...symbols].sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0)); +} + +function sortAffectedTests(tests: readonly AffectedTest[]): readonly AffectedTest[] { + return [...tests].sort((a, b) => { + if (a.filePath !== b.filePath) return a.filePath < b.filePath ? -1 : 1; + return a.id < b.id ? -1 : a.id > b.id ? 1 : 0; + }); +} diff --git a/packages/analysis/src/index.ts b/packages/analysis/src/index.ts index cb231ac4..159c6d1d 100644 --- a/packages/analysis/src/index.ts +++ b/packages/analysis/src/index.ts @@ -1,5 +1,21 @@ export type { ApiImpactFilter, ApiImpactRow } from "./api-impact.js"; export { listApiImpact, scoreRisk, worseRisk } from "./api-impact.js"; +export { + type ChangePackInternal, + charHeuristicTokens, + type ReadFileText, + runChangePack, +} from "./change-pack.js"; +export type { + AffectedTest, + ChangedSymbol, + ChangePack, + ChangePackQuery, + CostAttribution, + ImpactedSubgraph, + ImpactedSubgraphEdge, + ImpactedSubgraphNode, +} from "./change-pack-types.js"; export type { DeadCodeResult, Deadness, From b44774a40b06a1a37cf8b749de86a4d969f85fe6 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 14 Jun 2026 17:06:05 +0000 Subject: [PATCH 2/7] feat(mcp,cli): expose change_pack with full CLI<->MCP parity Both surfaces delegate to the single @opencodehub/analysis.runChangePack core, so they cannot disagree on values. - MCP change_pack tool: withStore + withNextSteps + staleness + error envelope, registered in server.ts; structuredContent recases top-level keys plus the interiors of impacted_subgraph and cost_attribution to snake_case. - CLI codehub change-pack: commander registration, --base/--head/--depth/ --min-confidence/--budget/--include-tests-in-subgraph/--json; --json is a pure passthrough of the raw ChangePack; exit code = verdict.exitCode so CI gate semantics match codehub verdict. - First CLI<->MCP parity test in the repo, split to respect package rootDir: the CLI test asserts --json deep-equals the raw pack (passthrough); the MCP test asserts toStructured recases losslessly (recase-back == raw pack), so together they prove both surfaces serialize identical values. - server roster contract bumped 28 -> 29 (change_pack added). --- packages/cli/src/commands/change-pack.test.ts | 325 ++++++++++++++++++ packages/cli/src/commands/change-pack.ts | 109 ++++++ packages/cli/src/index.ts | 33 ++ packages/mcp/src/server.test.ts | 3 +- packages/mcp/src/server.ts | 6 +- .../mcp/src/tools/change-pack.parity.test.ts | 184 ++++++++++ packages/mcp/src/tools/change-pack.test.ts | 151 ++++++++ packages/mcp/src/tools/change-pack.ts | 173 ++++++++++ 8 files changed, 981 insertions(+), 3 deletions(-) create mode 100644 packages/cli/src/commands/change-pack.test.ts create mode 100644 packages/cli/src/commands/change-pack.ts create mode 100644 packages/mcp/src/tools/change-pack.parity.test.ts create mode 100644 packages/mcp/src/tools/change-pack.test.ts create mode 100644 packages/mcp/src/tools/change-pack.ts diff --git a/packages/cli/src/commands/change-pack.test.ts b/packages/cli/src/commands/change-pack.test.ts new file mode 100644 index 00000000..35700b5e --- /dev/null +++ b/packages/cli/src/commands/change-pack.test.ts @@ -0,0 +1,325 @@ +/** + * `codehub change-pack` CLI surface tests. + * + * Covers: + * 1. `--json` → raw camelCase ChangePack on stdout (changePackHash + + * costAttribution.estimate === true), exit code = verdict.exitCode. + * 2. Default (no `--json`) → human summary, exit code = verdict.exitCode. + * 3. The query envelope (base/head/depth/min-confidence/budget/ + * include-tests) is threaded through to the injected runner verbatim. + * 4. The store is always closed (finally), even on the summary path. + * + * Each test injects an `_openStore` factory + an `_runChangePack` stand-in + * so nothing hits lbug/DuckDB or git. The CLI's contract under test is the + * exit-code passthrough (`pack.verdict.exitCode`) and the JSON shape — not + * the analysis module's compose logic, which has its own suite. + */ + +import assert from "node:assert/strict"; +import { test } from "node:test"; +import type { ChangePack, ChangePackQuery } from "@opencodehub/analysis"; +import type { IGraphStore, Store } from "@opencodehub/storage"; +import { type ChangePackOptions, runChangePackCmd } from "./change-pack.js"; + +// --- fixtures -------------------------------------------------------------- + +const FAKE_GRAPH: IGraphStore = { + open: async () => undefined, + close: async () => undefined, +} as unknown as IGraphStore; + +interface FakeStoreHandle { + readonly store: Store; + closed(): boolean; +} + +function fakeStore(): FakeStoreHandle { + let wasClosed = false; + const store = { + graph: FAKE_GRAPH, + temporal: {} as unknown, + graphFile: "/tmp/fake-repo/.codehub/graph.lbug", + temporalFile: "/tmp/fake-repo/.codehub/temporal.duckdb", + close: async () => { + wasClosed = true; + }, + } as unknown as Store; + return { store, closed: () => wasClosed }; +} + +function packFixture(exitCode: 0 | 1 | 2, overrides: Partial = {}): ChangePack { + const base: ChangePack = { + changedFiles: ["src/a.ts"], + changedSymbols: [ + { id: "Function:src/a.ts:foo#0", name: "foo", filePath: "src/a.ts", kind: "Function" }, + ], + impactedSubgraph: { + nodes: [ + { + id: "Function:src/b.ts:bar#0", + name: "bar", + filePath: "src/b.ts", + kind: "Function", + minDepth: 1, + }, + ], + edges: [ + { + fromId: "Function:src/b.ts:bar#0", + toId: "Function:src/a.ts:foo#0", + type: "CALLS", + confidence: 1, + }, + ], + nodeCount: 1, + edgeCount: 1, + truncated: false, + }, + verdict: { + verdict: exitCode === 0 ? "single_review" : exitCode === 1 ? "dual_review" : "expert_review", + confidence: 0.85, + decisionBoundary: { distancePercent: 50, nextTier: "dual_review" }, + reasoningChain: [{ label: "blast_radius", value: 3, severity: "warn" }], + recommendedReviewers: [], + githubLabels: ["review:single"], + reviewCommentMarkdown: "", + exitCode, + blastRadius: 3, + communitiesTouched: [], + changedFileCount: 1, + changedFiles: ["src/a.ts"], + affectedSymbolCount: 1, + }, + affectedTests: [ + { + id: "Function:src/foo.test.ts:itFoo#0", + name: "itFoo", + filePath: "src/foo.test.ts", + reachedFromSymbol: "Function:src/a.ts:foo#0", + depth: 1, + }, + ], + costAttribution: { + estimate: true, + tokenizerModel: "char-heuristic-v1", + changePackTokens: 120, + blindBaselineTokens: 480, + tokensSaved: 360, + tokensSavedPct: 75, + affectedTestCount: 1, + totalTestCount: 4, + ciTestsSkipped: 3, + }, + changePackHash: "deadbeef".repeat(8), + }; + return { ...base, ...overrides }; +} + +function stubRun( + pack: ChangePack, + capture?: (query: ChangePackQuery) => void, +): NonNullable { + return async (_store: IGraphStore, query: ChangePackQuery) => { + capture?.(query); + return pack; + }; +} + +interface StdoutCapture { + readonly chunks: string[]; + restore(): void; +} + +function captureLog(): StdoutCapture { + const chunks: string[] = []; + const orig = console.log; + console.log = (...args: unknown[]) => { + chunks.push(args.map((a) => (typeof a === "string" ? a : String(a))).join(" ")); + }; + return { + chunks, + restore: () => { + console.log = orig; + }, + }; +} + +async function withExitCode(fn: () => Promise): Promise<{ result: T; exitCode: number }> { + const prev = process.exitCode; + process.exitCode = 0; + try { + const result = await fn(); + const exitCode = typeof process.exitCode === "number" ? process.exitCode : 0; + return { result, exitCode }; + } finally { + process.exitCode = prev; + } +} + +// --- tests ----------------------------------------------------------------- + +test("runChangePackCmd --json emits raw camelCase ChangePack, exit = verdict.exitCode", async () => { + const handle = fakeStore(); + const cap = captureLog(); + const { exitCode } = await withExitCode(async () => { + try { + await runChangePackCmd({ + json: true, + _openStore: async () => ({ store: handle.store, repoPath: "/tmp/fake-repo" }), + _runChangePack: stubRun(packFixture(2)), + }); + } finally { + cap.restore(); + } + }); + const output = cap.chunks.join("\n"); + const fixture = packFixture(2); + const parsed = JSON.parse(output) as ChangePack; + assert.equal(parsed.changePackHash, "deadbeef".repeat(8)); + assert.equal(parsed.costAttribution.estimate, true); + assert.equal(parsed.verdict.verdict, "expert_review"); + // CLI<->MCP parity (CLI half): `--json` is a PURE passthrough — the emitted + // object deep-equals the analysis ChangePack with zero reshaping. The MCP + // half (that toStructured recases the same values losslessly) is asserted in + // packages/mcp/src/tools/change-pack.parity.test.ts. Together they prove both + // surfaces serialize identical values. + assert.deepEqual(parsed, fixture, "CLI --json must emit the raw ChangePack unchanged"); + // No human-summary prose leaks into JSON mode. + assert.doesNotMatch(output, /^change-pack: /m); + // Exit code is the verdict's own code (expert_review → 2). + assert.equal(exitCode, 2); + assert.ok(handle.closed(), "store must be closed in finally"); +}); + +test("runChangePackCmd default (no --json) → human summary, exit = verdict.exitCode", async () => { + const handle = fakeStore(); + const cap = captureLog(); + const { exitCode } = await withExitCode(async () => { + try { + await runChangePackCmd({ + _openStore: async () => ({ store: handle.store, repoPath: "/tmp/fake-repo" }), + _runChangePack: stubRun(packFixture(1)), + }); + } finally { + cap.restore(); + } + }); + const output = cap.chunks.join("\n"); + assert.match(output, /change-pack: 1 file\(s\), 1 symbol\(s\) changed\. Verdict: dual_review\./); + assert.match(output, /Impacted subgraph: 1 node\(s\), 1 edge\(s\)\./); + assert.match(output, /Affected tests \(1\):/); + assert.match(output, /• itFoo — src\/foo\.test\.ts/); + assert.match( + output, + /Est\. tokens saved: 360 \(75%\) vs blind read; CI tests skippable: 3\/4 \(est\.\)/, + ); + // Summary mode is not JSON. + assert.doesNotMatch(output, /"changePackHash"/); + // dual_review → exit 1. + assert.equal(exitCode, 1); + assert.ok(handle.closed(), "store must be closed in finally"); +}); + +test("runChangePackCmd: auto_merge-tier exit code is 0", async () => { + const handle = fakeStore(); + const cap = captureLog(); + const { exitCode } = await withExitCode(async () => { + try { + await runChangePackCmd({ + json: true, + _openStore: async () => ({ store: handle.store, repoPath: "/tmp/fake-repo" }), + _runChangePack: stubRun(packFixture(0)), + }); + } finally { + cap.restore(); + } + }); + assert.equal(exitCode, 0); +}); + +test("runChangePackCmd: subgraph truncation surfaces in the summary", async () => { + const handle = fakeStore(); + const cap = captureLog(); + await withExitCode(async () => { + try { + await runChangePackCmd({ + _openStore: async () => ({ store: handle.store, repoPath: "/tmp/fake-repo" }), + _runChangePack: stubRun( + packFixture(0, { + impactedSubgraph: { + nodes: [], + edges: [], + nodeCount: 5000, + edgeCount: 9000, + truncated: true, + }, + }), + ), + }); + } finally { + cap.restore(); + } + }); + const output = cap.chunks.join("\n"); + assert.match(output, /Impacted subgraph: 5000 node\(s\), 9000 edge\(s\) \(truncated\)\./); +}); + +test("runChangePackCmd threads base/head/depth/min-confidence/budget/include-tests into the runner", async () => { + const handle = fakeStore(); + const cap = captureLog(); + let seen: ChangePackQuery | undefined; + await withExitCode(async () => { + try { + await runChangePackCmd({ + base: "release", + head: "feature/x", + depth: 6, + minConfidence: 0.5, + budget: 50_000, + includeTestsInSubgraph: true, + json: true, + _openStore: async () => ({ store: handle.store, repoPath: "/tmp/fake-repo" }), + _runChangePack: stubRun(packFixture(0), (q) => { + seen = q; + }), + }); + } finally { + cap.restore(); + } + }); + assert.ok(seen); + assert.equal(seen?.repoPath, "/tmp/fake-repo"); + assert.equal(seen?.base, "release"); + assert.equal(seen?.head, "feature/x"); + assert.equal(seen?.depth, 6); + assert.equal(seen?.minConfidence, 0.5); + assert.equal(seen?.budget, 50_000); + assert.equal(seen?.includeTestsInSubgraph, true); +}); + +test("runChangePackCmd omits unset query fields (defaults handled in the analysis layer)", async () => { + const handle = fakeStore(); + const cap = captureLog(); + let seen: ChangePackQuery | undefined; + await withExitCode(async () => { + try { + await runChangePackCmd({ + json: true, + _openStore: async () => ({ store: handle.store, repoPath: "/tmp/fake-repo" }), + _runChangePack: stubRun(packFixture(0), (q) => { + seen = q; + }), + }); + } finally { + cap.restore(); + } + }); + assert.ok(seen); + assert.equal(seen?.repoPath, "/tmp/fake-repo"); + assert.equal(seen?.base, undefined); + assert.equal(seen?.head, undefined); + assert.equal(seen?.depth, undefined); + assert.equal(seen?.minConfidence, undefined); + assert.equal(seen?.budget, undefined); + assert.equal(seen?.includeTestsInSubgraph, undefined); +}); diff --git a/packages/cli/src/commands/change-pack.ts b/packages/cli/src/commands/change-pack.ts new file mode 100644 index 00000000..676d91b8 --- /dev/null +++ b/packages/cli/src/commands/change-pack.ts @@ -0,0 +1,109 @@ +/** + * `codehub change-pack` — diff-scoped change-pack: impacted subgraph + + * verdict + affected tests + cost estimate. + * + * Delegates to `@opencodehub/analysis.runChangePack`. The CLI layer only + * resolves the scope flags, opens a read-only store, and formats the + * result. CLI sibling of the `change_pack` MCP tool — usable from CI + * without launching the MCP server. + * + * Exit codes mirror `codehub verdict`: the verdict already carries the + * 0|1|2 ladder (auto_merge/single_review → 0, dual_review → 1, + * expert_review/block → 2), so we reuse `pack.verdict.exitCode` verbatim + * for both the JSON and human-summary paths. CI gates configured against + * `codehub verdict` therefore behave identically against `change-pack`. + */ + +import type { ChangePack, ChangePackQuery } from "@opencodehub/analysis"; +import { runChangePack } from "@opencodehub/analysis"; +import type { IGraphStore } from "@opencodehub/storage"; +import { type OpenStoreResult, openStoreForCommand } from "./open-store.js"; + +export interface ChangePackOptions { + readonly base?: string; + readonly head?: string; + readonly depth?: number; + readonly minConfidence?: number; + readonly budget?: number; + readonly includeTestsInSubgraph?: boolean; + readonly repo?: string; + readonly home?: string; + readonly json?: boolean; + /** + * Test seam — inject a custom store factory. Production callers leave + * this unset; the runtime calls {@link openStoreForCommand}. Mirrors the + * `_store` / `_generatePack` seams on `verdict` / `code-pack`. + */ + readonly _openStore?: (opts: ChangePackOptions) => Promise; + /** + * Test seam — inject a custom `runChangePack`. Production callers leave + * this unset; the runtime uses `@opencodehub/analysis.runChangePack`, + * which shells out to git. Tests inject a deterministic stand-in so the + * suite never spawns git or touches disk — mirrors verdict.ts's + * `computeVerdictFn` seam. + */ + readonly _runChangePack?: (store: IGraphStore, query: ChangePackQuery) => Promise; +} + +export async function runChangePackCmd(opts: ChangePackOptions = {}): Promise { + const openStore = opts._openStore ?? openStoreForCommand; + const run = opts._runChangePack ?? runChangePack; + const { store, repoPath } = await openStore(opts); + try { + const query: ChangePackQuery = { + repoPath, + ...(opts.base !== undefined ? { base: opts.base } : {}), + ...(opts.head !== undefined ? { head: opts.head } : {}), + ...(opts.depth !== undefined ? { depth: opts.depth } : {}), + ...(opts.minConfidence !== undefined ? { minConfidence: opts.minConfidence } : {}), + ...(opts.budget !== undefined ? { budget: opts.budget } : {}), + ...(opts.includeTestsInSubgraph !== undefined + ? { includeTestsInSubgraph: opts.includeTestsInSubgraph } + : {}), + }; + const pack = await run(store.graph, query); + + if (opts.json) { + console.log(JSON.stringify(pack, null, 2)); + } else { + const { changedFiles, changedSymbols, impactedSubgraph, verdict, affectedTests } = pack; + console.log( + `change-pack: ${changedFiles.length} file(s), ${changedSymbols.length} symbol(s) changed. Verdict: ${verdict.verdict}.`, + ); + const truncatedNote = impactedSubgraph.truncated ? " (truncated)" : ""; + console.log( + `Impacted subgraph: ${impactedSubgraph.nodeCount} node(s), ${impactedSubgraph.edgeCount} edge(s)${truncatedNote}.`, + ); + const lead = verdict.reasoningChain[0]; + if (lead !== undefined) { + console.log(`Verdict reasoning: ${lead.label} = ${lead.value} [${lead.severity}]`); + } + if (changedFiles.length > 0) { + console.log("Changed files:"); + for (const f of changedFiles.slice(0, 30)) console.log(` • ${f}`); + if (changedFiles.length > 30) { + console.log(` … ${changedFiles.length - 30} more`); + } + } + if (affectedTests.length > 0) { + console.log(`Affected tests (${affectedTests.length}):`); + for (const t of affectedTests.slice(0, 30)) { + console.log(` • ${t.name} — ${t.filePath}`); + } + if (affectedTests.length > 30) { + console.log(` … ${affectedTests.length - 30} more`); + } + } + const cost = pack.costAttribution; + console.log( + `Est. tokens saved: ${cost.tokensSaved} (${cost.tokensSavedPct}%) vs blind read; CI tests skippable: ${cost.ciTestsSkipped}/${cost.totalTestCount} (est.)`, + ); + } + + // Reuse the verdict's own 0|1|2 exit code so CI gate semantics match + // `codehub verdict` exactly — set in both the JSON and summary paths. + process.exitCode = pack.verdict.exitCode; + } finally { + await store.close(); + } +} diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index c62c2893..fafc2e3c 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -554,6 +554,39 @@ program }); }); +program + .command("change-pack") + .description( + "Diff-scoped change-pack: impacted subgraph + verdict + affected tests + cost estimate (CLI sibling of the change_pack MCP tool)", + ) + .option("--repo ", "Registered repo name (default: current directory)") + .option("--base ", "Base git ref (default: main)") + .option("--head ", "Head git ref (default: HEAD)") + .option("--depth ", "Upstream traversal depth (default: 4)", (v) => Number.parseInt(v, 10)) + .option("--min-confidence ", "Traversal confidence floor 0-1 (default: 0.7)", (v) => + Number.parseFloat(v), + ) + .option("--budget ", "Context budget in heuristic tokens (default: 100000)", (v) => + Number.parseInt(v, 10), + ) + .option("--include-tests-in-subgraph", "Retain test nodes in the impacted subgraph") + .option("--json", "Emit JSON on stdout") + .action(async (opts: Record) => { + const mod = await import("./commands/change-pack.js"); + await mod.runChangePackCmd({ + ...(typeof opts["repo"] === "string" ? { repo: opts["repo"] } : {}), + ...(typeof opts["base"] === "string" ? { base: opts["base"] } : {}), + ...(typeof opts["head"] === "string" ? { head: opts["head"] } : {}), + ...(typeof opts["depth"] === "number" ? { depth: opts["depth"] } : {}), + ...(typeof opts["minConfidence"] === "number" + ? { minConfidence: opts["minConfidence"] } + : {}), + ...(typeof opts["budget"] === "number" ? { budget: opts["budget"] } : {}), + ...(opts["includeTestsInSubgraph"] === true ? { includeTestsInSubgraph: true } : {}), + json: opts["json"] === true, + }); + }); + // `codehub group ...` — cross-repo groups. We register placeholder // subcommands so `commander` routes the invocation correctly, and load the // real handler lazily on .action(). This keeps `codehub --help` snappy. diff --git a/packages/mcp/src/server.test.ts b/packages/mcp/src/server.test.ts index ba4b3186..30297446 100644 --- a/packages/mcp/src/server.test.ts +++ b/packages/mcp/src/server.test.ts @@ -52,6 +52,7 @@ test("buildServer registers zero prompts — ListPrompts returns an empty set", */ const EXPECTED_TOOL_NAMES = [ "api_impact", + "change_pack", "context", "dependencies", "detect_changes", @@ -104,7 +105,7 @@ test("buildServer registers exactly the expected read-only tool set", async () = !registered.includes("remove_dead_code"), "source-mutating remove_dead_code tool must NOT be registered", ); - assert.equal(registered.length, 28); + assert.equal(registered.length, 29); } finally { await running.shutdown(); } diff --git a/packages/mcp/src/server.ts b/packages/mcp/src/server.ts index 68d76d18..f413640d 100644 --- a/packages/mcp/src/server.ts +++ b/packages/mcp/src/server.ts @@ -26,6 +26,7 @@ import { registerRepoProcessesResource } from "./resources/repo-processes.js"; import { registerRepoSchemaResource } from "./resources/repo-schema.js"; import { registerReposResource } from "./resources/repos.js"; import { registerApiImpactTool } from "./tools/api-impact.js"; +import { registerChangePackTool } from "./tools/change-pack.js"; import { registerContextTool } from "./tools/context.js"; import { registerDependenciesTool } from "./tools/dependencies.js"; import { registerDetectChangesTool } from "./tools/detect-changes.js"; @@ -61,9 +62,9 @@ const SERVER_VERSION = "0.0.0"; const INSTRUCTIONS = [ "OpenCodeHub exposes indexed code graphs for MCP agents.", "Typical flow: call `list_repos` first to discover indexed repos, then route subsequent calls through one of those repo names.", - "Every per-repo tool (`query`, `context`, `impact`, `detect_changes`, `sql`, `scan`, `list_findings`, `list_findings_delta`, `list_dead_code`, `license_audit`, `project_profile`, `dependencies`, `owners`, `risk_trends`, `verdict`) accepts an optional `repo` argument (registry name) or a `repo_uri` alias (Sourcegraph-style URI like `github.com/org/repo`, or `local:` for unpublished repos; wins when both are provided). When exactly one repo is registered, both are optional and the tool defaults to that repo. When ≥ 2 repos are registered and neither is supplied, the tool returns `AMBIGUOUS_REPO` — the structured envelope carries `structuredContent.error.choices[]` (capped at 10, with `{repo_uri, default_branch, group}`) plus `total_matches`, so a caller can retry with one of `choices[].repo_uri`.", + "Every per-repo tool (`query`, `context`, `impact`, `detect_changes`, `sql`, `scan`, `list_findings`, `list_findings_delta`, `list_dead_code`, `license_audit`, `project_profile`, `dependencies`, `owners`, `risk_trends`, `verdict`, `change_pack`) accepts an optional `repo` argument (registry name) or a `repo_uri` alias (Sourcegraph-style URI like `github.com/org/repo`, or `local:` for unpublished repos; wins when both are provided). When exactly one repo is registered, both are optional and the tool defaults to that repo. When ≥ 2 repos are registered and neither is supplied, the tool returns `AMBIGUOUS_REPO` — the structured envelope carries `structuredContent.error.choices[]` (capped at 10, with `{repo_uri, default_branch, group}`) plus `total_matches`, so a caller can retry with one of `choices[].repo_uri`.", "Every tool response includes a `next_steps` array under structuredContent and a `_meta.codehub/staleness` entry when the index may be behind HEAD.", - "Use `query` to locate symbols, `context` for a 360-degree view, `impact` for blast radius (plan a refactor before you edit — OpenCodeHub does not edit source), `detect_changes` to map a diff to flows (verify a refactor after you apply it), `dependencies` for the external package list, `license_audit` for a copyleft/unknown/proprietary tier check of dependencies, `list_findings` to browse SARIF findings, `list_findings_delta` to diff the latest scan against a frozen baseline (new/fixed/unchanged/updated buckets), `scan` to run Priority-1 scanners (openWorld — spawns processes), `verdict` for a 5-tier PR decision (exit codes 0/1/2), `risk_trends` for per-community trend lines and 30-day projections, and `sql` for bespoke queries.", + "Use `query` to locate symbols, `context` for a 360-degree view, `impact` for blast radius (plan a refactor before you edit — OpenCodeHub does not edit source), `detect_changes` to map a diff to flows (verify a refactor after you apply it), `dependencies` for the external package list, `license_audit` for a copyleft/unknown/proprietary tier check of dependencies, `list_findings` to browse SARIF findings, `list_findings_delta` to diff the latest scan against a frozen baseline (new/fixed/unchanged/updated buckets), `scan` to run Priority-1 scanners (openWorld — spawns processes), `verdict` for a 5-tier PR decision (exit codes 0/1/2), `change_pack` for a deterministic diff-scoped pack (impacted subgraph + verdict + affected tests + char-heuristic cost estimate; CI-oriented), `risk_trends` for per-community trend lines and 30-day projections, and `sql` for bespoke queries.", "For cross-repo work, call `group_list` to discover named repo groups, then `group_query`/`group_status` to fan out BM25 search and staleness across the group. `group_query` returns `{ group, query, results: [{ _repo, _rrf_score, ... }], per_repo, warnings }`; results are tagged with the source repo and per-repo errors surface in `per_repo[].error` + `warnings[]` (the fan-out never aborts on a single-repo failure). Use `group_sync` to materialize a cross-repo contract registry (HTTP / gRPC / topic) under `~/.codehub/groups//contracts.json`, then `group_contracts` to list the DuckDB-backed FETCHES↔Route edges together with the registry's signature-matched cross-links.", ].join(" "); @@ -169,6 +170,7 @@ export function buildServer(opts: StartServerOptions = {}): RunningServer { registerListDeadCodeTool(server, ctx); registerScanTool(server, ctx); registerVerdictTool(server, ctx); + registerChangePackTool(server, ctx); registerRiskTrendsTool(server, ctx); registerRouteMapTool(server, ctx); registerApiImpactTool(server, ctx); diff --git a/packages/mcp/src/tools/change-pack.parity.test.ts b/packages/mcp/src/tools/change-pack.parity.test.ts new file mode 100644 index 00000000..965ce844 --- /dev/null +++ b/packages/mcp/src/tools/change-pack.parity.test.ts @@ -0,0 +1,184 @@ +// biome-ignore-all lint/complexity/useLiteralKeys: dot-access disallowed on Record index signatures +/** + * CLI <-> MCP parity for change-pack — MCP half. + * + * Both surfaces delegate to the single `@opencodehub/analysis.runChangePack` + * core, so they cannot disagree on VALUES. They differ only in serialization: + * + * - CLI (`packages/cli/src/commands/change-pack.ts:67`): emits the raw + * camelCase ChangePack via `JSON.stringify(pack, null, 2)` — a PURE + * passthrough, no reshaping. The CLI half of parity (that `--json` equals + * the raw pack) is asserted in `packages/cli/.../change-pack.test.ts`. + * - MCP (this package, `toStructured`): recases the top-level keys plus the + * interiors of `impacted_subgraph` and `cost_attribution` to snake_case, + * leaving nested array elements and the verdict camelCase. + * + * The MCP `toStructured` recasing is the ONLY place the two surfaces can drift + * in serialization, so this test pins it: `toStructured(pack)` must recase + * LOSSLESSLY — recasing it back must reproduce the raw pack value-for-value. + * A field `toStructured` drops or a key it mis-spells cannot round-trip, so + * this guards every field of the shared contract. Cross-package source import + * is deliberately avoided (it violates the MCP package `rootDir`); the CLI + * passthrough is verified in the CLI package's own hermetic test. + */ +import { strict as assert } from "node:assert"; +import { test } from "node:test"; +import type { ChangePack, CostAttribution, ImpactedSubgraph } from "@opencodehub/analysis"; +import { toStructured } from "./change-pack.js"; + +// One realistic ChangePack — the value contract both surfaces must preserve. +const FIXTURE: ChangePack = { + changedFiles: ["src/auth/token.ts", "src/auth/session.ts"], + changedSymbols: [ + { + id: "Function:src/auth/token.ts:refreshToken#1", + name: "refreshToken", + filePath: "src/auth/token.ts", + kind: "Function", + }, + { + id: "Method:src/auth/session.ts:Session.rotate#0", + name: "Session.rotate", + filePath: "src/auth/session.ts", + kind: "Method", + }, + ], + impactedSubgraph: { + nodes: [ + { + id: "Function:src/api/login.ts:handleLogin#1", + name: "handleLogin", + filePath: "src/api/login.ts", + kind: "Function", + minDepth: 1, + }, + { + id: "Function:src/api/refresh.ts:handleRefresh#1", + name: "handleRefresh", + filePath: "src/api/refresh.ts", + kind: "Function", + minDepth: 2, + }, + ], + edges: [ + { + fromId: "Function:src/api/login.ts:handleLogin#1", + toId: "Function:src/auth/token.ts:refreshToken#1", + type: "CALLS", + confidence: 1, + }, + ], + nodeCount: 2, + edgeCount: 1, + truncated: false, + } satisfies ImpactedSubgraph, + verdict: { + verdict: "dual_review", + confidence: 0.82, + decisionBoundary: { distancePercent: 40, nextTier: "expert_review" }, + reasoningChain: [{ label: "blastRadius", value: 12, severity: "warn" }], + recommendedReviewers: [], + githubLabels: ["review:dual"], + reviewCommentMarkdown: "## Verdict: dual_review\n", + exitCode: 1, + blastRadius: 12, + communitiesTouched: ["auth"], + changedFileCount: 2, + changedFiles: ["src/auth/token.ts", "src/auth/session.ts"], + affectedSymbolCount: 2, + }, + affectedTests: [ + { + id: "Function:src/auth/token.test.ts:refreshToken_expiry#0", + name: "refreshToken_expiry", + filePath: "src/auth/token.test.ts", + reachedFromSymbol: "Function:src/auth/token.ts:refreshToken#1", + depth: 1, + }, + ], + costAttribution: { + estimate: true, + tokenizerModel: "char-heuristic-v1", + changePackTokens: 1280, + blindBaselineTokens: 9600, + tokensSaved: 8320, + tokensSavedPct: 87, + affectedTestCount: 1, + totalTestCount: 40, + ciTestsSkipped: 39, + } satisfies CostAttribution, + changePackHash: "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2", +}; + +/** + * Recase the MCP snake_case payload back to CLI camelCase using the EXACT + * partial map `toStructured` applies: top-level keys + the interiors of + * impacted_subgraph and cost_attribution; nested element objects and the + * verdict stay camelCase. + */ +function mcpToCamel(mcp: Record): Record { + const sub = mcp["impacted_subgraph"] as Record; + const cost = mcp["cost_attribution"] as Record; + return { + changedFiles: mcp["changed_files"], + changedSymbols: mcp["changed_symbols"], + impactedSubgraph: { + nodes: sub["nodes"], + edges: sub["edges"], + nodeCount: sub["node_count"], + edgeCount: sub["edge_count"], + truncated: sub["truncated"], + }, + verdict: mcp["verdict"], + affectedTests: mcp["affected_tests"], + costAttribution: { + estimate: cost["estimate"], + tokenizerModel: cost["tokenizer_model"], + changePackTokens: cost["change_pack_tokens"], + blindBaselineTokens: cost["blind_baseline_tokens"], + tokensSaved: cost["tokens_saved"], + tokensSavedPct: cost["tokens_saved_pct"], + affectedTestCount: cost["affected_test_count"], + totalTestCount: cost["total_test_count"], + ciTestsSkipped: cost["ci_tests_skipped"], + }, + changePackHash: mcp["change_pack_hash"], + }; +} + +test("MCP toStructured recases losslessly — values match the raw CLI pack", () => { + // The CLI emits exactly this raw object (JSON round-trip models its + // JSON.stringify passthrough). The MCP payload, recased back, must equal it. + const cliRaw = JSON.parse(JSON.stringify(FIXTURE)) as Record; + const mcpCamel = mcpToCamel(toStructured(FIXTURE)); + assert.deepEqual( + mcpCamel, + cliRaw, + "MCP structuredContent must carry identical values to the CLI's raw ChangePack", + ); +}); + +test("content hash is preserved verbatim through the MCP recasing", () => { + assert.equal(toStructured(FIXTURE)["change_pack_hash"], FIXTURE.changePackHash); +}); + +test("cost attribution stays an estimate with values intact through MCP", () => { + const cost = toStructured(FIXTURE)["cost_attribution"] as Record; + assert.equal(cost["estimate"], true); + assert.equal(cost["tokenizer_model"], "char-heuristic-v1"); + assert.equal(cost["tokens_saved"], FIXTURE.costAttribution.tokensSaved); + assert.equal(cost["ci_tests_skipped"], FIXTURE.costAttribution.ciTestsSkipped); +}); + +test("no ChangePack field is dropped by toStructured", () => { + // Every top-level ChangePack key must be represented in the recased payload. + const recased = mcpToCamel(toStructured(FIXTURE)); + for (const key of Object.keys(FIXTURE)) { + assert.ok(key in recased, `toStructured dropped top-level field: ${key}`); + assert.notEqual( + (recased as Record)[key], + undefined, + `toStructured produced undefined for: ${key}`, + ); + } +}); diff --git a/packages/mcp/src/tools/change-pack.test.ts b/packages/mcp/src/tools/change-pack.test.ts new file mode 100644 index 00000000..1b36dac3 --- /dev/null +++ b/packages/mcp/src/tools/change-pack.test.ts @@ -0,0 +1,151 @@ +// biome-ignore-all lint/complexity/useLiteralKeys: dot-access disallowed on Record index signatures +import { strict as assert } from "node:assert"; +import { test } from "node:test"; +import { getToolHandler, makeFakeGraphStore, withMcpHarness } from "../test-utils.js"; +import { registerChangePackTool } from "./change-pack.js"; +import type { ToolContext } from "./shared.js"; + +/** + * The analysis `runChangePack` never throws — it fails open to an empty diff + * when git is unavailable (which it is in the temp harness repo). That gives + * the MCP tool a coherent, deterministic ChangePack to wrap without needing a + * real lbug graph DB or a git checkout. These tests assert the snake_case + * `structuredContent` shape the CLI parity test keys against. + */ +async function withHarness( + fn: ( + ctx: ToolContext, + server: import("@modelcontextprotocol/sdk/server/mcp.js").McpServer, + ) => Promise, +): Promise { + await withMcpHarness( + { + tmpPrefix: "codehub-mcp-change-pack-", + storeFactory: () => makeFakeGraphStore(), + }, + async ({ server, pool, home }) => { + const ctx: ToolContext = { pool, home }; + await fn(ctx, server); + }, + ); +} + +interface StructuredChangePack { + changed_files: unknown[]; + changed_symbols: unknown[]; + impacted_subgraph: { + nodes: unknown[]; + edges: unknown[]; + node_count: number; + edge_count: number; + truncated: boolean; + }; + verdict: { verdict: string; exitCode: number }; + affected_tests: unknown[]; + cost_attribution: { + estimate: boolean; + tokenizer_model: string; + change_pack_tokens: number; + blind_baseline_tokens: number; + tokens_saved: number; + tokens_saved_pct: number; + affected_test_count: number; + total_test_count: number; + ci_tests_skipped: number; + }; + change_pack_hash: string; + next_steps: string[]; +} + +test("change_pack returns the snake_case structuredContent envelope", async () => { + await withHarness(async (ctx, server) => { + registerChangePackTool(server, ctx); + const handler = getToolHandler(server, "change_pack"); + const result = await handler({ repo: "fakerepo" }, {}); + assert.equal(result.isError, undefined); + const sc = result.structuredContent as unknown as StructuredChangePack; + + // Top-level snake_case keys present. + assert.ok(Array.isArray(sc.changed_files)); + assert.ok(Array.isArray(sc.changed_symbols)); + assert.ok(Array.isArray(sc.affected_tests)); + assert.ok(sc.impacted_subgraph); + assert.ok(sc.verdict); + assert.ok(sc.cost_attribution); + + // Impacted subgraph counts are snake_cased and numeric. + assert.equal(typeof sc.impacted_subgraph.node_count, "number"); + assert.equal(typeof sc.impacted_subgraph.edge_count, "number"); + assert.equal(typeof sc.impacted_subgraph.truncated, "boolean"); + }); +}); + +test("change_pack cost attribution is always a char-heuristic estimate", async () => { + await withHarness(async (ctx, server) => { + registerChangePackTool(server, ctx); + const handler = getToolHandler(server, "change_pack"); + const result = await handler({ repo: "fakerepo" }, {}); + const sc = result.structuredContent as unknown as StructuredChangePack; + + assert.equal(sc.cost_attribution.estimate, true); + assert.equal(sc.cost_attribution.tokenizer_model, "char-heuristic-v1"); + assert.equal(typeof sc.cost_attribution.change_pack_tokens, "number"); + assert.equal(typeof sc.cost_attribution.blind_baseline_tokens, "number"); + assert.equal(typeof sc.cost_attribution.tokens_saved, "number"); + assert.equal(typeof sc.cost_attribution.tokens_saved_pct, "number"); + assert.equal(typeof sc.cost_attribution.affected_test_count, "number"); + assert.equal(typeof sc.cost_attribution.total_test_count, "number"); + assert.equal(typeof sc.cost_attribution.ci_tests_skipped, "number"); + }); +}); + +test("change_pack hash is a deterministic hex string", async () => { + await withHarness(async (ctx, server) => { + registerChangePackTool(server, ctx); + const handler = getToolHandler(server, "change_pack"); + const first = await handler({ repo: "fakerepo" }, {}); + const second = await handler({ repo: "fakerepo" }, {}); + const a = (first.structuredContent as unknown as StructuredChangePack).change_pack_hash; + const b = (second.structuredContent as unknown as StructuredChangePack).change_pack_hash; + + assert.equal(typeof a, "string"); + assert.ok(/^[0-9a-f]+$/.test(a), `expected hex hash, got ${a}`); + // Identical inputs hash alike — the envelope folds the query into the hash. + assert.equal(a, b); + }); +}); + +test("change_pack carries next_steps toward verdict and impact", async () => { + await withHarness(async (ctx, server) => { + registerChangePackTool(server, ctx); + const handler = getToolHandler(server, "change_pack"); + const result = await handler({ repo: "fakerepo" }, {}); + const sc = result.structuredContent as unknown as StructuredChangePack; + + assert.ok(Array.isArray(sc.next_steps)); + assert.ok(sc.next_steps.some((s) => s.includes("verdict"))); + assert.ok(sc.next_steps.some((s) => s.includes("impact"))); + }); +}); + +test("change_pack threads optional knobs through without error", async () => { + await withHarness(async (ctx, server) => { + registerChangePackTool(server, ctx); + const handler = getToolHandler(server, "change_pack"); + const result = await handler( + { + repo: "fakerepo", + base: "main", + head: "HEAD", + depth: 2, + minConfidence: 1, + budget: 50_000, + includeTestsInSubgraph: true, + }, + {}, + ); + assert.equal(result.isError, undefined); + const sc = result.structuredContent as unknown as StructuredChangePack; + assert.equal(sc.cost_attribution.estimate, true); + }); +}); diff --git a/packages/mcp/src/tools/change-pack.ts b/packages/mcp/src/tools/change-pack.ts new file mode 100644 index 00000000..a255a4dc --- /dev/null +++ b/packages/mcp/src/tools/change-pack.ts @@ -0,0 +1,173 @@ +/** + * `change_pack` — deterministic, diff-scoped context pack for CI agents. + * + * Wraps `@opencodehub/analysis.runChangePack`, which composes the diff → + * per-symbol upstream fan-out (RETAINED as an impacted subgraph, not collapsed + * to a scalar), the 5-tier verdict, the affected tests, and a char-heuristic + * cost-attribution estimate. Read-only over the graph; shells out to git for + * the diff (fails open to an empty pack); calls no LLM and mutates no nodes. + * + * Output: the full {@link ChangePack} snake-cased under `structuredContent` + * (see {@link toStructured}), plus a concise human summary in `content`. The + * field VALUES are identical to the CLI's `--json` raw camelCase ChangePack; + * only the key casing differs (the parity test normalizes casing). + */ + +import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { runChangePack as analysisRunChangePack, type ChangePack } from "@opencodehub/analysis"; +import { z } from "zod"; +import { toolErrorFromUnknown } from "../error-envelope.js"; +import { withNextSteps } from "../next-step-hints.js"; +import { stalenessFromMeta } from "../staleness.js"; +import { + fromToolResult, + repoArgShape, + type ToolContext, + type ToolResult, + toToolResult, + withStore, +} from "./shared.js"; + +const ChangePackInput = { + base: z.string().optional().describe("Base git ref (default 'main')."), + head: z.string().optional().describe("Head git ref (default 'HEAD')."), + depth: z.number().int().positive().optional().describe("Upstream traversal depth (default 4)."), + minConfidence: z + .number() + .min(0) + .max(1) + .optional() + .describe("Traversal confidence floor (default 0.7; 1.0 = SCIP-precise edges only)."), + budget: z + .number() + .int() + .positive() + .optional() + .describe("Context budget in heuristic tokens (default 100000)."), + includeTestsInSubgraph: z + .boolean() + .optional() + .describe( + "Retain test nodes in the impacted subgraph; default false = tests surface only in affected_tests.", + ), + ...repoArgShape, +}; + +interface ChangePackArgs { + readonly base?: string | undefined; + readonly head?: string | undefined; + readonly depth?: number | undefined; + readonly minConfidence?: number | undefined; + readonly budget?: number | undefined; + readonly includeTestsInSubgraph?: boolean | undefined; + readonly repo?: string | undefined; + readonly repo_uri?: string | undefined; +} + +/** + * Snake-case the ChangePack for `structuredContent`. The field VALUES match + * the analysis camelCase ChangePack one-for-one; only the keys are recased so + * the wire payload follows the rest of the MCP surface's snake_case + * convention. The parity test compares this (minus envelope keys) against the + * CLI's raw `--json` ChangePack after normalizing casing. + */ +export function toStructured(pack: ChangePack): Record { + return { + changed_files: pack.changedFiles, + changed_symbols: pack.changedSymbols, + impacted_subgraph: { + nodes: pack.impactedSubgraph.nodes, + edges: pack.impactedSubgraph.edges, + node_count: pack.impactedSubgraph.nodeCount, + edge_count: pack.impactedSubgraph.edgeCount, + truncated: pack.impactedSubgraph.truncated, + }, + verdict: pack.verdict, + affected_tests: pack.affectedTests, + cost_attribution: { + estimate: pack.costAttribution.estimate, + tokenizer_model: pack.costAttribution.tokenizerModel, + change_pack_tokens: pack.costAttribution.changePackTokens, + blind_baseline_tokens: pack.costAttribution.blindBaselineTokens, + tokens_saved: pack.costAttribution.tokensSaved, + tokens_saved_pct: pack.costAttribution.tokensSavedPct, + affected_test_count: pack.costAttribution.affectedTestCount, + total_test_count: pack.costAttribution.totalTestCount, + ci_tests_skipped: pack.costAttribution.ciTestsSkipped, + }, + change_pack_hash: pack.changePackHash, + }; +} + +/** Concise human summary mirroring the verdict/detect_changes text style. */ +function renderText(pack: ChangePack): string { + const lines: string[] = []; + lines.push( + `Change-pack: ${pack.changedFiles.length} file(s), ${pack.changedSymbols.length} symbol(s) changed.`, + ); + lines.push( + `Impacted subgraph: ${pack.impactedSubgraph.nodeCount} node(s), ${pack.impactedSubgraph.edgeCount} edge(s)${ + pack.impactedSubgraph.truncated ? " (truncated)" : "" + }.`, + ); + lines.push(`Verdict: ${pack.verdict.verdict} (exit ${pack.verdict.exitCode}).`); + lines.push( + `Affected tests: ${pack.costAttribution.affectedTestCount} of ${pack.costAttribution.totalTestCount}; CI tests skippable: ${pack.costAttribution.ciTestsSkipped}.`, + ); + lines.push( + `Tokens saved (est.): ${pack.costAttribution.tokensSaved} (${pack.costAttribution.tokensSavedPct}% vs. blind baseline).`, + ); + return lines.join("\n"); +} + +export async function runChangePack(ctx: ToolContext, args: ChangePackArgs): Promise { + const call = await withStore(ctx, args, async (store, resolved) => { + try { + const pack = await analysisRunChangePack(store.graph, { + repoPath: resolved.repoPath, + ...(args.base !== undefined ? { base: args.base } : {}), + ...(args.head !== undefined ? { head: args.head } : {}), + ...(args.depth !== undefined ? { depth: args.depth } : {}), + ...(args.minConfidence !== undefined ? { minConfidence: args.minConfidence } : {}), + ...(args.budget !== undefined ? { budget: args.budget } : {}), + ...(args.includeTestsInSubgraph !== undefined + ? { includeTestsInSubgraph: args.includeTestsInSubgraph } + : {}), + }); + + const next = [ + "call `verdict` for the full PR review gate (reasoning chain + recommended reviewers)", + "call `impact` on a changed symbol to drill its individual blast radius", + ]; + + return withNextSteps( + renderText(pack), + toStructured(pack), + next, + stalenessFromMeta(resolved.meta), + ); + } catch (err) { + return toolErrorFromUnknown(err); + } + }); + return toToolResult(call); +} + +export function registerChangePackTool(server: McpServer, ctx: ToolContext): void { + server.registerTool( + "change_pack", + { + title: "Diff-scoped change-pack", + description: + "Deterministic, diff-scoped context pack for a git range: the impacted upstream subgraph (retained, not collapsed), the 5-tier verdict, the affected tests, and a char-heuristic cost estimate (tokens saved vs. opening every impacted file blind). CI-oriented — read-only, no LLM, byte-deterministic with a content hash.", + inputSchema: ChangePackInput, + annotations: { + readOnlyHint: true, + destructiveHint: false, + openWorldHint: false, + idempotentHint: false, + }, + }, + async (args) => fromToolResult(await runChangePack(ctx, args)), + ); +} From 2a46fc12f3cb96a7eb231c130dda44335bcd0cda Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 14 Jun 2026 17:06:20 +0000 Subject: [PATCH 3/7] docs(repo): add EARS spec 007 for change-pack The ERPAVal spec + derived task list driving the change_pack feature: the four output sections, CLI/MCP parity AC, determinism/byte-identity, affected-test selection semantics, and the char-heuristic cost-attribution decision (tokenizer is provenance-only in the repo, so cost figures are explicitly estimates). --- .erpaval/specs/007-change-pack/spec.md | 81 +++++++++++++++++++++++++ .erpaval/specs/007-change-pack/tasks.md | 30 +++++++++ 2 files changed, 111 insertions(+) create mode 100644 .erpaval/specs/007-change-pack/spec.md create mode 100644 .erpaval/specs/007-change-pack/tasks.md diff --git a/.erpaval/specs/007-change-pack/spec.md b/.erpaval/specs/007-change-pack/spec.md new file mode 100644 index 00000000..fc20ad5c --- /dev/null +++ b/.erpaval/specs/007-change-pack/spec.md @@ -0,0 +1,81 @@ +# EARS Spec 007 — Diff-scoped change-pack (impact + affected-tests + cost attribution) + +**Session**: session-6afa8d · **Branch**: `feat/change-pack` (cut from `origin/main` @ `94b9165`) · **Parent roadmap**: `.erpaval/ROADMAP.md` (M5 deterministic code-packs is the sibling pattern) + +**Decision:** `change-pack` is a deterministic, diff-scoped capability that generalizes what `computeVerdict` already does internally (diff → per-symbol upstream fan-out) but **retains the impacted subgraph** instead of collapsing it to a scalar `blastRadius`, **surfaces the affected tests** that `impact.ts:490` currently classifies-then-drops, and adds a **cost-attribution** block. It ships with full CLI↔MCP parity backed by one shared core function, and the first CLI↔MCP parity test in the repo. + +## Context (Explore + Research consolidated) + +Full detail in `.erpaval/sessions/session-6afa8d/{explore-detect-changes,explore-verdict-impact,explore-pack,explore-parity,research-tokenizer-tests}.md`. + +### What exists to build on +- **`runDetectChanges(graph, {scope,compareRef,repoPath})`** (`packages/analysis/src/detect-changes.ts:182-237`) → `DetectChangesResult{changedFiles, affectedSymbols[{id,name,filePath,kind,changedLines}], affectedProcesses, summary{fileCount,symbolCount,processCount,risk}}`. This is the diff→symbols extractor. Reuse verbatim. +- **`runImpact(graph, {target,direction,maxDepth,minConfidence,relationTypes,includeTests})`** (`packages/analysis/src/impact.ts:393-553`). Upstream traversal = callers/dependents. Already plumbs `includeTests` (default false → tests dropped at `impact.ts:486-493` via exported `isTestPath`). `ImpactResult{byDepth,traversedEdges,affectedProcesses,affectedModules,totalAffected,risk}`. +- **`computeVerdict(graph, {repoPath,base,head,config})`** (`packages/analysis/src/verdict.ts:115-312`) → 5-tier `VerdictResponse`. Internally calls `runDetectChanges` (scope compare, two-dot `base..head`) then loops first 20 symbols through `runImpact(direction:"upstream",maxDepth:3)` keeping `max(totalAffected)`. **It does NOT set includeTests, so its blast radius is production-only.** +- **`@opencodehub/pack`** determinism contract: `buildManifest`→`packHash = sha256(canonicalJson(snake_case manifest, pack_hash:""))` (`manifest.ts:51-66`); `canonicalJson` (RFC-8785 sorted keys) in `core-types/src/hash.ts`; per-body `sha256`; bodies written, manifest LAST. Determinism test pattern at `pack-determinism.test.ts` (run twice, assert packHash + readdir + `Buffer.compare===0`). +- **Edge model**: `CodeRelation{from,to,type,confidence,step?}`, 25 string `RelationType`s (`core-types/src/edges.ts:3-28`). Direction: `from`=actor, `to`=acted-upon. **CALLS = caller→callee; to find tests of a changed symbol, walk `direction:"up"` (incoming).** SCIP edges carry `confidence=1.0`, heuristic `0.5`; default floor `0.7`. +- **Test classification is PATH-BASED, no `Test` NodeKind.** Three predicates exist: `isTestPath` (`impact.ts:66-77`, narrow), a private copy (`processes.ts:575`), and the multi-language `isTestFile`/`pairedTestCandidates` (`ingestion/.../temporal-helpers/test-pair.ts:13-37`). +- **Parity pattern**: both CLI command + MCP tool call ONE shared `@opencodehub/analysis` (or pack) function (cleanest exemplar: `detect_changes`). CLI = commander v15 + per-command `--json`. MCP = `withStore`+`withNextSteps`+`stalenessFromMeta`+`toolErrorFromUnknown`, registered in `server.ts`, NO `outputSchema`. **No CLI↔MCP parity test exists — change-pack adds the first.** + +### Load-bearing research decisions (settle the spec's hard choices) +- **TOKENIZER IS METADATA-ONLY.** The `openai:o200k_base@tiktoken-0.8.0` pin is a provenance label; no encoder runs. Pack counts chonkie's `'character'` tokenizer (1 char = 1 token) or a `len/4` degraded heuristic. There is no tiktoken/anthropic dep in the repo. **DECISION: v1 cost-attribution reuses the `len/4` heuristic for BOTH the scoped-pack tokens and the blind baseline (zero new deps, byte-deterministic, single token model), and the output block MUST self-label `estimate: true` with `tokenizer_model: "char-heuristic-v1"`. No model-token claims; no borrowed marketing percentages in the artifact.** +- **AFFECTED TESTS = upstream reachability, read from the graph.** `runImpact(direction:"upstream", includeTests:true)`, keep only hits where `isTestPath` is true. **MUST read the already-ingested graph via `runImpact`; MUST NOT re-derive edges** (prior lessons `scip-callee-definition-site` over-reports 27× and `scip-0-indexed-vs-graph-1-indexed` drops ~85% if re-derived wrong). Inherit correct SCIP resolution for free. +- **Determinism**: storage returns `(depth, nodeId)` order; change-pack MUST re-sort affected tests + subgraph nodes by `id` asc and edges by `(from,type,to,step)` for byte-identity, matching the pack's canonical-JSON discipline. + +### Convention guardrails +- **`commitlint.config.mjs` scope-enum already includes `pack`** (verified this session: scope list includes `analysis, cli, mcp, pack, core-types, storage, …`). No new scope needed if change-pack core lands in `@opencodehub/analysis` or `@opencodehub/pack`. **First commit MUST verify scope-enum covers chosen package(s).** +- **`scripts/check-banned-strings.sh`**: `change`, `pack`, `test`, `cost`, `token` are all safe — no collision. +- **`mise run check`** = lint (biome) → typecheck (`pnpm -r exec tsc --noEmit`) → test → banned-strings. Every commit exits 0. +- **graphHash byte-identity** (ROADMAP constraint 6): change-pack is read-only over the graph; it emits NO new nodes/edges, so the invariant holds trivially. MUST NOT backfill or mutate the graph. +- **No LLM in query path** (ROADMAP rail 4): change-pack does zero LLM calls. Summarizer untouched. + +## Ubiquitous requirements + +- **U1**: `change_pack_hash` byte-identity — same `(commit, base, head, tokenizer_model, budget, depth, minConfidence)` → byte-identical change-pack output and same hash. Verified by a determinism suite (mirror `pack-determinism.test.ts`). +- **U2**: CLI (`codehub change-pack`) and MCP (`change_pack`) MUST produce structurally identical payloads for identical inputs — both call the single shared core `runChangePack`. Verified by a CLI↔MCP parity test (the repo's first). +- **U3**: change-pack MUST read the ingested graph only (via `runImpact`/`runDetectChanges`); it MUST NOT re-derive edges, MUST NOT mutate the graph, MUST NOT call any LLM. +- **U4**: `bash scripts/check-banned-strings.sh` exits 0; `mise run check` exits 0 after every commit. +- **U5**: All output collections MUST be deterministically ordered — nodes/tests by `id` asc, edges by `(from,type,to,step)`, files by path. No locale-dependent or insertion-order leakage into hashed bytes. +- **U6**: The cost-attribution block MUST self-label as an estimate (`estimate:true`, `tokenizer_model:"char-heuristic-v1"`). It MUST NOT present heuristic counts as model tokens. + +## Event-driven requirements + +- **E1**: When a user runs `codehub change-pack --base --head ` (defaults base=`main`, head=`HEAD`), the CLI MUST emit a `ChangePack` object containing all four sections: `impacted_subgraph`, `verdict`, `affected_tests`, `cost_attribution`. With `--json` it prints `JSON.stringify(changePack,null,2)`; without, a human summary. +- **E2**: When the `change_pack` MCP tool is called with `{base?,head?,...repoArgShape}`, it MUST return the same `ChangePack` payload as `structuredContent` (snake_case), wrapped via `withNextSteps` with staleness + next-step hints, sharing the identical `runChangePack` core with the CLI. +- **E3**: When change-pack assembles the impacted subgraph, it MUST fan out from each changed symbol via `runImpact(direction:"upstream", maxDepth:, minConfidence:)`, UNION the per-symbol `byDepth` nodes + `traversedEdges` into one deduplicated subgraph (dedup nodes by `id`, edges by `(from,type,to,step)`), and retain it — NOT collapse to a scalar. +- **E4**: When change-pack selects affected tests, it MUST run the upstream fan-out with `includeTests:true`, filter hits to those whose `filePath` satisfies the test predicate, dedupe by node id, and return `affected_tests[]` each with `{id,name,filePath,reachedFromSymbol,depth}`, sorted by `(filePath,id)`. +- **E5**: When change-pack computes cost attribution, it MUST emit `{estimate:true, tokenizer_model, change_pack_tokens, blind_baseline_tokens, tokens_saved, tokens_saved_pct, affected_test_count, total_test_count, ci_tests_skipped}` where `change_pack_tokens` = `len/4` over the serialized pack body, and `blind_baseline_tokens` = the same heuristic over the full text of every File node in the impacted subgraph (the conservative "agent reads each impacted file" baseline). `ci_tests_skipped = total_test_count − affected_test_count`. +- **E6**: When the diff is empty or touches only files with no graph symbols, change-pack MUST short-circuit to an empty-but-valid `ChangePack` (verdict `auto_merge`, empty subgraph, empty affected_tests, cost_attribution with zero savings), mirroring `finaliseEmpty` in verdict.ts — never throw. +- **E7**: When the repo is not indexed / ambiguous / git fails, the MCP tool MUST return the conventional structured error envelope (via `withStore` + `toolErrorFromUnknown`); the CLI MUST exit non-zero with a readable message. (Git failures fail-open to empty per `runGit` convention.) + +## State-driven requirements + +- **S1**: While the index is stale relative to the working tree, change-pack MUST still produce a result AND surface the staleness envelope (`stalenessFromMeta`) so the caller knows the graph may lag the diff. (Parity: CLI prints a staleness note in non-JSON mode.) +- **S2**: While `--include-tests-in-subgraph` is false (default), the impacted_subgraph counts/risk MUST reflect production code only (tests excluded from the subgraph), EXACTLY as verdict does today — the affected_tests section is the dedicated place tests surface. When true, tests are also retained in the subgraph. + +## Optional-feature requirements + +- **O1**: Where `--budget ` is supplied (default 100_000), change-pack MAY trim the impacted_subgraph context body to fit the budget by dropping lowest-PageRank/highest-depth nodes first, recording `budget_applied:true` and the drop count in the manifest. Trimming MUST be deterministic (stable sort before drop). v1 MAY ship budget as a recorded-but-not-enforced field if trimming risks determinism; the spec permits deferring enforcement to a follow-up as long as the field is present and documented. +- **O2**: Where `--depth ` is supplied (default 4 — one deeper than verdict's 3, since tests sit deep in call chains per research), it sets the upstream `maxDepth`. Where `--min-confidence ` is supplied (default 0.7), it sets the traversal floor; `--min-confidence 1.0` yields SCIP-precise-only edges. + +## Unwanted-behavior requirements + +- **UW1**: If change-pack would emit non-deterministic bytes (unsorted collection, timestamp, absolute path, run id), that is a defect — the determinism test MUST catch it. No wall-clock, no `Math.random`, no absolute paths in hashed bytes. +- **UW2**: If a traversal hits the depth cap with more nodes than a hard ceiling (e.g. 5000), change-pack MUST truncate deterministically (by `id` after depth ordering) and record `truncated:true` + the cap — never emit an unbounded subgraph or hang. +- **UW3**: change-pack MUST NOT introduce ERPAVal spec-coordinate leakage (`AC-*`, `E*`, `U*`, `S*`) into source comments, tool descriptions, CLI help, or test names (prior lesson `no-spec-coordinate-leakage-into-source`). Coordinates live in commits/PR body only. + +## Acceptance criteria → task seeds + +| AC | Requirement | Package | Parallel-safe | +|----|-------------|---------|---------------| +| AC-1 | `runChangePack(graph, query): Promise` core — reuse runDetectChanges + runImpact upstream fan-out, union/dedupe subgraph, retain it | `@opencodehub/analysis` | [P] foundational | +| AC-2 | Affected-test selection: includeTests upstream fan-out + isTestPath filter + stable sort + reachedFromSymbol/depth | `@opencodehub/analysis` | Dependencies: AC-1 | +| AC-3 | Cost-attribution: char-heuristic counter, blind-baseline from impacted File nodes, estimate-labeled block | `@opencodehub/analysis` | Dependencies: AC-1 | +| AC-4 | Deterministic serialization + `change_pack_hash` (canonicalJson, sorted collections) | `@opencodehub/analysis` (+ reuse core-types hash) | Dependencies: AC-1, AC-2, AC-3 | +| AC-5 | MCP `change_pack` tool — withStore/withNextSteps/staleness/error envelope, registered in server.ts | `@opencodehub/mcp` | Dependencies: AC-4 | +| AC-6 | CLI `codehub change-pack` command — commander registration + --json + flags matching MCP fields | `@opencodehub/cli` | Dependencies: AC-4 | +| AC-7 | CLI↔MCP parity test (first in repo) — same args → deep-equal payload minus envelope keys | `@opencodehub/cli` or `mcp` test | Dependencies: AC-5, AC-6 | +| AC-8 | Determinism test — run twice, assert hash + byte-identity | `@opencodehub/analysis` test | Dependencies: AC-4 | +| AC-9 | Unit tests per core fn (subgraph union/dedup, test selection, cost math, empty-diff short-circuit) | `@opencodehub/analysis` test | Dependencies: AC-1..AC-3 | + +**Decision — where the core lives:** `@opencodehub/analysis` (NOT pack). Rationale: it consumes `runImpact`+`runDetectChanges` which already live there; the verdict precedent is there; pack depends on analysis, so analysis is the lower layer and avoids a cycle. The output is a structured JSON object (not a 9-item BOM on disk), so it does not need pack's file-writing machinery — though it borrows `canonicalJson`/`sha256Hex` from `core-types` exactly as pack does. diff --git a/.erpaval/specs/007-change-pack/tasks.md b/.erpaval/specs/007-change-pack/tasks.md new file mode 100644 index 00000000..748a212d --- /dev/null +++ b/.erpaval/specs/007-change-pack/tasks.md @@ -0,0 +1,30 @@ +# Tasks 007 — change-pack (derived from spec.md ACs) + +Branch `feat/change-pack`. Core lives in `@opencodehub/analysis`. Waves are dependency-ordered; tasks marked [P] run in parallel within a wave. + +## Wave 1 — analysis core (foundational) +- **T-AC-1** `@opencodehub/analysis`: add `runChangePack(graph, query): Promise` + types (`ChangePackQuery{repoPath,base?,head?,depth?,minConfidence?,budget?,includeTestsInSubgraph?}`, `ChangePack{impactedSubgraph,verdict,affectedTests,costAttribution,changePackHash}`). Reuse `runDetectChanges` (scope compare, two-dot base..head) for changed symbols; fan out each symbol via `runImpact(direction:"upstream",maxDepth:depth,minConfidence)`; UNION byDepth nodes + traversedEdges, dedupe (nodes by id, edges by (from,type,to,step)), retain full subgraph. Wire `computeVerdict` for the verdict section. Export from `analysis/src/index.ts`. **No new deps. No graph mutation. No LLM.** (spec AC-1, E3, E6, U3) + +## Wave 2 — core dimensions (parallel, all depend on T-AC-1) +- **T-AC-2** [P] Affected-test selection: in `runChangePack`, run the upstream fan-out a second time (or reuse with includeTests) with `includeTests:true`, filter hits to `isTestPath(filePath)` true, dedupe by id, map each to `{id,name,filePath,reachedFromSymbol,depth}`, sort `(filePath,id)`. Decide predicate: start with exported `isTestPath` (impact.ts:66); note `test-pair.ts` multi-language variant as a follow-up if coverage gaps. (AC-2, E4, S2) +- **T-AC-3** [P] Cost attribution: `charHeuristicTokens(text)=max(1,ceil(len/4))`; `change_pack_tokens` over serialized pack body; `blind_baseline_tokens` = sum of charHeuristicTokens over full text of every File node in impacted subgraph (read file bytes via repoPath join); emit `{estimate:true,tokenizer_model:"char-heuristic-v1",change_pack_tokens,blind_baseline_tokens,tokens_saved,tokens_saved_pct,affected_test_count,total_test_count,ci_tests_skipped}`. (AC-3, E5, U6) + +## Wave 3 — determinism (depends on T-AC-1,2,3) +- **T-AC-4** Deterministic serialization: serialize `ChangePack` via `canonicalJson` (core-types/src/hash.ts), all collections pre-sorted (U5); `changePackHash = sha256Hex(canonicalJson({...pack, changePackHash:""}))`. Add depth/minConfidence/budget/tokenizer_model into the hashed envelope (U1). Truncation guard: cap subgraph at 5000 nodes, deterministic by (depth,id), record `truncated`. (AC-4, U1, U5, UW1, UW2) + +## Wave 4 — surfaces (parallel, both depend on T-AC-4) +- **T-AC-5** [P] MCP `change_pack` tool `packages/mcp/src/tools/change-pack.ts`: copy detect-changes skeleton — `ChangePackInput={base?,head?,depth?,minConfidence?,budget?,includeTestsInSubgraph?,...repoArgShape}`, `runChangePack(ctx,args)` via `withStore`→`analysisRunChangePack(store.graph,query)`→`withNextSteps(text, snake_case payload, nextSteps, stalenessFromMeta(meta))`, `try/catch→toolErrorFromUnknown`. `registerChangePackTool(server,ctx)` annotations readOnlyHint:true,destructiveHint:false,openWorldHint:false,idempotentHint:false. Wire import + registration in `server.ts`; add to instructions prose. NO outputSchema. (AC-5, E2, E7, S1) +- **T-AC-6** [P] CLI `codehub change-pack` `packages/cli/src/commands/change-pack.ts`: `runChangePackCmd(opts)` → `openStoreForCommand`→`analysisRunChangePack(store.graph,query)`→`--json`?JSON.stringify:human summary, try/finally store.close. Register in `cli/src/index.ts` near the CLI-siblings section, flags matching MCP field names + `--json`. Set process.exitCode non-zero on error. (AC-6, E1, E7, S1) + +## Wave 5 — verification (depends on surfaces + core) +- **T-AC-7** [P] CLI↔MCP parity test (repo's FIRST): run `runChangePackCmd({json:true,...})` capturing stdout JSON + `runChangePack(ctx,sameArgs)` reading structuredContent; assert CLI JSON deep-equals MCP structuredContent minus `{next_steps,_meta}`. Hermetic via test seams + fake store/registry. (AC-7, U2) +- **T-AC-8** [P] Determinism test `analysis`: run `runChangePack` twice on same fixture+query, assert `changePackHash` equal + serialized bytes identical. Mirror pack-determinism.test.ts. (AC-8, U1) +- **T-AC-9** [P] Unit tests: subgraph union/dedup, affected-test selection (upstream reachability + isTestPath filter), cost math (char heuristic + baseline), empty-diff short-circuit, truncation guard. (AC-9, E6, UW2) + +## Validate (Gate 2) +- `mise run check` exit 0 (lint+typecheck+test+banned-strings); full `pnpm -r test` green; parity + determinism tests green. Every failure is a blocker. + +## Compound +- Commit per wave (conventional, scope `analysis`/`mcp`/`cli`), push `feat/change-pack`, open PR. Extract lessons (CL-LESSONS): candidate lessons — "tokenizerId is provenance not encoder (cost features need explicit heuristic labeling)", "generalize verdict's scalar fan-out by retaining the subgraph", "CLI↔MCP parity = one shared analysis core + first parity test pattern". Update INDEX, record lessons.yaml. +``` +``` From 05041d61863e0c7542236c226621ec463222eadc Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 14 Jun 2026 17:10:32 +0000 Subject: [PATCH 4/7] docs(repo): compound change-pack lessons Two durable ERPAVal lessons from the change-pack session + INDEX pointers: - tokenizerId is provenance, not an encoder (cost features need a labeled heuristic or a real tokenizer dep; the repo ships neither today). - CLI<->MCP parity via one shared analysis core + a split parity test that respects the mcp package rootDir; the server roster contract bumps by design. --- .erpaval/INDEX.md | 2 + ...ity-via-one-shared-core-plus-split-test.md | 62 +++++++++++++++++++ ...kenizer-id-is-provenance-not-an-encoder.md | 58 +++++++++++++++++ 3 files changed, 122 insertions(+) create mode 100644 .erpaval/solutions/best-practices/cli-mcp-parity-via-one-shared-core-plus-split-test.md create mode 100644 .erpaval/solutions/conventions/tokenizer-id-is-provenance-not-an-encoder.md diff --git a/.erpaval/INDEX.md b/.erpaval/INDEX.md index b45dcb8d..c94ef0ef 100644 --- a/.erpaval/INDEX.md +++ b/.erpaval/INDEX.md @@ -35,6 +35,7 @@ development sessions. Solutions are reusable; specs are per-feature. - [Fixed-offset asset resolvers break on bundle collapse](solutions/best-practices/fixed-offset-asset-resolvers-break-on-bundle-collapse.md) — after a tsup collapse flattens `dist/commands/x.js`→`dist/x-.js`, EVERY `import.meta.url` resolver with a fixed `..` offset shifts by a level. #189 fixed only doctor.ts; 6 others shipped broken ~5 days (init/ci-init/setup/betterleaks + 2 ingestion WASM resolvers), two SILENTLY (`analyze` → 0 symbols, exit 0). Convert all to walk-up probes; the CLI bundles deps from `dist/` so rebuild in dep order; verify from a packed-tarball global install, not the hot node_modules. - [Silent degradation needs a run-level guard](solutions/best-practices/silent-degradation-needs-a-run-level-guard.md) — a `catch→warn→continue` loop hides a GLOBAL stage failure: every item fails identically, the run exits 0 with empty output. Fix is a PAIR — (a) a thrown sentinel distinguishing global-init failure from per-item failure (set `.name`, Piscina drops the prototype across threads), (b) a run-level "processed N but produced 0 expected outputs" guard with a distinct exit code. Pick the denominator carefully (exclude regex-provider/placeholder output) and make smoke tests assert a KNOWN symbol, not "any non-empty result". - [Squash-merge masks pre-existing repo-wide debt](solutions/best-practices/squash-merge-masks-pre-existing-debt.md) — first action on a fresh branch from main is `mise run check` BEFORE starting work; lint rules / transitive deps / cross-package test assertions drift across squash boundaries even when per-commit gating was green inside the prior PR. +- [CLI↔MCP parity via one shared core + a split parity test](solutions/best-practices/cli-mcp-parity-via-one-shared-core-plus-split-test.md) — parity = both surfaces call ONE `@opencodehub/analysis` core with identical query args (copy `detect_changes`), never per-surface logic. A cross-package test (importing `cli/src` from `mcp/src`) breaks the mcp `rootDir`; SPLIT it — CLI asserts `--json` deep-equals the raw result, MCP asserts `toStructured` recases losslessly. Adding a tool trips `server.test.ts`'s `EXPECTED_TOOL_NAMES` + `length===N` roster contract by design — bump both. - [No spec-coordinate leakage into source](solutions/best-practices/no-spec-coordinate-leakage-into-source.md) — ERPAVal `AC-*`, `M-*`, `W-*`, `CL-*` prefixes belong in commits, PR bodies, ADR refs sections — NOT in JSDoc, inline comments, CLI flag help, MCP tool descriptions, or test names. Sweep `rg -n "AC-[A-Z]-[0-9]" packages/` before every PR-open; LLM clients pick up the leakage and start citing it back. - [release: published events need PAT or inline](solutions/conventions/release-published-event-needs-pat-or-inline.md) — release-please-action with default `GITHUB_TOKEN` does NOT fire downstream `release: [published]` workflows; inline asset-attach in `release-please.yml` gated on `steps.release.outputs.release_created`. Fixed AC-D-4; sbom.yml has same latent bug for follow-on. - [Dogfood pre-push hook catches CLI spec drift on first push](solutions/best-practices/dogfood-prepush-hook-caught-cli-spec-mismatch.md) — the first `git push` of the commit that adds a self-targeting pre-push hook is where spec/CLI-flag mismatches and "missing index" foot-guns surface. Pattern: SKIP-with-message shape from `pack-determinism-audit.sh` for any gate that depends on a derived artifact. @@ -57,6 +58,7 @@ development sessions. Solutions are reusable; specs are per-feature. - [Collapse parallel switches into a Record registry](solutions/architecture-patterns/collapse-parallel-switches-into-record-registry.md) — when 2+ functions each switch over the same closed union (one per derived attribute), fold them into `Record`. tsc preserves exhaustiveness, the functions become one-line lookups, honest `| null` replaces placeholder lies, and ONE table-driven test with a `Record` fixture pins every (key, attribute) pair — better coverage than the zero direct tests the switches had. - [A vendored-artifact dep bump must re-vendor in the same PR](solutions/conventions/vendored-artifact-bump-must-revendor-in-same-pr.md) — bumping a dep that has a committed vendored artifact (`web-tree-sitter` → `vendor/wasms/*.wasm` + manifest) without re-running the vendor script passes ALL CI (the `prepublishOnly` guard isn't a CI step) and detonates at `npm publish`, aborting the dependency-ordered multi-package release mid-stream. Scan Dependabot consolidations for vendored-artifact deps and re-vendor before merge. +- [tokenizerId is provenance, not an encoder](solutions/conventions/tokenizer-id-is-provenance-not-an-encoder.md) — the `openai:o200k_base@tiktoken-0.8.0` pin threaded through `@opencodehub/pack` is a reproducibility label, NOT an enforced tokenizer. No tiktoken/anthropic encoder is installed; chonkie's `'character'` tokenizer (1 char = 1 token) does the counting, `len/4` on the degraded path. Any "token cost / tokens saved" feature must add a real tokenizer dep OR compute an explicitly-labeled char heuristic (`estimate:true`) — never present char counts as model tokens. - [npm trusted publisher matches the ENTRY workflow, not the reusable one](solutions/conventions/npm-trusted-publisher-matches-entry-workflow-not-reusable.md) — npm OIDC matches "Workflow filename" against the workflow that INITIATED the run, not the one running `npm publish`. With `release-please.yml` → `workflow_call` → `release.yml`, register `release-please.yml`. Wrong registration silently 404s the token exchange; only `workflow_dispatch` runs (entry = release.yml) ever publish, so npm lags the tags. Config is web-UI-only, passkey-gated, one entry per package (17 here). diff --git a/.erpaval/solutions/best-practices/cli-mcp-parity-via-one-shared-core-plus-split-test.md b/.erpaval/solutions/best-practices/cli-mcp-parity-via-one-shared-core-plus-split-test.md new file mode 100644 index 00000000..854e4a07 --- /dev/null +++ b/.erpaval/solutions/best-practices/cli-mcp-parity-via-one-shared-core-plus-split-test.md @@ -0,0 +1,62 @@ +--- +name: cli-mcp-parity-via-one-shared-core-plus-split-test +description: To give a capability true CLI<->MCP parity in OCH, BOTH surfaces must call one shared @opencodehub/analysis core function with structurally identical query args — never reimplement logic per surface. The CLI emits the raw camelCase result via JSON.stringify; the MCP tool recases a PARTIAL set of keys to snake_case (top-level + impacted_subgraph/cost_attribution interiors only; nested array elements + verdict stay camelCase). A cross-package test importing cli/src from mcp/src breaks the MCP package rootDir — so SPLIT the parity proof: CLI test asserts --json deep-equals the raw pack (passthrough), MCP test asserts toStructured recases losslessly (recase-back == raw). Also: adding a tool trips the server.test.ts roster contract (EXPECTED_TOOL_NAMES + length assertion) by design — bump both. +metadata: + type: best-practice + category: best-practices +tags: [cli, mcp, parity, change-pack, rootdir, tool-roster, contract-test, snake-case] +discovered: 2026-06-14 +session: session-6afa8d +related: + - tokenizer-id-is-provenance-not-an-encoder +--- + +# CLI<->MCP parity: one shared core + a split parity test + +## The pattern that gives parity + +Parity is NOT "two surfaces that happen to format the same." It is: both the CLI +command (`packages/cli/src/commands/X.ts`) and the MCP tool +(`packages/mcp/src/tools/X.ts`) delegate to ONE exported +`@opencodehub/analysis` (or `pack`) core function, passing structurally +identical query args. The cleanest exemplar is `detect_changes` → +`runDetectChanges`. Copy it. The core is the single source of truth; the +surfaces only resolve args, open a store, and format. + +Surface conventions to match: +- CLI: commander v15, per-command `--json` that does `console.log(JSON.stringify(result, null, 2))` (raw camelCase, no reshaping). Reuse `pack.verdict.exitCode` (0|1|2) so CI gates match `codehub verdict`. +- MCP: `withStore` + `withNextSteps` + `stalenessFromMeta` + `toolErrorFromUnknown`, registered in `server.ts`, NO `outputSchema`. The structuredContent uses a PARTIAL snake_case recasing — only the top-level keys plus the interiors of nested objects the tool explicitly recases; array element objects and pass-through objects (e.g. the verdict) stay camelCase. Document the exact map. + +## The split parity test (the non-obvious part) + +A single test that imports BOTH `cli/src/...` and the MCP tool from inside the +mcp package fails `tsc` with TS6059/TS6307: `cli/src/*` is not under the mcp +package's `rootDir`. Don't fight it. Split the proof so each half lives in its +own package and together they're airtight: + +- **CLI package test**: assert `--json` output deep-equals the analysis result + fixture → proves the CLI is a pure passthrough (no reshaping). +- **MCP package test**: assert `mcpToCamel(toStructured(fixture))` deep-equals + the fixture → proves the recasing is LOSSLESS (drops no field, mis-spells no + key). Export `toStructured` from the tool module so the test can call it. + +Pure passthrough + lossless recasing of the SAME core output ⇒ identical values +on both surfaces. Drive both hermetically via the `_`-prefixed seams +(`_openStore`/`_runChangePack` on the CLI; the analysis core is deterministic and +fails-open on a fake store) — no git, no DB, no process spawn. + +## The roster contract will fail — that's intentional + +`packages/mcp/src/server.test.ts` pins `EXPECTED_TOOL_NAMES` (sorted) AND +`assert.equal(registered.length, N)`. Registering a new tool makes both fail. +This is the guard against a tool silently dropping out of `buildServer`. Adding +a tool means: add the wire-name to `EXPECTED_TOOL_NAMES` and bump the length. +Also sweep for stale "N tools" counts in CLAUDE.md / AGENTS.md / docs prose +(those are not gated but go stale). + +## Why this matters + +Per-surface logic drifts the moment one side gets a fix the other doesn't. One +shared core makes drift structurally impossible; the split test makes the +serialization layer (the only place they CAN diverge) provably equivalent +without violating the package boundary. diff --git a/.erpaval/solutions/conventions/tokenizer-id-is-provenance-not-an-encoder.md b/.erpaval/solutions/conventions/tokenizer-id-is-provenance-not-an-encoder.md new file mode 100644 index 00000000..5358f44b --- /dev/null +++ b/.erpaval/solutions/conventions/tokenizer-id-is-provenance-not-an-encoder.md @@ -0,0 +1,58 @@ +--- +name: tokenizer-id-is-provenance-not-an-encoder +description: The `openai:o200k_base@tiktoken-0.8.0` tokenizerId string threaded through @opencodehub/pack is a PROVENANCE LABEL, not an enforced encoder. No tiktoken/anthropic tokenizer is installed anywhere in the repo; @chonkiejs/core's default 'character' tokenizer (1 char = 1 token) does the counting, with a len/4 degraded fallback. So any feature that needs "tokens saved / token cost" numbers cannot get model-accurate counts for free — it must either add a real tokenizer dep or compute an explicitly-labeled char heuristic and never present it as model tokens. +metadata: + type: convention + category: conventions +tags: [tokenizer, pack, chonkie, cost-attribution, change-pack, determinism, estimate] +discovered: 2026-06-14 +session: session-6afa8d +related: + - tsup-collapse-monorepo-to-single-cli +--- + +# tokenizerId is provenance, not an encoder + +## The trap + +`@opencodehub/pack` threads a `tokenizerId` like `openai:o200k_base@tiktoken-0.8.0` +through the manifest and into `buildAstChunks`. It LOOKS like the pack counts +tokens with the o200k_base encoder. It does not. + +- `ast-chunker.ts` explicitly "does not interpret" the tokenizer id (only uses it + to pick `determinismClass`: `anthropic:` → `best_effort`, else `strict`). +- Grep the whole `pnpm-lock.yaml` for `tiktoken|gpt-tokenizer|o200k` → zero hits. + `@chonkiejs/token` (the HF-tokenizer add-on) is not installed. +- chonkie's `CodeChunker` defaults to the `'character'` tokenizer (1 char = 1 + token); the pack passes only `{language, chunkSize}`, never a tokenizer. The + degraded path uses `Math.max(1, Math.ceil(len / 4))`. +- `budgetTokens` maps straight to chonkie's `chunkSize` (per-chunk cap) — there + is no binary-search-to-fit and no total-pack budget enforcement. + +So the tokenizerId is a reproducibility *label* (it correctly busts `packHash` +when it changes, which is its real job) — not a guarantee that any o200k_base or +Anthropic encoder ran. + +## How to apply + +When a feature needs token counts (cost attribution, budget trimming, "tokens +saved"): + +1. **Do not assume model-accurate counts exist.** Reuse the existing `len/4` + char heuristic — it is zero-dep, byte-deterministic, and the same model the + pack already uses. +2. **Label the output an estimate.** `change-pack`'s `CostAttribution` carries + `estimate: true` + `tokenizerModel: "char-heuristic-v1"` so no caller mistakes + it for tiktoken counts. +3. **Compute the baseline auditably from the graph**, not from a borrowed + marketing percentage — e.g. sum char-heuristic tokens over every File node in + the impacted subgraph as the "agent reads each file blind" baseline. +4. If you genuinely need model-accurate counts, that's a real new dependency + (tiktoken / gpt-tokenizer) and an ADR — not a one-liner. + +## Why this matters + +A cost feature that silently reports character counts as "tokens" is worse than +no number — it reads as authoritative and is wrong by a model-specific factor. +Naming the heuristic in the output keeps the feature honest and lets a future +tokenizer upgrade swap the basis without changing the contract shape. From 20b8d2500a78df4a8a415f1920498bfe5da86308 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 14 Jun 2026 17:55:15 +0000 Subject: [PATCH 5/7] feat(analysis): count real o200k_base tokens in change-pack cost attribution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the char-heuristic estimate with real BPE token counts via `gpt-tokenizer` (pure-JS, MIT, zero-dep). Chose it over the native/WASM `tiktoken` package to honor the no-native-binding rail (ADR 0015); the `gpt-tokenizer/encoding/o200k_base` subpath bundles its BPE ranks inline, so `encode` is synchronous and deterministic — byte-identity and the content hash hold. - `countTokens` encodes via o200k_base, falling back to `max(1, ceil(len/4))` only on input that throws (rare, still deterministic) so cost never crashes the pack. - `CostAttribution` now reports `estimate: false` / `tokenizerModel: "openai/o200k_base"`; the type widens `estimate` to boolean and `tokenizerModel` to string. The field is folded into the hash, so the basis change is visible in `changePackHash`. - CLI + MCP summaries drop the "(est.)" hedge and name the tokenizer. - Tests, fixtures, EARS spec U6, and the tokenizer lesson updated to the real encoder; license allowlist + OSV clean (gpt-tokenizer is MIT, no advisories). --- .erpaval/INDEX.md | 2 +- ...kenizer-id-is-provenance-not-an-encoder.md | 40 ++++++++++++------- .erpaval/specs/007-change-pack/spec.md | 2 +- packages/analysis/package.json | 1 + packages/analysis/src/change-pack-types.ts | 17 ++++---- packages/analysis/src/change-pack.test.ts | 33 +++++++++------ packages/analysis/src/change-pack.ts | 39 +++++++++++++----- packages/analysis/src/index.ts | 3 +- packages/cli/src/commands/change-pack.test.ts | 11 ++--- packages/cli/src/commands/change-pack.ts | 2 +- .../mcp/src/tools/change-pack.parity.test.ts | 10 ++--- packages/mcp/src/tools/change-pack.test.ts | 8 ++-- packages/mcp/src/tools/change-pack.ts | 4 +- pnpm-lock.yaml | 9 +++++ 14 files changed, 118 insertions(+), 63 deletions(-) diff --git a/.erpaval/INDEX.md b/.erpaval/INDEX.md index c94ef0ef..ffa923d9 100644 --- a/.erpaval/INDEX.md +++ b/.erpaval/INDEX.md @@ -58,7 +58,7 @@ development sessions. Solutions are reusable; specs are per-feature. - [Collapse parallel switches into a Record registry](solutions/architecture-patterns/collapse-parallel-switches-into-record-registry.md) — when 2+ functions each switch over the same closed union (one per derived attribute), fold them into `Record`. tsc preserves exhaustiveness, the functions become one-line lookups, honest `| null` replaces placeholder lies, and ONE table-driven test with a `Record` fixture pins every (key, attribute) pair — better coverage than the zero direct tests the switches had. - [A vendored-artifact dep bump must re-vendor in the same PR](solutions/conventions/vendored-artifact-bump-must-revendor-in-same-pr.md) — bumping a dep that has a committed vendored artifact (`web-tree-sitter` → `vendor/wasms/*.wasm` + manifest) without re-running the vendor script passes ALL CI (the `prepublishOnly` guard isn't a CI step) and detonates at `npm publish`, aborting the dependency-ordered multi-package release mid-stream. Scan Dependabot consolidations for vendored-artifact deps and re-vendor before merge. -- [tokenizerId is provenance, not an encoder](solutions/conventions/tokenizer-id-is-provenance-not-an-encoder.md) — the `openai:o200k_base@tiktoken-0.8.0` pin threaded through `@opencodehub/pack` is a reproducibility label, NOT an enforced tokenizer. No tiktoken/anthropic encoder is installed; chonkie's `'character'` tokenizer (1 char = 1 token) does the counting, `len/4` on the degraded path. Any "token cost / tokens saved" feature must add a real tokenizer dep OR compute an explicitly-labeled char heuristic (`estimate:true`) — never present char counts as model tokens. +- [tokenizerId is provenance, not an encoder](solutions/conventions/tokenizer-id-is-provenance-not-an-encoder.md) — the `openai:o200k_base@tiktoken-0.8.0` pin in `@opencodehub/pack` is a reproducibility label, NOT a tokenizer; chonkie's `'character'` tokenizer does the pack's counting. For real "token cost / tokens saved" numbers, add a real encoder: `change-pack` ships `gpt-tokenizer` (pure-JS, MIT, zero-dep) via the `…/encoding/o200k_base` subpath for synchronous deterministic counts, char heuristic only as a throw-fallback. Pick pure-JS gpt-tokenizer over native/WASM `tiktoken` to honor the no-native-binding rail (ADR 0015). - [npm trusted publisher matches the ENTRY workflow, not the reusable one](solutions/conventions/npm-trusted-publisher-matches-entry-workflow-not-reusable.md) — npm OIDC matches "Workflow filename" against the workflow that INITIATED the run, not the one running `npm publish`. With `release-please.yml` → `workflow_call` → `release.yml`, register `release-please.yml`. Wrong registration silently 404s the token exchange; only `workflow_dispatch` runs (entry = release.yml) ever publish, so npm lags the tags. Config is web-UI-only, passkey-gated, one entry per package (17 here). diff --git a/.erpaval/solutions/conventions/tokenizer-id-is-provenance-not-an-encoder.md b/.erpaval/solutions/conventions/tokenizer-id-is-provenance-not-an-encoder.md index 5358f44b..24a23ed9 100644 --- a/.erpaval/solutions/conventions/tokenizer-id-is-provenance-not-an-encoder.md +++ b/.erpaval/solutions/conventions/tokenizer-id-is-provenance-not-an-encoder.md @@ -1,6 +1,6 @@ --- name: tokenizer-id-is-provenance-not-an-encoder -description: The `openai:o200k_base@tiktoken-0.8.0` tokenizerId string threaded through @opencodehub/pack is a PROVENANCE LABEL, not an enforced encoder. No tiktoken/anthropic tokenizer is installed anywhere in the repo; @chonkiejs/core's default 'character' tokenizer (1 char = 1 token) does the counting, with a len/4 degraded fallback. So any feature that needs "tokens saved / token cost" numbers cannot get model-accurate counts for free — it must either add a real tokenizer dep or compute an explicitly-labeled char heuristic and never present it as model tokens. +description: The `openai:o200k_base@tiktoken-0.8.0` tokenizerId string threaded through @opencodehub/pack is a PROVENANCE LABEL, not an enforced encoder — @chonkiejs/core's default 'character' tokenizer (1 char = 1 token) does the pack's counting. A feature needing real "tokens saved / token cost" numbers must add a real encoder; change-pack ships gpt-tokenizer (pure-JS, MIT, zero-dep) via the isolated `gpt-tokenizer/encoding/o200k_base` subpath for synchronous deterministic o200k_base counts, with a len/4 char heuristic only as a throw-fallback. Prefer pure-JS gpt-tokenizer over native/WASM tiktoken to honor the no-native-binding rail (ADR 0015). metadata: type: convention category: conventions @@ -38,21 +38,33 @@ Anthropic encoder ran. When a feature needs token counts (cost attribution, budget trimming, "tokens saved"): -1. **Do not assume model-accurate counts exist.** Reuse the existing `len/4` - char heuristic — it is zero-dep, byte-deterministic, and the same model the - pack already uses. -2. **Label the output an estimate.** `change-pack`'s `CostAttribution` carries - `estimate: true` + `tokenizerModel: "char-heuristic-v1"` so no caller mistakes - it for tiktoken counts. -3. **Compute the baseline auditably from the graph**, not from a borrowed - marketing percentage — e.g. sum char-heuristic tokens over every File node in - the impacted subgraph as the "agent reads each file blind" baseline. -4. If you genuinely need model-accurate counts, that's a real new dependency - (tiktoken / gpt-tokenizer) and an ADR — not a one-liner. +1. **The pin is not a counter — add a real encoder if you want real tokens.** + `change-pack` ships `gpt-tokenizer` (v3.4.0, MIT, **pure-JS, zero-dep**) and + counts via `import { encode } from "gpt-tokenizer/encoding/o200k_base"`. The + encoding subpath bundles its BPE ranks inline, so `encode` is **synchronous + and deterministic** — no async rank fetch to break byte-identity. +2. **Pick pure-JS over native/WASM `tiktoken`.** The headline package + `tiktoken` is a WASM/native binding; OCH's no-native-binding-at-the- + npm-distributed-boundary rail (ADR 0015) rules it out. `gpt-tokenizer` is the + rail-compatible way to get the SAME o200k_base counts. +3. **Keep a heuristic FALLBACK, not as the primary.** `countTokens` wraps the + encoder in try/catch and falls back to `max(1, ceil(len/4))` only on + pathological input that throws — so cost attribution never crashes the pack, + but the normal path is real model tokens. Record the basis: + `estimate: false`, `tokenizerModel: "openai/o200k_base"` (fold it into the + content hash so a tokenizer swap changes the hash). +4. **Compute the baseline auditably from the graph**, not from a borrowed + marketing percentage — sum real tokens over every File node in the impacted + subgraph as the "agent reads each file blind" baseline. ## Why this matters A cost feature that silently reports character counts as "tokens" is worse than no number — it reads as authoritative and is wrong by a model-specific factor. -Naming the heuristic in the output keeps the feature honest and lets a future -tokenizer upgrade swap the basis without changing the contract shape. +The honest options are (a) a clearly-labeled heuristic or (b) a real encoder. +`change-pack` started at (a) and shipped (b) the same session — the contract +shape (`estimate`/`tokenizerModel` fields) was built wide enough that swapping +the basis touched only the counter + the field values, never the structure. +When you do add a tokenizer, prefer a **pure-JS** one if the repo bans native +bindings, and import the **isolated encoding subpath** so counting stays +synchronous and deterministic. diff --git a/.erpaval/specs/007-change-pack/spec.md b/.erpaval/specs/007-change-pack/spec.md index fc20ad5c..3bf87fc7 100644 --- a/.erpaval/specs/007-change-pack/spec.md +++ b/.erpaval/specs/007-change-pack/spec.md @@ -36,7 +36,7 @@ Full detail in `.erpaval/sessions/session-6afa8d/{explore-detect-changes,explore - **U3**: change-pack MUST read the ingested graph only (via `runImpact`/`runDetectChanges`); it MUST NOT re-derive edges, MUST NOT mutate the graph, MUST NOT call any LLM. - **U4**: `bash scripts/check-banned-strings.sh` exits 0; `mise run check` exits 0 after every commit. - **U5**: All output collections MUST be deterministically ordered — nodes/tests by `id` asc, edges by `(from,type,to,step)`, files by path. No locale-dependent or insertion-order leakage into hashed bytes. -- **U6**: The cost-attribution block MUST self-label as an estimate (`estimate:true`, `tokenizer_model:"char-heuristic-v1"`). It MUST NOT present heuristic counts as model tokens. +- **U6**: The cost-attribution block MUST count real BPE tokens via OpenAI's `o200k_base` encoding (`gpt-tokenizer`, pure-JS/MIT, no native binding — respects ADR 0015) and record the basis in `tokenizer_model:"openai/o200k_base"` with `estimate:false`. The encoder MAY fall back to a `len/4` character heuristic ONLY on pathological input that throws (rare, still deterministic); it MUST NOT otherwise present heuristic counts as model tokens. (Superseded the v1 char-heuristic decision per operator request — "ship tiktoken instead"; chose pure-JS `gpt-tokenizer` over native/WASM `tiktoken` to honor the no-native-binding rail.) ## Event-driven requirements diff --git a/packages/analysis/package.json b/packages/analysis/package.json index 85c2672d..a9335c02 100644 --- a/packages/analysis/package.json +++ b/packages/analysis/package.json @@ -43,6 +43,7 @@ "@opencodehub/sarif": "workspace:*", "@opencodehub/storage": "workspace:*", "@opencodehub/wiki": "workspace:*", + "gpt-tokenizer": "^3.4.0", "write-file-atomic": "8.0.0" }, "devDependencies": { diff --git a/packages/analysis/src/change-pack-types.ts b/packages/analysis/src/change-pack-types.ts index 899e731f..ff003dcf 100644 --- a/packages/analysis/src/change-pack-types.ts +++ b/packages/analysis/src/change-pack-types.ts @@ -89,16 +89,19 @@ export interface AffectedTest { } /** - * Cost attribution for the change-pack. All token figures are estimates from - * a character heuristic, never model-tokenizer counts — `estimate` is always - * true and `tokenizerModel` self-labels the basis. + * Cost attribution for the change-pack. Token figures are real BPE counts from + * OpenAI's `o200k_base` encoding (the encoding modern OpenAI models use, and + * the one the pack's `tokenizerId` pin names). `tokenizerModel` records the + * basis; `estimate` is false because these are model tokens, not a heuristic. + * (The encoder falls back to a `len/4` character heuristic only on + * pathological input that throws — rare, and still deterministic.) */ export interface CostAttribution { - readonly estimate: true; - readonly tokenizerModel: "char-heuristic-v1"; - /** Heuristic tokens for the change-pack context body the agent consumes. */ + readonly estimate: boolean; + readonly tokenizerModel: string; + /** o200k_base tokens for the change-pack context body the agent consumes. */ readonly changePackTokens: number; - /** Heuristic tokens an agent would read by opening every impacted file blind. */ + /** o200k_base tokens an agent would read by opening every impacted file blind. */ readonly blindBaselineTokens: number; readonly tokensSaved: number; readonly tokensSavedPct: number; diff --git a/packages/analysis/src/change-pack.test.ts b/packages/analysis/src/change-pack.test.ts index 107f2ca6..bed98aeb 100644 --- a/packages/analysis/src/change-pack.test.ts +++ b/packages/analysis/src/change-pack.test.ts @@ -2,7 +2,12 @@ import assert from "node:assert/strict"; import { test } from "node:test"; import { canonicalJson } from "@opencodehub/core-types"; import type { IGraphStore } from "@opencodehub/storage"; -import { type ChangePackInternal, charHeuristicTokens, runChangePack } from "./change-pack.js"; +import { + type ChangePackInternal, + COST_TOKENIZER_MODEL, + countTokens, + runChangePack, +} from "./change-pack.js"; import { FakeStore } from "./test-utils.js"; import type { DetectChangesResult } from "./types.js"; import type { VerdictQuery, VerdictResponse } from "./verdict-types.js"; @@ -305,12 +310,15 @@ test("runChangePack: affected tests sorted by (filePath, id), deduped by id", as // Cost attribution // --------------------------------------------------------------------------- -test("charHeuristicTokens: max(1, ceil(len/4))", () => { - assert.equal(charHeuristicTokens(""), 1); - assert.equal(charHeuristicTokens("abc"), 1); - assert.equal(charHeuristicTokens("abcd"), 1); - assert.equal(charHeuristicTokens("abcde"), 2); - assert.equal(charHeuristicTokens("a".repeat(40)), 10); +test("countTokens: real o200k_base counts, empty string → 0", () => { + // Empty is the one fixed point we can assert by value; otherwise we assert + // against the encoder itself (self-consistency) rather than magic numbers, + // and that token counts are <= char length (BPE merges chars into tokens). + assert.equal(countTokens(""), 0); + assert.ok(countTokens("hello world") > 0); + assert.ok(countTokens("hello world") <= "hello world".length); + // Stable across calls — deterministic encoder, no async rank load. + assert.equal(countTokens("function foo() {}"), countTokens("function foo() {}")); }); test("runChangePack: cost attribution computes baseline, savings, pct, ci skip", async () => { @@ -329,10 +337,11 @@ test("runChangePack: cost attribution computes baseline, savings, pct, ci skip", ); const cost = pack.costAttribution; - assert.equal(cost.estimate, true); - assert.equal(cost.tokenizerModel, "char-heuristic-v1"); - // Baseline = sum over the two impacted files (src/b.ts, src/c.ts). - const expectedBaseline = charHeuristicTokens(bigBody) * 2; + // Real model tokens now — not an estimate. + assert.equal(cost.estimate, false); + assert.equal(cost.tokenizerModel, COST_TOKENIZER_MODEL); + // Baseline = real token count summed over the two impacted files. + const expectedBaseline = countTokens(bigBody) * 2; assert.equal(cost.blindBaselineTokens, expectedBaseline); assert.ok(cost.changePackTokens > 0); assert.equal(cost.tokensSaved, Math.max(0, cost.blindBaselineTokens - cost.changePackTokens)); @@ -355,7 +364,7 @@ test("runChangePack: unreadable impacted file is skipped without breaking the ba { repoPath: "/repo" }, makeInternal(detect(FOO_CHANGE), files), ); - assert.equal(pack.costAttribution.blindBaselineTokens, charHeuristicTokens("y".repeat(400))); + assert.equal(pack.costAttribution.blindBaselineTokens, countTokens("y".repeat(400))); }); // --------------------------------------------------------------------------- diff --git a/packages/analysis/src/change-pack.ts b/packages/analysis/src/change-pack.ts index 6eabc1c0..898b2301 100644 --- a/packages/analysis/src/change-pack.ts +++ b/packages/analysis/src/change-pack.ts @@ -17,6 +17,12 @@ import { readFile } from "node:fs/promises"; import path from "node:path"; import { canonicalJson, sha256Hex } from "@opencodehub/core-types"; import type { IGraphStore } from "@opencodehub/storage"; +// Real BPE token counts via the o200k_base encoding (the encoding the +// `tokenizerId` pin already names). `gpt-tokenizer` is pure-JS + zero-dep +// (MIT) — no native binding, so it respects the WASM-only / no-native-binding +// rail (ADR 0015). The encoding subpath bundles its BPE ranks inline, so +// `encode` is synchronous and deterministic (no async rank fetch). +import { encode as encodeO200k } from "gpt-tokenizer/encoding/o200k_base"; import type { AffectedTest, ChangedSymbol, @@ -65,12 +71,25 @@ export interface ChangePackInternal { readonly computeVerdict?: (store: IGraphStore, q: VerdictQuery) => Promise; } +/** The model whose tokenizer backs the cost counts. Folded into the hash. */ +export const COST_TOKENIZER_MODEL = "openai/o200k_base"; + /** - * Character-count token heuristic. Matches the pack's degraded counter - * (`max(1, ceil(len / 4))`). This is an estimate, not a model tokenizer. + * Count tokens with OpenAI's `o200k_base` BPE encoding — the encoding modern + * models (gpt-4o, gpt-4.1, o-series) use, and the one the `tokenizerId` pin + * already names. These are real model tokens, not a heuristic. + * + * Falls back to the `max(1, ceil(len / 4))` character heuristic only if the + * encoder throws on pathological input, so cost attribution never crashes the + * pack; the fallback is rare and still deterministic. */ -export function charHeuristicTokens(text: string): number { - return Math.max(1, Math.ceil(text.length / 4)); +export function countTokens(text: string): number { + if (text.length === 0) return 0; + try { + return encodeO200k(text).length; + } catch { + return Math.max(1, Math.ceil(text.length / 4)); + } } /** Resolved query envelope, folded into the hash so identical inputs hash alike. */ @@ -353,7 +372,7 @@ interface CostAttributionInput { async function computeCostAttribution(input: CostAttributionInput): Promise { const { store, repoPath, impactedSubgraph, affectedTests, body, readFileText } = input; - const changePackTokens = charHeuristicTokens(canonicalJson(body)); + const changePackTokens = countTokens(canonicalJson(body)); // Blind baseline: the cost an agent pays by reading every impacted file // whole. Distinct File paths in the subgraph; read each once from disk. @@ -373,7 +392,7 @@ async function computeCostAttribution(input: CostAttributionInput): Promise = {}): }, ], costAttribution: { - estimate: true, - tokenizerModel: "char-heuristic-v1", + estimate: false, + tokenizerModel: "openai/o200k_base", changePackTokens: 120, blindBaselineTokens: 480, tokensSaved: 360, @@ -176,7 +177,7 @@ test("runChangePackCmd --json emits raw camelCase ChangePack, exit = verdict.exi const fixture = packFixture(2); const parsed = JSON.parse(output) as ChangePack; assert.equal(parsed.changePackHash, "deadbeef".repeat(8)); - assert.equal(parsed.costAttribution.estimate, true); + assert.equal(parsed.costAttribution.estimate, false); assert.equal(parsed.verdict.verdict, "expert_review"); // CLI<->MCP parity (CLI half): `--json` is a PURE passthrough — the emitted // object deep-equals the analysis ChangePack with zero reshaping. The MCP @@ -211,7 +212,7 @@ test("runChangePackCmd default (no --json) → human summary, exit = verdict.exi assert.match(output, /• itFoo — src\/foo\.test\.ts/); assert.match( output, - /Est\. tokens saved: 360 \(75%\) vs blind read; CI tests skippable: 3\/4 \(est\.\)/, + /Tokens saved: 360 \(75%\) vs blind read \[openai\/o200k_base\]; CI tests skippable: 3\/4/, ); // Summary mode is not JSON. assert.doesNotMatch(output, /"changePackHash"/); diff --git a/packages/cli/src/commands/change-pack.ts b/packages/cli/src/commands/change-pack.ts index 676d91b8..84d271e8 100644 --- a/packages/cli/src/commands/change-pack.ts +++ b/packages/cli/src/commands/change-pack.ts @@ -96,7 +96,7 @@ export async function runChangePackCmd(opts: ChangePackOptions = {}): Promise { assert.equal(toStructured(FIXTURE)["change_pack_hash"], FIXTURE.changePackHash); }); -test("cost attribution stays an estimate with values intact through MCP", () => { +test("cost attribution carries real o200k token values intact through MCP", () => { const cost = toStructured(FIXTURE)["cost_attribution"] as Record; - assert.equal(cost["estimate"], true); - assert.equal(cost["tokenizer_model"], "char-heuristic-v1"); + assert.equal(cost["estimate"], false); + assert.equal(cost["tokenizer_model"], "openai/o200k_base"); assert.equal(cost["tokens_saved"], FIXTURE.costAttribution.tokensSaved); assert.equal(cost["ci_tests_skipped"], FIXTURE.costAttribution.ciTestsSkipped); }); diff --git a/packages/mcp/src/tools/change-pack.test.ts b/packages/mcp/src/tools/change-pack.test.ts index 1b36dac3..6a1a39c8 100644 --- a/packages/mcp/src/tools/change-pack.test.ts +++ b/packages/mcp/src/tools/change-pack.test.ts @@ -80,15 +80,15 @@ test("change_pack returns the snake_case structuredContent envelope", async () = }); }); -test("change_pack cost attribution is always a char-heuristic estimate", async () => { +test("change_pack cost attribution reports real o200k_base token counts", async () => { await withHarness(async (ctx, server) => { registerChangePackTool(server, ctx); const handler = getToolHandler(server, "change_pack"); const result = await handler({ repo: "fakerepo" }, {}); const sc = result.structuredContent as unknown as StructuredChangePack; - assert.equal(sc.cost_attribution.estimate, true); - assert.equal(sc.cost_attribution.tokenizer_model, "char-heuristic-v1"); + assert.equal(sc.cost_attribution.estimate, false); + assert.equal(sc.cost_attribution.tokenizer_model, "openai/o200k_base"); assert.equal(typeof sc.cost_attribution.change_pack_tokens, "number"); assert.equal(typeof sc.cost_attribution.blind_baseline_tokens, "number"); assert.equal(typeof sc.cost_attribution.tokens_saved, "number"); @@ -146,6 +146,6 @@ test("change_pack threads optional knobs through without error", async () => { ); assert.equal(result.isError, undefined); const sc = result.structuredContent as unknown as StructuredChangePack; - assert.equal(sc.cost_attribution.estimate, true); + assert.equal(sc.cost_attribution.estimate, false); }); }); diff --git a/packages/mcp/src/tools/change-pack.ts b/packages/mcp/src/tools/change-pack.ts index a255a4dc..fd981b3d 100644 --- a/packages/mcp/src/tools/change-pack.ts +++ b/packages/mcp/src/tools/change-pack.ts @@ -115,7 +115,7 @@ function renderText(pack: ChangePack): string { `Affected tests: ${pack.costAttribution.affectedTestCount} of ${pack.costAttribution.totalTestCount}; CI tests skippable: ${pack.costAttribution.ciTestsSkipped}.`, ); lines.push( - `Tokens saved (est.): ${pack.costAttribution.tokensSaved} (${pack.costAttribution.tokensSavedPct}% vs. blind baseline).`, + `Tokens saved: ${pack.costAttribution.tokensSaved} (${pack.costAttribution.tokensSavedPct}% vs. blind baseline, ${pack.costAttribution.tokenizerModel}).`, ); return lines.join("\n"); } @@ -159,7 +159,7 @@ export function registerChangePackTool(server: McpServer, ctx: ToolContext): voi { title: "Diff-scoped change-pack", description: - "Deterministic, diff-scoped context pack for a git range: the impacted upstream subgraph (retained, not collapsed), the 5-tier verdict, the affected tests, and a char-heuristic cost estimate (tokens saved vs. opening every impacted file blind). CI-oriented — read-only, no LLM, byte-deterministic with a content hash.", + "Deterministic, diff-scoped context pack for a git range: the impacted upstream subgraph (retained, not collapsed), the 5-tier verdict, the affected tests, and o200k_base token-cost attribution (tokens saved vs. opening every impacted file blind). CI-oriented — read-only, no LLM, byte-deterministic with a content hash.", inputSchema: ChangePackInput, annotations: { readOnlyHint: true, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3cb09f58..5d51b813 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -80,6 +80,9 @@ importers: '@opencodehub/wiki': specifier: workspace:* version: link:../wiki + gpt-tokenizer: + specifier: ^3.4.0 + version: 3.4.0 write-file-atomic: specifier: 8.0.0 version: 8.0.0 @@ -3801,6 +3804,9 @@ packages: resolution: {integrity: sha512-6tfZ91bOr7bOXnK7PRDCGBLa1H4U080YHNaAQ2KsMGlLEzRbk44nsZF2E1IeRc3vtJHPVbKCYgdFbaGO2ljd8g==} engines: {node: '>=10.19.0'} + gpt-tokenizer@3.4.0: + resolution: {integrity: sha512-wxFLnhIXTDjYebd9A9pGl3e31ZpSypbpIJSOswbgop5jLte/AsZVDvjlbEuVFlsqZixVKqbcoNmRlFDf6pz/UQ==} + graceful-fs@4.2.11: resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} @@ -9619,6 +9625,8 @@ snapshots: p-cancelable: 2.1.1 responselike: 2.0.1 + gpt-tokenizer@3.4.0: {} + graceful-fs@4.2.11: {} grapheme-splitter@1.0.4: {} @@ -12572,6 +12580,7 @@ time: commitizen@4.3.2: '2026-06-12T12:50:44.903Z' cz-conventional-changelog@3.3.0: '2020-08-26T18:43:16.534Z' fast-xml-parser@5.8.0: '2026-05-12T03:39:29.203Z' + gpt-tokenizer@3.4.0: '2025-11-07T20:15:06.227Z' graphology-dag@0.4.1: '2023-12-09T08:29:05.655Z' graphology@0.26.0: '2025-01-26T10:25:05.589Z' lefthook@2.1.9: '2026-05-29T08:40:26.517Z' From 3fee4efa8417765fe86932b9aefacb81ed398ebd Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 14 Jun 2026 18:06:09 +0000 Subject: [PATCH 6/7] fix(cli): declare gpt-tokenizer as a CLI runtime dep The CLI bundles `@opencodehub/*` source (noExternal) but keeps third-party deps external, resolving them from the CLI's own node_modules at runtime. The change-pack o200k counter added `gpt-tokenizer` to @opencodehub/analysis but not to the CLI, so `node dist/index.js analyze` threw "Cannot find package 'gpt-tokenizer'" in CI (the self-scan + every fresh-install matrix leg) while passing locally via the pnpm workspace hoist. Re-declare it on the CLI like the other analysis third-party deps (@iarna/toml etc.). Per the tsup-collapse-monorepo lesson: verify from the built dist, not the hot workspace. --- packages/cli/package.json | 1 + pnpm-lock.yaml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/packages/cli/package.json b/packages/cli/package.json index 090f7029..b53e0e6e 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -56,6 +56,7 @@ "cli-table3": "0.6.5", "commander": "15.0.0", "fast-xml-parser": "5.8.0", + "gpt-tokenizer": "^3.4.0", "listr2": "10.2.1", "lru-cache": "11.5.1", "piscina": "5.2.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5d51b813..9efdf078 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -144,6 +144,9 @@ importers: fast-xml-parser: specifier: 5.8.0 version: 5.8.0 + gpt-tokenizer: + specifier: ^3.4.0 + version: 3.4.0 listr2: specifier: 10.2.1 version: 10.2.1 From a23fa76ac6a5d727117795e8072244d69320103f Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon Date: Sun, 14 Jun 2026 18:13:12 +0000 Subject: [PATCH 7/7] test(analysis): make change-pack file-reader seam path-separator-agnostic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cost-attribution tests key an in-memory file map with POSIX paths, but runChangePack joins repoPath + relPath with the OS separator — so on Windows the lookup arrived backslash-separated, missed the fixture, and the blind baseline computed as 0 (two failing assertions on windows-latest). Normalize the seam's lookup key to forward slashes so the fake reader matches on every platform. Production fs reads are unaffected (real paths use the right separator); this is a test-fixture fix only. --- packages/analysis/src/change-pack.test.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/packages/analysis/src/change-pack.test.ts b/packages/analysis/src/change-pack.test.ts index bed98aeb..b8efc2a8 100644 --- a/packages/analysis/src/change-pack.test.ts +++ b/packages/analysis/src/change-pack.test.ts @@ -46,7 +46,12 @@ function makeInternal( detectChanges: () => Promise.resolve(changes), computeVerdict: (_store: IGraphStore, _q: VerdictQuery) => Promise.resolve(verdict), readFileText: (absPath: string) => { - const v = files[absPath]; + // The production code joins repoPath + relPath with the OS separator, so + // on Windows the lookup arrives backslash-separated. The fixture keys are + // POSIX; normalize both to forward slashes so the in-memory reader is + // path-separator-agnostic across platforms. + const key = absPath.replace(/\\/g, "/"); + const v = files[key] ?? files[absPath]; if (v === undefined) return Promise.reject(new Error(`ENOENT: ${absPath}`)); return Promise.resolve(v); },