diff --git a/src/vouch/capabilities.py b/src/vouch/capabilities.py
index 860bf08..5425bc2 100644
--- a/src/vouch/capabilities.py
+++ b/src/vouch/capabilities.py
@@ -72,6 +72,7 @@
     "kb.provenance_rebuild",
     "kb.detect_themes",
     "kb.propose_theme",
+    "kb.reconcile_backlinks",
 ]
 
 
diff --git a/src/vouch/cli.py b/src/vouch/cli.py
index 59d5ef9..a40685d 100644
--- a/src/vouch/cli.py
+++ b/src/vouch/cli.py
@@ -2605,6 +2605,54 @@ def detect_themes_cmd(
     )
 
 
+# --- backlink reconciliation (#307) ----------------------------------------
+
+
+@cli.command("reconcile-backlinks")
+@click.option(
+    "--rel-types", default=None,
+    help="Comma-separated relation types to scan "
+         "(default: every type with a configured inverse).",
+)
+@click.option("--limit", default=50, show_default=True, type=int,
+              help="Max proposals to file in one run.")
+@click.option("--dry-run", is_flag=True, help="Report the would-propose set; write nothing.")
+@click.option("--json", "as_json", is_flag=True, help="Emit JSON instead of text.")
+def reconcile_backlinks_cmd(
+    rel_types: str | None, limit: int, dry_run: bool, as_json: bool,
+) -> None:
+    """Propose missing reverse relations across the graph (#307)."""
+    types = [t.strip() for t in rel_types.split(",") if t.strip()] if rel_types else None
+    store = _load_store()
+    with _cli_errors():
+        result = life.reconcile_backlinks(
+            store, rel_types=types, limit=limit, dry_run=dry_run,
+        )
+    if as_json:
+        _emit_json({
+            "checked": result.checked,
+            "proposed": [p.id for p in result.proposed],
+            "skipped_unmapped": result.skipped_unmapped,
+            "skipped_existing": result.skipped_existing,
+            "skipped_pending": result.skipped_pending,
+            "skipped_invalid": result.skipped_invalid,
+            "dry_run": result.dry_run,
+        })
+        return
+    if not result.proposed:
+        click.echo("no missing backlinks found")
+        return
+    verb = "would propose" if dry_run else "proposed"
+    click.echo(f"{verb} {len(result.proposed)} backlink proposal(s)")
+    for pr in result.proposed:
+        click.echo(
+            f" {pr.id} {pr.payload['source']} "
+            f"--{pr.payload['relation']}--> {pr.payload['target']}"
+        )
+    if dry_run:
+        click.echo("rerun without --dry-run to file these proposals")
+
+
 # --- export / import ------------------------------------------------------
 
 
diff --git a/src/vouch/jsonl_server.py b/src/vouch/jsonl_server.py
index 5505995..9c5e17c 100644
--- a/src/vouch/jsonl_server.py
+++ b/src/vouch/jsonl_server.py
@@ -693,6 +693,25 @@ def _h_propose_theme(p: dict) -> dict:
     return themes.propose_theme(store, cluster, proposed_by=actor)
 
 
+def _h_reconcile_backlinks(p: dict) -> dict:
+    """Handle `kb.reconcile_backlinks`: run the pass and return a JSON-safe summary."""
+    result = life.reconcile_backlinks(
+        _store(),
+        rel_types=p.get("rel_types"),
+        limit=int(p.get("limit", 50)),
+        dry_run=bool(p.get("dry_run", False)),
+    )
+    return {
+        "checked": result.checked,
+        "proposed": [pr.id for pr in result.proposed],
+        "skipped_unmapped": result.skipped_unmapped,
+        "skipped_existing": result.skipped_existing,
+        "skipped_pending": result.skipped_pending,
+        "skipped_invalid": result.skipped_invalid,
+        "dry_run": result.dry_run,
+    }
+
+
 HANDLERS: dict[str, Callable[[dict], Any]] = {
     "kb.capabilities": _h_capabilities,
     "kb.status": _h_status,
@@ -751,6 +770,7 @@ def _h_propose_theme(p: dict) -> dict:
     "kb.provenance_rebuild": _h_provenance_rebuild,
     "kb.detect_themes": _h_detect_themes,
     "kb.propose_theme": _h_propose_theme,
+    "kb.reconcile_backlinks": _h_reconcile_backlinks,
 }
 
 
diff --git a/src/vouch/lifecycle.py b/src/vouch/lifecycle.py
index fadbb07..14ba8fa 100644
--- a/src/vouch/lifecycle.py
+++ b/src/vouch/lifecycle.py
@@ -7,14 +7,32 @@
 
 If you want stricter review on lifecycle changes, gate the CLI commands
 behind a config flag rather than refactoring this module.
+
+`reconcile_backlinks` at the bottom is the one exception: it's a
+read-then-*propose* pass over the relation graph, not a direct mutation —
+every gap it finds lands as a pending `Proposal`, same as `propose_relation`
+itself, and requires a human `kb.approve` like any other write.
 """
 
 from __future__ import annotations
 
+from dataclasses import dataclass, field
 from datetime import UTC, datetime
 
+import yaml
+
 from . import audit
-from .models import Claim, ClaimStatus, Evidence, Relation, RelationType
+from .models import (
+    Claim,
+    ClaimStatus,
+    Evidence,
+    Proposal,
+    ProposalKind,
+    ProposalStatus,
+    Relation,
+    RelationType,
+)
+from .proposals import ProposalError, propose_relation
 from .storage import ArtifactNotFoundError, KBStore
 
 
@@ -154,3 +172,178 @@ def cite(store: KBStore, claim_id: str) -> list[Evidence | dict]:
         except ArtifactNotFoundError:
             out.append({"kind": "missing", "ref": ref})
     return out
+
+
+# --- backlink reconciliation (#307) ----------------------------------------
+
+# Mirrors extractors/edges.py's AUTO_EXTRACTOR_ACTOR: an automated pass is
+# attributed to a fixed bot identity, not whichever human or agent happened
+# to invoke it, so proposals it files can be told apart (and, if a bulk
+# reject like `reject_auto_extracted` is ever added for these, filtered on).
+RECONCILE_ACTOR = "reconcile"
+
+# Only pairs with an unambiguous natural inverse are mapped by default.
+# `owned_by` and the other purely-directed types (uses, supports, caused_by,
+# derived_from, implements, references, mentions, supersedes) have no
+# corresponding "reverse" RelationType value today, so — per the "unmapped
+# types are skipped rather than guessed" rule — they're left out rather than
+# invented. A KB can replace this map via `.vouch/config.yaml`; a configured
+# `inverse_map` is used *instead of* these defaults, not merged into them.
+_DEFAULT_BACKLINK_INVERSE_MAP: dict[str, str] = {
+    RelationType.DEPENDS_ON.value: RelationType.BLOCKS.value,
+    RelationType.BLOCKS.value: RelationType.DEPENDS_ON.value,
+    RelationType.SIMILAR_TO.value: RelationType.SIMILAR_TO.value,
+    RelationType.RELATES_TO.value: RelationType.RELATES_TO.value,
+    RelationType.CONTRADICTS.value: RelationType.CONTRADICTS.value,
+}
+
+
+def _load_backlink_inverse_map(store: KBStore) -> dict[str, str]:
+    """Read the relation-type inverse map from config, with defensive defaults.
+
+    Mirrors the `themes._load_theme_config` pattern: every value is
+    type-checked and falls back to the default map rather than crashing on
+    malformed input. Declared in `.vouch/config.yaml` as:
+
+        backlinks:
+          inverse_map:
+            depends_on: blocks
+            blocks: depends_on
+
+    A non-empty configured map replaces the defaults outright (entries that
+    are not str -> str are dropped first). A relation type absent from the
+    resulting map has no defined mirror and is skipped by
+    `reconcile_backlinks`, not guessed.
+    """
+    try:
+        raw = yaml.safe_load(store.config_path.read_text(encoding="utf-8"))
+        cfg = raw if isinstance(raw, dict) else {}
+    except (OSError, yaml.YAMLError):
+        cfg = {}
+    backlinks_cfg = cfg.get("backlinks") if isinstance(cfg, dict) else None
+    if not isinstance(backlinks_cfg, dict):
+        return dict(_DEFAULT_BACKLINK_INVERSE_MAP)
+    inverse_map = backlinks_cfg.get("inverse_map")
+    if not isinstance(inverse_map, dict) or not inverse_map:
+        return dict(_DEFAULT_BACKLINK_INVERSE_MAP)
+    cleaned = {
+        k: v for k, v in inverse_map.items()
+        if isinstance(k, str) and isinstance(v, str)
+    }
+    return cleaned or dict(_DEFAULT_BACKLINK_INVERSE_MAP)
+
+
+@dataclass
+class ReconcileResult:
+    """Outcome of `reconcile_backlinks`.
+
+    `checked` counts scanned edges whose type has a configured mirror;
+    `proposed` holds the proposals filed (or, on a dry run, built but not
+    written). The `skipped_*` counters say why an edge produced no proposal:
+    no mirror type configured, mirror edge already present, mirror already
+    waiting as a pending proposal, or `propose_relation` rejected it.
+    """
+
+    checked: int = 0
+    proposed: list[Proposal] = field(default_factory=list)
+    skipped_unmapped: int = 0
+    skipped_existing: int = 0
+    dry_run: bool = False
+    # Appended after `dry_run` so positional construction keeps working.
+    skipped_pending: int = 0
+    skipped_invalid: int = 0
+
+
+def reconcile_backlinks(
+    store: KBStore,
+    *,
+    rel_types: list[str] | None = None,
+    limit: int = 50,
+    dry_run: bool = False,
+    proposed_by: str = RECONCILE_ACTOR,
+) -> ReconcileResult:
+    """Propose missing reverse relations across the graph (#307).
+
+    A read-then-propose pass: for every existing `Relation` whose type has
+    a configured inverse (`_load_backlink_inverse_map`), checks whether the
+    mirror edge (`target --mirror--> source`) already exists and, if not,
+    files one `propose_relation` proposal for it — never writes an approved
+    edge directly. Relation types absent from the inverse map are skipped
+    rather than guessed. `rel_types`, if given, restricts which *original*
+    edges are scanned (by their own type, before mirroring). `limit` bounds
+    how many proposals a single run files; `dry_run` is threaded straight
+    into `propose_relation`, which still validates and returns a real
+    `Proposal` but writes nothing to disk.
+
+    Re-running is safe: a mirror that is already waiting as a pending
+    relation proposal is counted in `skipped_pending` instead of being
+    filed again, and a mirror `propose_relation` rejects with
+    `ProposalError` is counted in `skipped_invalid` rather than silently
+    dropped. A `limit` of zero or less files nothing.
+
+    Note on the graph read: the issue that requested this (#307) suggested
+    reading via `kb.graph_export`. That method renders the unrelated
+    *provenance* DAG (claim citations, supersedes, approvedBy — see
+    `provenance.py`) as a dot/mermaid string and never touches `Relation`
+    objects. The actual full relation edge set is `store.list_relations()`,
+    and `store.relations_from()` is the existence check — the same
+    underlying data `kb.neighbors` merges in, minus its extra structural
+    (non-Relation) edges that would otherwise risk a false "already exists"
+    match. This function reads that store surface directly instead.
+    """
+    inverse_map = _load_backlink_inverse_map(store)
+    relations = store.list_relations()
+    if rel_types:
+        wanted = set(rel_types)
+        relations = [r for r in relations if r.relation.value in wanted]
+
+    # Mirrors already sitting in the review queue. Without this, every run
+    # before a reviewer acts would file the same proposals all over again.
+    pending = {
+        (p.payload.get("source"), p.payload.get("relation"), p.payload.get("target"))
+        for p in store.list_proposals(ProposalStatus.PENDING)
+        if p.kind == ProposalKind.RELATION
+    }
+
+    result = ReconcileResult(dry_run=dry_run)
+    seen: set[tuple[str, str, str]] = set()
+    for rel in relations:
+        if len(result.proposed) >= limit:
+            break
+        mirror_type = inverse_map.get(rel.relation.value)
+        if mirror_type is None:
+            result.skipped_unmapped += 1
+            continue
+        result.checked += 1
+        mirror_src, mirror_target = rel.target, rel.source
+        key = (mirror_src, mirror_type, mirror_target)
+        if key in seen:
+            continue
+        already_mirrored = any(
+            r.target == mirror_target and r.relation.value == mirror_type
+            for r in store.relations_from(mirror_src)
+        )
+        if already_mirrored:
+            result.skipped_existing += 1
+            continue
+        if key in pending:
+            result.skipped_pending += 1
+            continue
+        seen.add(key)
+        try:
+            pr = propose_relation(
+                store,
+                src=mirror_src,
+                relation=mirror_type,
+                target=mirror_target,
+                proposed_by=proposed_by,
+                rationale=f"backlink for {rel.id}",
+                dry_run=dry_run,
+            )
+        except ProposalError:
+            # Best-effort: one rejected mirror must not abort the whole pass,
+            # but it is counted so callers can see that something was dropped.
+            result.skipped_invalid += 1
+            continue
+        result.proposed.append(pr)
+    return result
diff --git a/src/vouch/server.py b/src/vouch/server.py
index 6095f00..fe8fba6 100644
--- a/src/vouch/server.py
+++ b/src/vouch/server.py
@@ -973,6 +973,37 @@ def kb_propose_theme(
     return themes.propose_theme(store, cluster, proposed_by=actor)
 
 
+@mcp.tool()
+def kb_reconcile_backlinks(
+    rel_types: list[str] | None = None,
+    limit: int = 50,
+    dry_run: bool = False,
+) -> dict[str, Any]:
+    """Propose missing reverse relations across the graph (#307).
+
+    Read-then-propose: scans existing relations for a configured mirror
+    that's missing at the target and files one `kb.propose_relation` per
+    gap. Never writes an approved edge directly — pending human approval
+    via `vouch approve <id>`, like any other proposal. `dry_run` reports
+    the would-propose set without writing anything.
+    """
+    try:
+        result = life.reconcile_backlinks(
+            _store(), rel_types=rel_types, limit=limit, dry_run=dry_run,
+        )
+    except (ArtifactNotFoundError, ValueError, ProposalError) as e:
+        raise ValueError(str(e)) from e
+    return {
+        "checked": result.checked,
+        "proposed": [p.id for p in result.proposed],
+        "skipped_unmapped": result.skipped_unmapped,
+        "skipped_existing": result.skipped_existing,
+        "skipped_pending": result.skipped_pending,
+        "skipped_invalid": result.skipped_invalid,
+        "dry_run": result.dry_run,
+    }
+
+
 def _current_model_name() -> str:
     try:
         from .embeddings import get_embedder
diff --git a/tests/test_reconcile_backlinks.py b/tests/test_reconcile_backlinks.py
new file mode 100644
index 0000000..500def7
--- /dev/null
+++ b/tests/test_reconcile_backlinks.py
@@ -0,0 +1,189 @@
+"""Backlink reconciliation — propose missing reverse relations (issue #307)."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+import yaml
+
+from vouch.lifecycle import reconcile_backlinks
+from vouch.models import Entity, EntityType, ProposalKind, ProposalStatus, RelationType
+from vouch.proposals import approve, propose_relation
+from vouch.storage import KBStore
+
+
+@pytest.fixture
+def store(tmp_path: Path) -> KBStore:
+    return KBStore.init(tmp_path)
+
+
+def _entity(store: KBStore, id_: str) -> Entity:
+    e = Entity(id=id_, name=id_, type=EntityType.CONCEPT)
+    store.put_entity(e)
+    return e
+
+
+def _approved_relation(store: KBStore, *, src: str, relation: str, target: str):
+    pr = propose_relation(store, src=src, relation=relation, target=target, proposed_by="seed")
+    return approve(store, pr.id, approved_by="reviewer")
+
+
+def _pending_relation_proposals(store: KBStore) -> list:
+    pending = store.list_proposals(ProposalStatus.PENDING)
+    return [p for p in pending if p.kind == ProposalKind.RELATION]
+
+
+def test_directed_gap_is_proposed(store: KBStore) -> None:
+    _entity(store, "a")
+    _entity(store, "b")
+    rel = _approved_relation(store, src="a", relation=RelationType.DEPENDS_ON.value, target="b")
+
+    result = reconcile_backlinks(store)
+
+    assert result.checked == 1
+    assert len(result.proposed) == 1
+    pr = result.proposed[0]
+    assert pr.payload["source"] == "b"
+    assert pr.payload["relation"] == RelationType.BLOCKS.value
+    assert pr.payload["target"] == "a"
+    assert pr.proposed_by == "reconcile"
+    assert rel.id in (pr.rationale or "")
+    # It's a real pending proposal on disk, not just an in-memory result.
+    pending = _pending_relation_proposals(store)
+    assert len(pending) == 1
+    assert pending[0].id == pr.id
+
+
+def test_already_mirrored_edge_is_skipped(store: KBStore) -> None:
+    _entity(store, "a")
+    _entity(store, "b")
+    _approved_relation(store, src="a", relation=RelationType.DEPENDS_ON.value, target="b")
+    _approved_relation(store, src="b", relation=RelationType.BLOCKS.value, target="a")
+
+    result = reconcile_backlinks(store)
+
+    assert result.proposed == []
+    assert result.skipped_existing == 2
+    assert _pending_relation_proposals(store) == []
+
+
+def test_symmetric_type_proposes_missing_mirror(store: KBStore) -> None:
+    _entity(store, "a")
+    _entity(store, "b")
+    _approved_relation(store, src="a", relation=RelationType.SIMILAR_TO.value, target="b")
+
+    result = reconcile_backlinks(store)
+
+    assert len(result.proposed) == 1
+    pr = result.proposed[0]
+    assert pr.payload["source"] == "b"
+    assert pr.payload["relation"] == RelationType.SIMILAR_TO.value
+    assert pr.payload["target"] == "a"
+
+
+def test_symmetric_type_with_existing_mirror_is_not_duplicated(store: KBStore) -> None:
+    _entity(store, "a")
+    _entity(store, "b")
+    _approved_relation(store, src="a", relation=RelationType.CONTRADICTS.value, target="b")
+    _approved_relation(store, src="b", relation=RelationType.CONTRADICTS.value, target="a")
+
+    result = reconcile_backlinks(store)
+
+    assert result.proposed == []
+    assert result.skipped_existing == 2
+
+
+def test_unmapped_relation_type_is_skipped(store: KBStore) -> None:
+    _entity(store, "a")
+    _entity(store, "b")
+    # `uses` has no default inverse — it must be skipped, not guessed.
+    _approved_relation(store, src="a", relation=RelationType.USES.value, target="b")
+
+    result = reconcile_backlinks(store)
+
+    assert result.proposed == []
+    assert result.checked == 0
+    assert result.skipped_unmapped == 1
+
+
+def test_dry_run_reports_without_writing(store: KBStore) -> None:
+    _entity(store, "a")
+    _entity(store, "b")
+    _approved_relation(store, src="a", relation=RelationType.DEPENDS_ON.value, target="b")
+
+    result = reconcile_backlinks(store, dry_run=True)
+
+    assert len(result.proposed) == 1
+    assert result.dry_run is True
+    # Reports the would-propose set, but nothing actually lands on disk.
+    assert _pending_relation_proposals(store) == []
+
+
+def test_rerun_does_not_duplicate_pending_proposals(store: KBStore) -> None:
+    _entity(store, "a")
+    _entity(store, "b")
+    _approved_relation(store, src="a", relation=RelationType.DEPENDS_ON.value, target="b")
+
+    first = reconcile_backlinks(store)
+    second = reconcile_backlinks(store)
+
+    assert len(first.proposed) == 1
+    # The gap is already waiting for review, so a second run must not re-file it.
+    assert second.proposed == []
+    assert second.skipped_pending == 1
+    assert len(_pending_relation_proposals(store)) == 1
+
+
+def test_limit_bounds_proposals_per_run(store: KBStore) -> None:
+    _entity(store, "hub")
+    for i in range(5):
+        node = f"leaf{i}"
+        _entity(store, node)
+        _approved_relation(store, src="hub", relation=RelationType.DEPENDS_ON.value, target=node)
+
+    result = reconcile_backlinks(store, limit=2)
+
+    assert len(result.proposed) == 2
+
+
+def test_rel_types_filters_scanned_edges(store: KBStore) -> None:
+    _entity(store, "a")
+    _entity(store, "b")
+    _entity(store, "c")
+    _approved_relation(store, src="a", relation=RelationType.DEPENDS_ON.value, target="b")
+    _approved_relation(store, src="a", relation=RelationType.SIMILAR_TO.value, target="c")
+
+    result = reconcile_backlinks(store, rel_types=[RelationType.SIMILAR_TO.value])
+
+    assert result.checked == 1
+    assert len(result.proposed) == 1
+    assert result.proposed[0].payload["relation"] == RelationType.SIMILAR_TO.value
+
+
+def test_custom_inverse_map_from_config(store: KBStore) -> None:
+    _entity(store, "a")
+    _entity(store, "b")
+    store.config_path.write_text(
+        yaml.safe_dump({"backlinks": {"inverse_map": {"uses": "uses"}}}),
+        encoding="utf-8",
+    )
+    _approved_relation(store, src="a", relation=RelationType.USES.value, target="b")
+
+    result = reconcile_backlinks(store)
+
+    assert len(result.proposed) == 1
+    assert result.proposed[0].payload["relation"] == RelationType.USES.value
+    assert result.proposed[0].payload["target"] == "a"
+
+
+def test_malformed_config_falls_back_to_default_map(store: KBStore) -> None:
+    _entity(store, "a")
+    _entity(store, "b")
+    store.config_path.write_text("backlinks: not-a-mapping\n", encoding="utf-8")
+    _approved_relation(store, src="a", relation=RelationType.DEPENDS_ON.value, target="b")
+
+    result = reconcile_backlinks(store)
+
+    assert len(result.proposed) == 1
+    assert result.proposed[0].payload["relation"] == RelationType.BLOCKS.value