Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/vouch/capabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
"kb.provenance_rebuild",
"kb.detect_themes",
"kb.propose_theme",
"kb.reconcile_backlinks",
]


Expand Down
46 changes: 46 additions & 0 deletions src/vouch/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2605,6 +2605,52 @@ def detect_themes_cmd(
)


# --- backlink reconciliation (#307) ----------------------------------------


@cli.command("reconcile-backlinks")
@click.option(
    "--rel-types",
    default=None,
    help="Comma-separated relation types to scan "
         "(default: every type with a configured inverse).",
)
@click.option(
    "--limit",
    default=50,
    show_default=True,
    type=int,
    help="Max proposals to file in one run.",
)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Report the would-propose set; write nothing.",
)
@click.option(
    "--json",
    "as_json",
    is_flag=True,
    help="Emit JSON instead of text.",
)
def reconcile_backlinks_cmd(
    rel_types: str | None,
    limit: int,
    dry_run: bool,
    as_json: bool,
) -> None:
    """Propose missing reverse relations across the graph (#307)."""
    # An absent/empty --rel-types means "no filter" (None); otherwise split on
    # commas and drop blank entries.
    types: list[str] | None = None
    if rel_types:
        stripped = (part.strip() for part in rel_types.split(","))
        types = [name for name in stripped if name]

    store = _load_store()
    with _cli_errors():
        result = life.reconcile_backlinks(
            store,
            rel_types=types,
            limit=limit,
            dry_run=dry_run,
        )

    # Machine-readable mode: counters plus proposal ids only, then stop.
    if as_json:
        _emit_json({
            "checked": result.checked,
            "proposed": [proposal.id for proposal in result.proposed],
            "skipped_unmapped": result.skipped_unmapped,
            "skipped_existing": result.skipped_existing,
            "dry_run": result.dry_run,
        })
        return

    if not result.proposed:
        click.echo("no missing backlinks found")
        return

    # Human-readable mode: a summary line, then one line per proposal.
    if dry_run:
        verb = "would propose"
    else:
        verb = "proposed"
    click.echo(f"{verb} {len(result.proposed)} backlink proposal(s)")
    for pr in result.proposed:
        edge = pr.payload
        click.echo(
            f" {pr.id} {edge['source']} "
            f"--{edge['relation']}--> {edge['target']}"
        )
    if dry_run:
        click.echo("rerun without --dry-run to file these proposals")


# --- export / import ------------------------------------------------------


Expand Down
17 changes: 17 additions & 0 deletions src/vouch/jsonl_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,22 @@ def _h_propose_theme(p: dict) -> dict:
return themes.propose_theme(store, cluster, proposed_by=actor)


def _h_reconcile_backlinks(p: dict) -> dict:
    """Handle `kb.reconcile_backlinks`: run the backlink pass and summarise it.

    Reads the optional `rel_types`, `limit` (default 50, coerced with `int`)
    and `dry_run` (default False, coerced with `bool`) params, delegates to
    `life.reconcile_backlinks`, and returns the counters plus the ids of the
    proposals it produced.
    """
    store = _store()
    rel_types = p.get("rel_types")
    limit = int(p.get("limit", 50))
    dry_run = bool(p.get("dry_run", False))

    outcome = life.reconcile_backlinks(
        store, rel_types=rel_types, limit=limit, dry_run=dry_run,
    )

    # Only proposal ids go over the wire, not the full proposal objects.
    return {
        "checked": outcome.checked,
        "proposed": [proposal.id for proposal in outcome.proposed],
        "skipped_unmapped": outcome.skipped_unmapped,
        "skipped_existing": outcome.skipped_existing,
        "dry_run": outcome.dry_run,
    }


HANDLERS: dict[str, Callable[[dict], Any]] = {
"kb.capabilities": _h_capabilities,
"kb.status": _h_status,
Expand Down Expand Up @@ -751,6 +767,7 @@ def _h_propose_theme(p: dict) -> dict:
"kb.provenance_rebuild": _h_provenance_rebuild,
"kb.detect_themes": _h_detect_themes,
"kb.propose_theme": _h_propose_theme,
"kb.reconcile_backlinks": _h_reconcile_backlinks,
}


Expand Down
154 changes: 153 additions & 1 deletion src/vouch/lifecycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,23 @@

If you want stricter review on lifecycle changes, gate the CLI commands
behind a config flag rather than refactoring this module.

`reconcile_backlinks` at the bottom is the one exception: it's a
read-then-*propose* pass over the relation graph, not a direct mutation —
every gap it finds lands as a pending `Proposal`, same as `propose_relation`
itself, and requires a human `kb.approve` like any other write.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from datetime import UTC, datetime

import yaml

from . import audit
from .models import Claim, ClaimStatus, Evidence, Relation, RelationType
from .models import Claim, ClaimStatus, Evidence, Proposal, Relation, RelationType
from .proposals import ProposalError, propose_relation
from .storage import ArtifactNotFoundError, KBStore


Expand Down Expand Up @@ -154,3 +163,146 @@ def cite(store: KBStore, claim_id: str) -> list[Evidence | dict]:
except ArtifactNotFoundError:
out.append({"kind": "missing", "ref": ref})
return out


# --- backlink reconciliation (#307) ----------------------------------------

# Only pairs with an unambiguous natural inverse are mapped by default.
# `owned_by` and the other purely directed types (uses, supports, caused_by,
# derived_from, implements, references, mentions, supersedes) have no
# corresponding "reverse" RelationType value today, so — per the "unmapped
# types are skipped rather than guessed" rule — they are left out rather than
# invented. A KB can replace this map via `backlinks.inverse_map` in
# `.vouch/config.yaml`; a configured map is used instead of these defaults,
# not merged with them, so it must restate any default pairs it wants to keep.

# Mirrors extractors/edges.py's AUTO_EXTRACTOR_ACTOR: proposals filed by this
# automated pass are attributed to a fixed bot identity rather than to
# whichever human or agent happened to invoke it, so they can be told apart
# (and, if a bulk reject like `reject_auto_extracted` is ever added for these,
# filtered on).
RECONCILE_ACTOR = "reconcile"

# Built-in relation-type -> mirror-type table, keyed and valued by the enum's
# string value. Directed pairs are listed in both directions; symmetric types
# map to themselves.
_DEFAULT_BACKLINK_INVERSE_MAP: dict[str, str] = {
    forward.value: mirror.value
    for forward, mirror in (
        (RelationType.DEPENDS_ON, RelationType.BLOCKS),
        (RelationType.BLOCKS, RelationType.DEPENDS_ON),
        (RelationType.SIMILAR_TO, RelationType.SIMILAR_TO),
        (RelationType.RELATES_TO, RelationType.RELATES_TO),
        (RelationType.CONTRADICTS, RelationType.CONTRADICTS),
    )
}


def _load_backlink_inverse_map(store: KBStore) -> dict[str, str]:
    """Read the relation-type inverse map from config, with defensive defaults.

    Mirrors the `themes._load_theme_config` pattern: every value is
    type-checked and falls back to the default map rather than crashing on
    malformed input. Declared in `.vouch/config.yaml` as:

        backlinks:
          inverse_map:
            depends_on: blocks
            blocks: depends_on

    A configured map replaces the default map entirely (it is not merged).
    Entries whose key or value is not a string are dropped; if nothing usable
    remains, a copy of the default map is returned.

    A relation type absent from the resulting map has no defined mirror and
    is skipped by `reconcile_backlinks`, not guessed.

    Args:
        store: KB store whose `config_path` points at the YAML config file.

    Returns:
        A fresh `dict` mapping relation-type value -> mirror relation-type
        value. Never the module-level default object itself, so callers may
        mutate the result.
    """
    try:
        raw = yaml.safe_load(store.config_path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, yaml.YAMLError):
        # UnicodeDecodeError is a ValueError, not an OSError: a config file
        # that is not valid UTF-8 must fall back to defaults like any other
        # unreadable or malformed config instead of propagating.
        raw = None

    cfg = raw if isinstance(raw, dict) else {}
    backlinks_cfg = cfg.get("backlinks")
    inverse_map = (
        backlinks_cfg.get("inverse_map")
        if isinstance(backlinks_cfg, dict)
        else None
    )
    if not isinstance(inverse_map, dict):
        return dict(_DEFAULT_BACKLINK_INVERSE_MAP)

    # Keep only well-typed pairs; an empty result (including an empty
    # configured map) falls back to the defaults.
    cleaned = {
        k: v for k, v in inverse_map.items()
        if isinstance(k, str) and isinstance(v, str)
    }
    return cleaned or dict(_DEFAULT_BACKLINK_INVERSE_MAP)


@dataclass
class ReconcileResult:
    """Counters and proposals produced by one `reconcile_backlinks` run."""

    # Scanned relations whose type had a configured inverse.
    checked: int = 0
    # Proposals returned by `propose_relation` for the missing mirror edges.
    proposed: list[Proposal] = field(default_factory=list)
    # Scanned relations whose type had no entry in the inverse map.
    skipped_unmapped: int = 0
    # Scanned relations whose mirror edge was already present.
    skipped_existing: int = 0
    # Echo of the `dry_run` flag the run was invoked with.
    dry_run: bool = False


def reconcile_backlinks(
    store: KBStore,
    *,
    rel_types: list[str] | None = None,
    limit: int = 50,
    dry_run: bool = False,
    proposed_by: str = RECONCILE_ACTOR,
) -> ReconcileResult:
    """Propose missing reverse relations across the graph (#307).

    A read-then-propose pass: for every existing `Relation` whose type has
    a configured inverse (`_load_backlink_inverse_map`), checks whether the
    mirror edge (`target --mirror--> source`) already exists and, if not,
    files one `propose_relation` proposal for it — never writes an approved
    edge directly. Relation types absent from the inverse map are skipped
    rather than guessed.

    Args:
        store: KB store to scan and to file proposals against.
        rel_types: If given, restricts which *original* edges are scanned
            (by their own type, before mirroring). A bare string is treated
            as a comma-separated list, matching the CLI's `--rel-types`.
            `None` or an empty list scans every relation.
        limit: Upper bound on how many proposals a single run files; the
            scan stops as soon as it is reached.
        dry_run: Threaded straight into `propose_relation`, which still
            validates and returns a real `Proposal` but writes nothing to
            disk.
        proposed_by: Actor recorded on each proposal.

    Returns:
        A `ReconcileResult` with the scan counters and the proposals made.
        A mirror that `propose_relation` rejects with `ProposalError` is
        skipped and appears in none of the counters.

    Note on the graph read: the issue that requested this (#307) suggested
    reading via `kb.graph_export`. That method renders the unrelated
    *provenance* DAG (claim citations, supersedes, approvedBy — see
    `provenance.py`) as a dot/mermaid string and never touches `Relation`
    objects. The actual full relation edge set is `store.list_relations()`,
    and `store.relations_from()` is the existence check — the same
    underlying data `kb.neighbors` merges in, minus its extra structural
    (non-Relation) edges that would otherwise risk a false "already exists"
    match. This function reads that store surface directly instead.
    """
    inverse_map = _load_backlink_inverse_map(store)
    relations = store.list_relations()

    # A bare string would otherwise be exploded by set() into single
    # characters and silently match no relation type at all.
    if isinstance(rel_types, str):
        rel_types = [t.strip() for t in rel_types.split(",") if t.strip()]
    if rel_types:
        wanted = set(rel_types)
        relations = [r for r in relations if r.relation.value in wanted]

    result = ReconcileResult(dry_run=dry_run)
    # Mirror edges already handled in this run, so two originals that imply
    # the same mirror produce a single proposal.
    seen: set[tuple[str, str, str]] = set()
    # Outgoing (type, target) pairs per source, fetched from the store at
    # most once per source instead of once per scanned relation.
    outgoing: dict[str, set[tuple[str, str]]] = {}

    for rel in relations:
        if len(result.proposed) >= limit:
            break
        mirror_type = inverse_map.get(rel.relation.value)
        if mirror_type is None:
            result.skipped_unmapped += 1
            continue
        result.checked += 1

        mirror_src, mirror_target = rel.target, rel.source
        key = (mirror_src, mirror_type, mirror_target)
        if key in seen:
            continue

        edges = outgoing.get(mirror_src)
        if edges is None:
            edges = {
                (r.relation.value, r.target)
                for r in store.relations_from(mirror_src)
            }
            outgoing[mirror_src] = edges
        if (mirror_type, mirror_target) in edges:
            result.skipped_existing += 1
            continue

        seen.add(key)
        try:
            pr = propose_relation(
                store,
                src=mirror_src,
                relation=mirror_type,
                target=mirror_target,
                proposed_by=proposed_by,
                rationale=f"backlink for {rel.id}",
                dry_run=dry_run,
            )
        except ProposalError:
            # Best-effort pass: a mirror that fails proposal validation is
            # skipped so the rest of the graph is still reconciled.
            continue
        result.proposed.append(pr)
    return result
29 changes: 29 additions & 0 deletions src/vouch/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,6 +973,35 @@ def kb_propose_theme(
return themes.propose_theme(store, cluster, proposed_by=actor)


@mcp.tool()
def kb_reconcile_backlinks(
    rel_types: list[str] | None = None,
    limit: int = 50,
    dry_run: bool = False,
) -> dict[str, Any]:
    """Propose missing reverse relations across the graph (#307).

    Read-then-propose: scans existing relations for a configured mirror
    that's missing at the target and files one `kb.propose_relation` per
    gap. Never writes an approved edge directly — pending human approval
    via `vouch approve <id>`, like any other proposal. `dry_run` reports
    the would-propose set without writing anything.
    """
    # NOTE(review): docstring left verbatim — presumably the tool decorator
    # exposes it to clients as the tool description; confirm before rewording.
    try:
        outcome = life.reconcile_backlinks(
            _store(),
            rel_types=rel_types,
            limit=limit,
            dry_run=dry_run,
        )
    except (ArtifactNotFoundError, ValueError, ProposalError) as exc:
        # Surface every expected failure as a plain ValueError carrying the
        # original message, with the cause chained.
        raise ValueError(str(exc)) from exc

    # Only proposal ids are returned, alongside the scan counters.
    return {
        "checked": outcome.checked,
        "proposed": [proposal.id for proposal in outcome.proposed],
        "skipped_unmapped": outcome.skipped_unmapped,
        "skipped_existing": outcome.skipped_existing,
        "dry_run": outcome.dry_run,
    }


def _current_model_name() -> str:
try:
from .embeddings import get_embedder
Expand Down
Loading
Loading