From ecb8ed63da40be35194559080d021bbf5f3b7689 Mon Sep 17 00:00:00 2001 From: kmajdoub Date: Thu, 28 May 2026 19:27:19 +0200 Subject: [PATCH] feat(critic): manifesto compliance check gates auto-merge (#133) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The critic could rubber-stamp PRs that compiled and passed tests but visibly violated project quality rules (e.g. `except Exception: pass`, weak assertions). With nothing teaching the critic about the manifestos, auto-merge shipped drift. This wires the critic into the manifestos: - `docs/manifestos/` is now the canonical home for project rules. Two seed manifestos (error-handling, testing) ship with the repo so the feature is operative on day one. - `_critic_sdk.load_manifestos_text` reads every `*.md`/`*.txt` file under the canonical dir (overridable via `LOOP_MANIFESTOS_DIR`) and renders one block per file. `critic.review_pr` interpolates that block into `briefs/critic.md.tmpl` via a new `{manifestos}` slot. - `CriticReport` gains a `manifesto_violations: list[ManifestoViolation]` field. `ManifestoViolation` is a typed dataclass with rule_id, manifesto, quote, suggested_fix, severity. Back-compat: JSON without the field still parses (defaults to `[]`). - `_coerce_report` drops malformed entries (missing rule_id/manifesto), treats unknown severities as sev3 (defensive default), and forces `overall = "request_changes"` whenever any sev1 violation appears — even if the model emitted `approve`. - `critic_actions.plan_actions` blocks auto-merge on any sev1 manifesto violation, labels the PR `critic:blocking` + `critic:manifesto-violation`, and posts a manifesto-violation summary comment via the gh client. sev2/sev3 violations are reported but not blocking. - Prompt template now teaches the model to scan for manifesto rule violations and emit the new JSON shape. Tests (`tests/test_critic_manifesto_check.py`): - happy-path: manifesto text rendered into prompt; violations field parses; back-compat with absent field; sev1 flips overall; sev2 leaves it alone. - adversarial: malformed entry skipped; unknown severity → sev3; wrong-type field ignored. - gate: sev1 violation blocks; sev2-only does not. Existing `tests/test_briefs.py::test_render_critic_brief_*` adjusted to pass the new required `manifestos` kwarg. --- docs/manifestos/error-handling.md | 29 +++ docs/manifestos/testing.md | 19 ++ src/forge_loop/_critic_sdk.py | 56 +++++- src/forge_loop/briefs/critic.md.tmpl | 24 +++ src/forge_loop/critic.py | 90 ++++++++- src/forge_loop/critic_actions.py | 18 +- tests/test_briefs.py | 1 + tests/test_critic_manifesto_check.py | 281 +++++++++++++++++++++++++++ 8 files changed, 512 insertions(+), 6 deletions(-) create mode 100644 docs/manifestos/error-handling.md create mode 100644 docs/manifestos/testing.md create mode 100644 tests/test_critic_manifesto_check.py diff --git a/docs/manifestos/error-handling.md b/docs/manifestos/error-handling.md new file mode 100644 index 0000000..ccd972f --- /dev/null +++ b/docs/manifestos/error-handling.md @@ -0,0 +1,29 @@ +# Error-handling manifesto + +Rule IDs in this file use the prefix `EH-`. + +## EH-001: No silent broad excepts + +`except Exception: pass` (or `except BaseException: pass`) is forbidden in +committed code. It hides real bugs behind a green CI. If you genuinely want +to swallow an exception, you must: + +1. catch the *specific* exception class you expect, and +2. log it (or otherwise surface it) with enough context to debug. + +Severity: **sev1** — blocks auto-merge. + +## EH-002: No bare `except:` + +`except:` (no exception class) catches `KeyboardInterrupt` and `SystemExit` +and is almost never what you want. Use `except Exception:` at minimum, and +prefer a specific class. + +Severity: **sev1**. + +## EH-003: No `print` debugging in committed code + +Leftover `print(...)` statements used for ad-hoc debugging should not ship. +Use the project's logger. + +Severity: **sev2**. diff --git a/docs/manifestos/testing.md b/docs/manifestos/testing.md new file mode 100644 index 0000000..4cbb841 --- /dev/null +++ b/docs/manifestos/testing.md @@ -0,0 +1,19 @@ +# Testing manifesto + +Rule IDs in this file use the prefix `TE-`. + +## TE-001: Tests must assert behaviour, not mock returns + +A test that only asserts the return value of a mock it just configured is a +tautology. Assert on observable behaviour: state changes, emitted events, +side effects against a fake collaborator. + +Severity: **sev1**. + +## TE-002: Cover the sad path + +Every new public function needs at least one adversarial test: bad input, +missing config, partial failure, or empty list. A happy-path-only test +suite ships bugs. + +Severity: **sev2**. diff --git a/src/forge_loop/_critic_sdk.py b/src/forge_loop/_critic_sdk.py index d2f685b..79f07ce 100644 --- a/src/forge_loop/_critic_sdk.py +++ b/src/forge_loop/_critic_sdk.py @@ -14,10 +14,59 @@ from __future__ import annotations import asyncio +import os from dataclasses import dataclass from pathlib import Path from typing import Any +# Canonical manifesto location. Can be overridden by LOOP_MANIFESTOS_DIR +# (operator escape hatch — primarily for tests). The default tracks the +# project layout: ``docs/manifestos/*.md`` at the repo root. +DEFAULT_MANIFESTOS_SUBDIR = ("docs", "manifestos") + + +def _manifestos_dir(repo: Path) -> Path: + override = os.environ.get("LOOP_MANIFESTOS_DIR") + if override: + return Path(override).expanduser() + return Path(repo, *DEFAULT_MANIFESTOS_SUBDIR) + + +def load_manifestos_text(repo: Path) -> str: + """Load every manifesto file under the canonical manifestos dir and + return a single block of text ready to interpolate into the critic + prompt. + + Each manifesto is rendered as:: + + ## Manifesto: + + + + If the directory is missing or empty, returns a short placeholder so + the prompt template doesn't end up with a dangling ``{manifestos}`` + section — the critic will simply have no rules to enforce. + """ + d = _manifestos_dir(repo) + if not d.is_dir(): + return "(no manifestos configured — manifesto compliance check skipped)" + + chunks: list[str] = [] + for path in sorted(d.iterdir()): + if not path.is_file(): + continue + if path.suffix.lower() not in {".md", ".markdown", ".txt"}: + continue + try: + body = path.read_text(encoding="utf-8") + except OSError: + continue + chunks.append(f"## Manifesto: {path.name}\n\n{body.rstrip()}\n") + + if not chunks: + return "(no manifestos configured — manifesto compliance check skipped)" + return "\n".join(chunks) + @dataclass class CriticSdkResult: @@ -145,4 +194,9 @@ def run_po_sdk(*args: Any, **kwargs: Any) -> CriticSdkResult: return run_critic_sdk(*args, **kwargs) -__all__ = ["CriticSdkResult", "run_critic_sdk", "run_po_sdk"] +__all__ = [ + "CriticSdkResult", + "load_manifestos_text", + "run_critic_sdk", + "run_po_sdk", +] diff --git a/src/forge_loop/briefs/critic.md.tmpl b/src/forge_loop/briefs/critic.md.tmpl index 5904071..de08484 100644 --- a/src/forge_loop/briefs/critic.md.tmpl +++ b/src/forge_loop/briefs/critic.md.tmpl @@ -4,6 +4,10 @@ Your job: review it and emit a structured JSON CriticReport. PR URL: {pr_url} Linked issue: #{issue_number} +MANIFESTOS (canonical project rules — violations are first-class findings): +{manifestos} +END MANIFESTOS + DO: 1. Read the issue via `gh issue view {issue_number} --comments` to learn the acceptance criteria. Note any "Acceptance" or "Out of scope" sections. @@ -34,6 +38,19 @@ DO NOT: - comment on formatting (the formatter does that). - post review comments yourself — the runner does that from your report. +MANIFESTO COMPLIANCE CHECK: + For every manifesto above, scan the PR diff for rule violations and + emit one entry in ``manifesto_violations`` per violation. Each entry: + - rule_id: the rule's identifier as it appears in the manifesto + (e.g. ``EH-001``). + - manifesto: the manifesto filename (e.g. ``error-handling.md``). + - quote: the offending snippet, verbatim, from the PR diff. + - suggested_fix: a concrete, minimal change that would resolve it. + - severity: sev1 | sev2 | sev3 — use the severity declared in the + manifesto. Default to sev3 only if the manifesto is silent. + ANY sev1 manifesto violation forces ``overall = "request_changes"`` + regardless of other findings — manifesto compliance is a hard gate. + FINAL OUTPUT (one JSON line, no prose after it, no markdown fence): {{"overall": "approve|request_changes|block", "findings": [ @@ -43,6 +60,13 @@ FINAL OUTPUT (one JSON line, no prose after it, no markdown fence): "line": 42 or null, "message": "what's wrong and what to do"}} ], + "manifesto_violations": [ + {{"rule_id": "EH-001", + "manifesto": "error-handling.md", + "quote": "except Exception: pass", + "suggested_fix": "catch the specific exception and log it", + "severity": "sev1|sev2|sev3"}} + ], "issue": {issue_number}}} Hard rules: diff --git a/src/forge_loop/critic.py b/src/forge_loop/critic.py index 3ded724..37c0bb8 100644 --- a/src/forge_loop/critic.py +++ b/src/forge_loop/critic.py @@ -41,6 +41,31 @@ VALID_CATEGORY = {"correctness", "security", "style", "tests", "docs", "product"} +@dataclass +class ManifestoViolation: + """A specific rule in a project manifesto the PR diff violates. + + Emitted by the critic LLM and parsed by ``_coerce_report``. Any + violation with ``severity == "sev1"`` blocks auto-merge (see + ``_coerce_report`` and ``critic_actions.plan_actions``). + """ + + rule_id: str + manifesto: str + quote: str + suggested_fix: str + severity: str # sev1 | sev2 | sev3 + + def is_valid(self) -> bool: + return ( + isinstance(self.rule_id, str) and bool(self.rule_id.strip()) + and isinstance(self.manifesto, str) and bool(self.manifesto.strip()) + and isinstance(self.quote, str) + and isinstance(self.suggested_fix, str) + and self.severity in VALID_SEVERITY + ) + + def _default_brief() -> str: """Load the critic brief template — bundled or operator-overridden.""" from forge_loop.briefs import load_template @@ -71,16 +96,26 @@ def is_valid(self) -> bool: class CriticReport: overall: str # approve | request_changes | block findings: list[Finding] = field(default_factory=list) + manifesto_violations: list[ManifestoViolation] = field(default_factory=list) raw: str = "" def severities(self) -> set[str]: return {f.severity for f in self.findings} def has_sev1(self) -> bool: - return any(f.severity == "sev1" for f in self.findings) + return ( + any(f.severity == "sev1" for f in self.findings) + or any(v.severity == "sev1" for v in self.manifesto_violations) + ) def has_sev2(self) -> bool: - return any(f.severity == "sev2" for f in self.findings) + return ( + any(f.severity == "sev2" for f in self.findings) + or any(v.severity == "sev2" for v in self.manifesto_violations) + ) + + def has_sev1_manifesto_violation(self) -> bool: + return any(v.severity == "sev1" for v in self.manifesto_violations) @dataclass @@ -113,7 +148,14 @@ def review_pr( no auto-approve — so a human can intervene). """ template = brief_template or _default_brief() - brief = template.format(pr_url=pr_url, issue_number=issue_number) + from forge_loop._critic_sdk import load_manifestos_text + + manifestos = load_manifestos_text(repo) + brief = template.format( + pr_url=pr_url, + issue_number=issue_number, + manifestos=manifestos, + ) logs_dir.mkdir(parents=True, exist_ok=True) ensure_subagent_trusted(repo) @@ -361,7 +403,47 @@ def _coerce_report(obj: dict[str, Any], raw: str) -> CriticReport | None: ) if f.is_valid(): findings.append(f) - return CriticReport(overall=overall, findings=findings, raw=raw) + + # Back-compat: missing field is fine, defaults to empty list. + raw_violations = obj.get("manifesto_violations") or [] + if not isinstance(raw_violations, list): + raw_violations = [] + violations: list[ManifestoViolation] = [] + for item in raw_violations: + if not isinstance(item, dict): + continue + rule_id = item.get("rule_id") + manifesto = item.get("manifesto") + # Drop entries missing the identifying fields entirely; we won't + # fabricate a rule_id for the model. + if not isinstance(rule_id, str) or not rule_id.strip(): + continue + if not isinstance(manifesto, str) or not manifesto.strip(): + continue + sev_raw = item.get("severity") + # Defensive default: unknown/missing severity → sev3 (non-blocking). + severity = sev_raw if sev_raw in VALID_SEVERITY else "sev3" + v = ManifestoViolation( + rule_id=rule_id, + manifesto=manifesto, + quote=str(item.get("quote", "")), + suggested_fix=str(item.get("suggested_fix", "")), + severity=severity, + ) + if v.is_valid(): + violations.append(v) + + # sev1 manifesto violation forces request_changes even if the model + # said "approve" — manifesto compliance is a hard gate. + if overall == "approve" and any(v.severity == "sev1" for v in violations): + overall = "request_changes" + + return CriticReport( + overall=overall, + findings=findings, + manifesto_violations=violations, + raw=raw, + ) def _tail(path: Path | None, n: int) -> str: diff --git a/src/forge_loop/critic_actions.py b/src/forge_loop/critic_actions.py index 69e01be..a898aa0 100644 --- a/src/forge_loop/critic_actions.py +++ b/src/forge_loop/critic_actions.py @@ -64,10 +64,14 @@ def plan_actions( has_sev1 = report.has_sev1() has_sev2 = report.has_sev2() + has_sev1_manifesto = report.has_sev1_manifesto_violation() - if report.overall == "block" or has_sev1: + if report.overall == "block" or has_sev1 or has_sev1_manifesto: plan.block_merge = True plan.labels_to_add.append("critic:blocking") + if has_sev1_manifesto: + plan.labels_to_add.append("critic:manifesto-violation") + reasons.append("sev1_manifesto_violation") if has_sev1: reasons.append("sev1_finding") if report.overall == "block": @@ -148,6 +152,18 @@ def apply_critic_report( ) gh.post_review_comment(pr_url, f"Critic findings:\n{summary}", repo=repo) + if report.manifesto_violations: + viol_summary = "\n".join( + f"- **[{v.severity}] {v.manifesto}#{v.rule_id}** — " + f"`{v.quote.strip()[:120]}` → {v.suggested_fix}" + for v in report.manifesto_violations + ) + gh.post_review_comment( + pr_url, + f"Manifesto violations:\n{viol_summary}", + repo=repo, + ) + if emit is not None: emit("critic_actions_applied", { "pr": pr_url, diff --git a/tests/test_briefs.py b/tests/test_briefs.py index f7f1d62..1bd161b 100644 --- a/tests/test_briefs.py +++ b/tests/test_briefs.py @@ -68,6 +68,7 @@ def test_render_critic_brief_substitutes_pr_and_issue() -> None: "critic", pr_url="https://github.com/acme/repo/pull/7", issue_number=7, + manifestos="(test manifestos block)", ) assert "https://github.com/acme/repo/pull/7" in out assert "#7" in out diff --git a/tests/test_critic_manifesto_check.py b/tests/test_critic_manifesto_check.py new file mode 100644 index 0000000..ba39ad7 --- /dev/null +++ b/tests/test_critic_manifesto_check.py @@ -0,0 +1,281 @@ +"""Tests for the manifesto-compliance arm of the critic (issue #133). + +Covers: +- `load_manifestos_text` reads every file under the canonical manifestos dir. +- `_coerce_report` parses the new `manifesto_violations` field, preserves + back-compat when absent, drops malformed entries, defaults unknown + severities to sev3, and forces `overall = "request_changes"` on any + sev1 violation. +- The brief template, once rendered, contains each manifesto body. +- The auto-merge gate (`plan_actions` in critic_actions) blocks merge on + a sev1 manifesto violation even when no sev1 finding exists. +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from forge_loop._critic_sdk import load_manifestos_text +from forge_loop.briefs import load_template +from forge_loop.critic import ( + CriticReport, + ManifestoViolation, + parse_report_from_text, +) +from forge_loop.critic_actions import plan_actions + + +# --------------------------------------------------------------------------- +# Manifesto loading + prompt rendering +# --------------------------------------------------------------------------- + + +def _seed_manifestos(repo: Path) -> None: + d = repo / "docs" / "manifestos" + d.mkdir(parents=True) + (d / "alpha.md").write_text("# Alpha rules\n\nAR-001: never alpha.\n") + (d / "beta.md").write_text("# Beta rules\n\nBR-001: never beta.\n") + # Non-markdown file should be ignored. + (d / "ignore.bin").write_bytes(b"\x00binary\x00") + + +def test_load_manifestos_text_reads_every_file(tmp_path: Path) -> None: + _seed_manifestos(tmp_path) + text = load_manifestos_text(tmp_path) + assert "## Manifesto: alpha.md" in text + assert "## Manifesto: beta.md" in text + assert "AR-001: never alpha." in text + assert "BR-001: never beta." in text + assert "binary" not in text + + +def test_load_manifestos_text_missing_dir_returns_placeholder(tmp_path: Path) -> None: + text = load_manifestos_text(tmp_path) + assert "no manifestos configured" in text + + +def test_manifesto_text_rendered_into_prompt(tmp_path: Path) -> None: + _seed_manifestos(tmp_path) + template = load_template("critic") + rendered = template.format( + pr_url="https://example.com/pr/1", + issue_number=1, + manifestos=load_manifestos_text(tmp_path), + ) + assert "AR-001: never alpha." in rendered + assert "BR-001: never beta." in rendered + assert "manifesto_violations" in rendered # JSON schema instruction present + + +# --------------------------------------------------------------------------- +# Report parsing +# --------------------------------------------------------------------------- + + +def test_critic_report_parses_violations_field() -> None: + blob = json.dumps({ + "overall": "request_changes", + "findings": [], + "manifesto_violations": [ + { + "rule_id": "EH-001", + "manifesto": "error-handling.md", + "quote": "except Exception: pass", + "suggested_fix": "narrow the except and log", + "severity": "sev1", + }, + ], + }) + report, err = parse_report_from_text(blob) + assert err is None + assert report is not None + assert len(report.manifesto_violations) == 1 + v = report.manifesto_violations[0] + assert isinstance(v, ManifestoViolation) + assert v.rule_id == "EH-001" + assert v.manifesto == "error-handling.md" + assert v.quote == "except Exception: pass" + assert v.suggested_fix == "narrow the except and log" + assert v.severity == "sev1" + assert report.has_sev1_manifesto_violation() is True + + +def test_critic_report_back_compat_no_field() -> None: + blob = json.dumps({ + "overall": "approve", + "findings": [ + {"severity": "sev3", "category": "style", "file": None, + "line": None, "message": "rename var"}, + ], + }) + report, err = parse_report_from_text(blob) + assert err is None + assert report is not None + assert report.manifesto_violations == [] + assert report.overall == "approve" + + +def test_sev1_violation_forces_request_changes() -> None: + # Model says "approve" but emits a sev1 violation — coercion flips it. + blob = json.dumps({ + "overall": "approve", + "findings": [], + "manifesto_violations": [ + { + "rule_id": "EH-001", + "manifesto": "error-handling.md", + "quote": "except Exception: pass", + "suggested_fix": "narrow it", + "severity": "sev1", + }, + ], + }) + report, err = parse_report_from_text(blob) + assert err is None + assert report is not None + assert report.overall == "request_changes" + + +def test_sev2_violation_does_not_block() -> None: + # sev2 leaves overall untouched and does not flip to request_changes. + blob = json.dumps({ + "overall": "approve", + "findings": [], + "manifesto_violations": [ + { + "rule_id": "EH-003", + "manifesto": "error-handling.md", + "quote": "print('debug')", + "suggested_fix": "use logger", + "severity": "sev2", + }, + ], + }) + report, err = parse_report_from_text(blob) + assert err is None + assert report is not None + assert report.overall == "approve" + assert len(report.manifesto_violations) == 1 + + +# --------------------------------------------------------------------------- +# Adversarial / sad-path +# --------------------------------------------------------------------------- + + +def test_malformed_violation_entry_is_skipped() -> None: + blob = json.dumps({ + "overall": "approve", + "findings": [], + "manifesto_violations": [ + {"manifesto": "x.md", "severity": "sev1"}, # missing rule_id + {"rule_id": "X-1", "severity": "sev1"}, # missing manifesto + "not-a-dict", + { + "rule_id": "Y-1", + "manifesto": "y.md", + "quote": "q", + "suggested_fix": "f", + "severity": "sev3", + }, + ], + }) + report, err = parse_report_from_text(blob) + assert err is None + assert report is not None + assert [v.rule_id for v in report.manifesto_violations] == ["Y-1"] + + +def test_unknown_severity_treated_as_sev3() -> None: + blob = json.dumps({ + "overall": "approve", + "findings": [], + "manifesto_violations": [ + { + "rule_id": "Z-1", + "manifesto": "z.md", + "quote": "q", + "suggested_fix": "f", + "severity": "critical-omg", + }, + ], + }) + report, err = parse_report_from_text(blob) + assert err is None + assert report is not None + assert len(report.manifesto_violations) == 1 + assert report.manifesto_violations[0].severity == "sev3" + # Defensive default must NOT cascade to a verdict flip. + assert report.overall == "approve" + + +def test_violations_field_wrong_type_is_ignored() -> None: + blob = json.dumps({ + "overall": "approve", + "findings": [], + "manifesto_violations": "not a list", + }) + report, err = parse_report_from_text(blob) + assert err is None + assert report is not None + assert report.manifesto_violations == [] + + +# --------------------------------------------------------------------------- +# Auto-merge gate +# --------------------------------------------------------------------------- + + +def test_automerge_blocked_on_sev1_violation() -> None: + report = CriticReport( + overall="request_changes", # already flipped by _coerce_report + findings=[], + manifesto_violations=[ + ManifestoViolation( + rule_id="EH-001", + manifesto="error-handling.md", + quote="except Exception: pass", + suggested_fix="narrow it", + severity="sev1", + ), + ], + ) + plan = plan_actions( + report, + pr_changed_lines=10, + block_on_sev2=False, + min_findings_for_approve=0, + ) + assert plan.block_merge is True + assert "critic:blocking" in plan.labels_to_add + assert "critic:manifesto-violation" in plan.labels_to_add + assert "sev1_manifesto_violation" in plan.reason + + +def test_automerge_not_blocked_on_sev2_violation_only() -> None: + report = CriticReport( + overall="approve", + findings=[], + manifesto_violations=[ + ManifestoViolation( + rule_id="EH-003", + manifesto="error-handling.md", + quote="print('x')", + suggested_fix="use logger", + severity="sev2", + ), + ], + ) + plan = plan_actions( + report, + # large enough that an empty-finding approve isn't considered + # suspicious — we want to verify sev2 manifesto alone doesn't block. + pr_changed_lines=0, + block_on_sev2=False, + min_findings_for_approve=1000, + ) + assert plan.block_merge is False + assert "critic:manifesto-violation" not in plan.labels_to_add