Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions docs/manifestos/error-handling.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Error-handling manifesto

Rule IDs in this file use the prefix `EH-`.

## EH-001: No silent broad excepts

`except Exception: pass` (or `except BaseException: pass`) is forbidden in
committed code. It hides real bugs behind a green CI. If you genuinely want
to swallow an exception, you must:

1. catch the *specific* exception class you expect, and
2. log it (or otherwise surface it) with enough context to debug.

Severity: **sev1** — blocks auto-merge.

## EH-002: No bare `except:`

`except:` (no exception class) catches `KeyboardInterrupt` and `SystemExit`
and is almost never what you want. Use `except Exception:` at minimum, and
prefer a specific class.

Severity: **sev1**.

## EH-003: No `print` debugging in committed code

Leftover `print(...)` statements used for ad-hoc debugging should not ship.
Use the project's logger.

Severity: **sev2**.
19 changes: 19 additions & 0 deletions docs/manifestos/testing.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Testing manifesto

Rule IDs in this file use the prefix `TE-`.

## TE-001: Tests must assert behaviour, not mock returns

A test that only asserts the return value of a mock it just configured is a
tautology. Assert on observable behaviour: state changes, emitted events,
side effects against a fake collaborator.

Severity: **sev1**.

## TE-002: Cover the sad path

Every new public function needs at least one adversarial test: bad input,
missing config, partial failure, or empty list. A happy-path-only test
suite ships bugs.

Severity: **sev2**.
56 changes: 55 additions & 1 deletion src/forge_loop/_critic_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,59 @@
from __future__ import annotations

import asyncio
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any

# Canonical manifesto location. Can be overridden by LOOP_MANIFESTOS_DIR
# (operator escape hatch — primarily for tests). The default tracks the
# project layout: ``docs/manifestos/*.md`` at the repo root.
DEFAULT_MANIFESTOS_SUBDIR = ("docs", "manifestos")


def _manifestos_dir(repo: Path) -> Path:
override = os.environ.get("LOOP_MANIFESTOS_DIR")
if override:
return Path(override).expanduser()
return Path(repo, *DEFAULT_MANIFESTOS_SUBDIR)


def load_manifestos_text(repo: Path) -> str:
"""Load every manifesto file under the canonical manifestos dir and
return a single block of text ready to interpolate into the critic
prompt.

Each manifesto is rendered as::

## Manifesto: <filename>

<full body>

If the directory is missing or empty, returns a short placeholder so
the prompt template doesn't end up with a dangling ``{manifestos}``
section — the critic will simply have no rules to enforce.
"""
d = _manifestos_dir(repo)
if not d.is_dir():
return "(no manifestos configured — manifesto compliance check skipped)"

chunks: list[str] = []
for path in sorted(d.iterdir()):
if not path.is_file():
continue
if path.suffix.lower() not in {".md", ".markdown", ".txt"}:
continue
try:
body = path.read_text(encoding="utf-8")
except OSError:
continue
chunks.append(f"## Manifesto: {path.name}\n\n{body.rstrip()}\n")

if not chunks:
return "(no manifestos configured — manifesto compliance check skipped)"
return "\n".join(chunks)


@dataclass
class CriticSdkResult:
Expand Down Expand Up @@ -145,4 +194,9 @@ def run_po_sdk(*args: Any, **kwargs: Any) -> CriticSdkResult:
return run_critic_sdk(*args, **kwargs)


__all__ = ["CriticSdkResult", "run_critic_sdk", "run_po_sdk"]
__all__ = [
"CriticSdkResult",
"load_manifestos_text",
"run_critic_sdk",
"run_po_sdk",
]
24 changes: 24 additions & 0 deletions src/forge_loop/briefs/critic.md.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ Your job: review it and emit a structured JSON CriticReport.
PR URL: {pr_url}
Linked issue: #{issue_number}

MANIFESTOS (canonical project rules — violations are first-class findings):
{manifestos}
END MANIFESTOS

DO:
1. Read the issue via `gh issue view {issue_number} --comments` to learn the
acceptance criteria. Note any "Acceptance" or "Out of scope" sections.
Expand Down Expand Up @@ -34,6 +38,19 @@ DO NOT:
- comment on formatting (the formatter does that).
- post review comments yourself — the runner does that from your report.

MANIFESTO COMPLIANCE CHECK:
For every manifesto above, scan the PR diff for rule violations and
emit one entry in ``manifesto_violations`` per violation. Each entry:
- rule_id: the rule's identifier as it appears in the manifesto
(e.g. ``EH-001``).
- manifesto: the manifesto filename (e.g. ``error-handling.md``).
- quote: the offending snippet, verbatim, from the PR diff.
- suggested_fix: a concrete, minimal change that would resolve it.
- severity: sev1 | sev2 | sev3 — use the severity declared in the
manifesto. Default to sev3 only if the manifesto is silent.
ANY sev1 manifesto violation forces ``overall = "request_changes"``
regardless of other findings — manifesto compliance is a hard gate.

FINAL OUTPUT (one JSON line, no prose after it, no markdown fence):
{{"overall": "approve|request_changes|block",
"findings": [
Expand All @@ -43,6 +60,13 @@ FINAL OUTPUT (one JSON line, no prose after it, no markdown fence):
"line": 42 or null,
"message": "what's wrong and what to do"}}
],
"manifesto_violations": [
{{"rule_id": "EH-001",
"manifesto": "error-handling.md",
"quote": "except Exception: pass",
"suggested_fix": "catch the specific exception and log it",
"severity": "sev1|sev2|sev3"}}
],
"issue": {issue_number}}}

Hard rules:
Expand Down
90 changes: 86 additions & 4 deletions src/forge_loop/critic.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,31 @@
VALID_CATEGORY = {"correctness", "security", "style", "tests", "docs", "product"}


@dataclass
class ManifestoViolation:
"""A specific rule in a project manifesto the PR diff violates.

Emitted by the critic LLM and parsed by ``_coerce_report``. Any
violation with ``severity == "sev1"`` blocks auto-merge (see
``_coerce_report`` and ``critic_actions.plan_actions``).
"""

rule_id: str
manifesto: str
quote: str
suggested_fix: str
severity: str # sev1 | sev2 | sev3

def is_valid(self) -> bool:
return (
isinstance(self.rule_id, str) and bool(self.rule_id.strip())
and isinstance(self.manifesto, str) and bool(self.manifesto.strip())
and isinstance(self.quote, str)
and isinstance(self.suggested_fix, str)
and self.severity in VALID_SEVERITY
)


def _default_brief() -> str:
"""Load the critic brief template — bundled or operator-overridden."""
from forge_loop.briefs import load_template
Expand Down Expand Up @@ -71,16 +96,26 @@ def is_valid(self) -> bool:
class CriticReport:
overall: str # approve | request_changes | block
findings: list[Finding] = field(default_factory=list)
manifesto_violations: list[ManifestoViolation] = field(default_factory=list)
raw: str = ""

def severities(self) -> set[str]:
return {f.severity for f in self.findings}

def has_sev1(self) -> bool:
return any(f.severity == "sev1" for f in self.findings)
return (
any(f.severity == "sev1" for f in self.findings)
or any(v.severity == "sev1" for v in self.manifesto_violations)
)

def has_sev2(self) -> bool:
return any(f.severity == "sev2" for f in self.findings)
return (
any(f.severity == "sev2" for f in self.findings)
or any(v.severity == "sev2" for v in self.manifesto_violations)
)

def has_sev1_manifesto_violation(self) -> bool:
return any(v.severity == "sev1" for v in self.manifesto_violations)


@dataclass
Expand Down Expand Up @@ -113,7 +148,14 @@ def review_pr(
no auto-approve — so a human can intervene).
"""
template = brief_template or _default_brief()
brief = template.format(pr_url=pr_url, issue_number=issue_number)
from forge_loop._critic_sdk import load_manifestos_text

manifestos = load_manifestos_text(repo)
brief = template.format(
pr_url=pr_url,
issue_number=issue_number,
manifestos=manifestos,
)

logs_dir.mkdir(parents=True, exist_ok=True)
ensure_subagent_trusted(repo)
Expand Down Expand Up @@ -361,7 +403,47 @@ def _coerce_report(obj: dict[str, Any], raw: str) -> CriticReport | None:
)
if f.is_valid():
findings.append(f)
return CriticReport(overall=overall, findings=findings, raw=raw)

# Back-compat: missing field is fine, defaults to empty list.
raw_violations = obj.get("manifesto_violations") or []
if not isinstance(raw_violations, list):
raw_violations = []
violations: list[ManifestoViolation] = []
for item in raw_violations:
if not isinstance(item, dict):
continue
rule_id = item.get("rule_id")
manifesto = item.get("manifesto")
# Drop entries missing the identifying fields entirely; we won't
# fabricate a rule_id for the model.
if not isinstance(rule_id, str) or not rule_id.strip():
continue
if not isinstance(manifesto, str) or not manifesto.strip():
continue
sev_raw = item.get("severity")
# Defensive default: unknown/missing severity → sev3 (non-blocking).
severity = sev_raw if sev_raw in VALID_SEVERITY else "sev3"
v = ManifestoViolation(
rule_id=rule_id,
manifesto=manifesto,
quote=str(item.get("quote", "")),
suggested_fix=str(item.get("suggested_fix", "")),
severity=severity,
)
if v.is_valid():
violations.append(v)

# sev1 manifesto violation forces request_changes even if the model
# said "approve" — manifesto compliance is a hard gate.
if overall == "approve" and any(v.severity == "sev1" for v in violations):
overall = "request_changes"

return CriticReport(
overall=overall,
findings=findings,
manifesto_violations=violations,
raw=raw,
)


def _tail(path: Path | None, n: int) -> str:
Expand Down
18 changes: 17 additions & 1 deletion src/forge_loop/critic_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,14 @@ def plan_actions(

has_sev1 = report.has_sev1()
has_sev2 = report.has_sev2()
has_sev1_manifesto = report.has_sev1_manifesto_violation()

if report.overall == "block" or has_sev1:
if report.overall == "block" or has_sev1 or has_sev1_manifesto:
plan.block_merge = True
plan.labels_to_add.append("critic:blocking")
if has_sev1_manifesto:
plan.labels_to_add.append("critic:manifesto-violation")
reasons.append("sev1_manifesto_violation")
if has_sev1:
reasons.append("sev1_finding")
if report.overall == "block":
Expand Down Expand Up @@ -148,6 +152,18 @@ def apply_critic_report(
)
gh.post_review_comment(pr_url, f"Critic findings:\n{summary}", repo=repo)

if report.manifesto_violations:
viol_summary = "\n".join(
f"- **[{v.severity}] {v.manifesto}#{v.rule_id}** — "
f"`{v.quote.strip()[:120]}` → {v.suggested_fix}"
for v in report.manifesto_violations
)
gh.post_review_comment(
pr_url,
f"Manifesto violations:\n{viol_summary}",
repo=repo,
)

if emit is not None:
emit("critic_actions_applied", {
"pr": pr_url,
Expand Down
1 change: 1 addition & 0 deletions tests/test_briefs.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def test_render_critic_brief_substitutes_pr_and_issue() -> None:
"critic",
pr_url="https://github.com/acme/repo/pull/7",
issue_number=7,
manifestos="(test manifestos block)",
)
assert "https://github.com/acme/repo/pull/7" in out
assert "#7" in out
Expand Down
Loading
Loading