hadamrd · hadamrd · May 30, 2026 · May 30, 2026
diff --git a/src/forge_loop/audit_probes/__init__.py b/src/forge_loop/audit_probes/__init__.py
@@ -0,0 +1,14 @@
+"""Codebase-audit probes (issue #156).
+
+Each probe is a small module exposing one class that satisfies the
+:class:`forge_loop.codebase_audit.Probe` Protocol. Keep probes:
+
+* Pure (filesystem in, ``Violation`` objects out).
+* Independently importable — the framework can disable a broken probe
+  without bringing the rest down.
+* Named with a stable identifier (the probe label uses it).
+"""
+
+from forge_loop.audit_probes.file_size import FileSizeProbe
+
+__all__ = ["FileSizeProbe"]
diff --git a/src/forge_loop/audit_probes/file_size.py b/src/forge_loop/audit_probes/file_size.py
@@ -0,0 +1,138 @@
+"""File-size probe — first state-rule probe (issue #156).
+
+The quality manifesto carries soft caps for module size per language.
+This probe walks the repository, measures LOC per source file
+(non-blank, non-comment-only lines), and yields a :class:`Violation`
+for every file that exceeds its language's threshold.
+
+Why this is the first probe
+===========================
+
+The motivating example is ``src/forge_loop/cli.py`` at >1700 LOC, 3.4×
+the Python soft-cap of 500. The per-PR critic never flagged it because
+cli.py grew 50-100 LOC per PR — each diff was reasonable, the cumulative
+state-violation was invisible. This probe makes the cumulative
+violation visible AS a violation, on the same cadence as the
+maintenance daemon.
+
+Thresholds
+==========
+
+Defaults match the manifesto:
+
+* Python: 500 LOC
+* TypeScript / TSX / JS: 400 LOC
+* Java: 600 LOC
+
+Operators override via ``FileSizeProbe(thresholds=...)``. The
+``hard_threshold_multiplier`` knob escalates severity once a file
+crosses N× its soft cap — the cli.py case fires P1, not P2.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Iterable
+
+from forge_loop.codebase_audit import Violation, walk_source_files
+
+
+DEFAULT_THRESHOLDS: dict[str, int] = {  # file suffix (with dot) -> soft LOC cap
+    ".py": 500,
+    ".ts": 400,
+    ".tsx": 400,
+    ".js": 400,
+    ".java": 600,
+}
+
+
+def _count_significant_lines(path: Path) -> int:
+    """Count lines of *path* that are neither blank nor comment-only.
+
+    Pragmatic — no AST. Comment prefixes follow the file suffix so real
+    code is kept: ``*args`` continuation lines in ``.py`` and ``#private``
+    fields in JS/TS count as code. Unknown suffixes keep the broad
+    ``#`` / ``//`` / ``/*`` / ``*`` set. Docstring text is still counted.
+
+    Returns 0 if the file cannot be read or is not valid UTF-8, so a
+    binary file does not trip the probe just because its suffix matched.
+    """
+    c_style = ("//", "/*", "*")
+    suffix = path.suffix.lower()
+    if suffix == ".py":
+        prefixes: tuple[str, ...] = ("#",)
+    elif suffix in {".ts", ".tsx", ".js", ".java"}:
+        prefixes = c_style
+    else:
+        prefixes = ("#", *c_style)
+    try:
+        text = path.read_text(encoding="utf-8", errors="strict")
+    except (OSError, UnicodeDecodeError):
+        return 0
+    return sum(1 for ln in map(str.strip, text.splitlines()) if ln and not ln.startswith(prefixes))
+
+
+@dataclass
+class FileSizeProbe:
+    """Probe that reports each source file whose LOC exceeds its cap.
+
+    ``thresholds`` maps a file suffix to its soft cap. A file whose
+    LOC/cap ratio reaches ``hard_threshold_multiplier`` is severity 1,
+    any other oversized file is severity 2.
+    """
+
+    name: str = "file-size"
+    thresholds: dict[str, int] = field(default_factory=lambda: dict(DEFAULT_THRESHOLDS))
+    hard_threshold_multiplier: float = 2.0
+
+    def scan(self, repo: Path) -> Iterable[Violation]:
+        """Walk *repo* and yield one ``Violation`` per over-cap file."""
+        for path in walk_source_files(repo, suffixes=tuple(self.thresholds.keys())):
+            cap = self.thresholds.get(path.suffix)
+            if cap is None:
+                continue
+            lines = _count_significant_lines(path)
+            if lines <= cap:
+                continue
+            # Repo-relative when the path is absolute; "/" separators in the report.
+            shown = path.relative_to(repo.resolve()) if path.is_absolute() else path
+            target = str(shown).replace("\\", "/")
+            ratio = lines / cap if cap else float("inf")  # zero cap: no finite ratio
+            ratio_str = f"{ratio:.1f}×"
+            rationale = (
+                f"`{target}` is {lines} significant LOC, which exceeds the "
+                f"quality-manifesto soft cap of {cap} for `{path.suffix}` "
+                f"files ({ratio_str}).\n\n"
+                f"The per-PR critic does not catch this class of accumulation "
+                f"violation — modules grow 50-100 LOC at a time and each "
+                f"individual diff looks reasonable. The audit probe (issue "
+                f"#156) is the state-based gate that the manifesto rule "
+                f"\"state-based rules need a state-based gate\" calls for.\n\n"
+                f"Why it matters: oversized modules are correlated with the "
+                f"boiling-frog failures the manifesto cites (#147 stringly-"
+                f"typed boundaries, #128 silent fallthroughs) — once a "
+                f"module is hard to read end-to-end, single-character bugs "
+                f"survive review."
+            )
+            acceptance = [
+                f"`{target}` is split into ≥2 focused modules, none exceeding {cap} LOC.",
+                "Public import surface preserved (no downstream breakage).",
+                "Tests for the split modules remain green; no test logic moved.",
+                "Manifesto rationale is honoured: each new module has a single, nameable responsibility.",
+            ]
+            yield Violation(
+                probe=self.name,
+                target=target,
+                severity=1 if ratio >= self.hard_threshold_multiplier else 2,
+                title=(
+                    f"refactor({target}): split oversized module "
+                    f"({lines} LOC > {cap} cap, {ratio_str})"
+                ),
+                rationale=rationale,
+                acceptance=acceptance,
+                metrics={"loc": lines, "soft_cap": cap, "ratio": round(ratio, 2)},
+            )
+
+
+__all__ = ["FileSizeProbe", "DEFAULT_THRESHOLDS"]
diff --git a/src/forge_loop/cli.py b/src/forge_loop/cli.py
@@ -890,6 +890,132 @@ def _render_ticket_body(ticket: ProposedTicket, parent: int | None) -> str:
     return 0
 
 
+def _cmd_audit(args: SimpleNamespace) -> int:
+    """`forge-loop audit` — codebase-state audit (issue #156).
+
+    Contract:
+      * Default (no flags): walk the repo, run every default probe,
+        print a human-readable summary, exit 0. NO GitHub calls.
+      * --apply: file one ticket per violation (idempotent — existing
+        open tickets for the same probe+target are skipped).
+      * --json: emit the report as JSON; --apply progress lines then go
+        to stderr so stdout stays one parseable JSON document.
+
+    Exit codes:
+      * 0 — dry-run (even if probes crashed: ``report.errors`` is only a
+        warning), or an --apply with no filing errors.
+      * 1 — --apply: gh client init failed or a ticket-create call raised.
+      * 2 — --apply without ``github_repo`` configured as owner/repo.
+    """
+    import json as _json
+
+    from forge_loop.codebase_audit import audit, file_violations
+
+    as_json = bool(getattr(args, "json", False))
+    repo_path = Path.cwd()
+    owner = ""
+    repo_name = ""
+    extra_labels: tuple[str, ...] = ()
+    cfg = None  # bound up front so later lookups never hit an unbound name
+    cfg_error: Exception | None = None
+    try:
+        cfg = load()
+        repo_path = Path(cfg.repo).resolve() if getattr(cfg, "repo", None) else repo_path
+        gh_repo = getattr(cfg, "github_repo", "") or ""
+        if "/" in gh_repo:
+            owner, repo_name = gh_repo.split("/", 1)
+    except Exception as exc:  # noqa: BLE001 — audit must work even without a config
+        cfg_error = exc  # stay quiet on dry-run; reported only if --apply needs it
+
+    report = audit(repo_path)
+
+    if as_json:
+        payload = {
+            "probes_run": report.probes_run,
+            "violations": [
+                {
+                    "probe": v.probe,
+                    "target": v.target,
+                    "severity": v.severity,
+                    "title": v.title,
+                    "metrics": v.metrics,
+                }
+                for v in report.violations
+            ],
+            "errors": report.errors,
+        }
+        typer.echo(_json.dumps(payload, indent=2, sort_keys=True))
+    else:
+        typer.echo(
+            f"audit: probes_run={report.probes_run} "
+            f"violations={len(report.violations)} errors={list(report.errors)}"
+        )
+        for v in report.violations:
+            typer.echo(f"  [P{v.severity}] {v.probe}: {v.target}")
+            typer.echo(f"        {v.title}")
+        for probe_name, err in report.errors.items():
+            typer.echo(f"  ! probe {probe_name} crashed: {err}", err=True)
+
+    if not getattr(args, "apply", False):
+        return 0
+
+    if not owner or not repo_name:
+        msg = "audit: --apply requires github_repo configured (owner/repo)"
+        if cfg_error is not None:
+            msg += f" (config not loaded: {cfg_error})"
+        typer.echo(msg, err=True)
+        return 2
+
+    if report.is_clean:
+        typer.echo("audit: clean — nothing to file.", err=as_json)
+        return 0
+
+    try:
+        gh_client = _gh_client_factory()
+    except Exception as exc:  # noqa: BLE001
+        typer.echo(f"audit: gh client init failed: {exc}", err=True)
+        return 1
+
+    events_file: Path | None = getattr(cfg, "events_file", None)  # best-effort wiring
+
+    def _emit_filed(v, number):  # noqa: ANN001 — internal
+        if events_file is None:
+            return
+        try:
+            from forge_loop.events import AuditViolationFiledEvent, emit
+
+            emit(
+                events_file,
+                AuditViolationFiledEvent(
+                    probe=v.probe,
+                    target=v.target,
+                    severity=v.severity,
+                    issue_number=number,
+                    title=v.title,
+                ),
+            )
+        except Exception:  # noqa: BLE001 — never let event emit kill --apply
+            pass
+
+    outcome = file_violations(
+        report,
+        gh_client,
+        owner=owner,
+        repo=repo_name,
+        extra_labels=extra_labels,
+        emit_filed=_emit_filed,
+    )
+    for v, number in outcome.filed:
+        typer.echo(f"audit: filed #{number}: {v.title}", err=as_json)
+    for v in outcome.skipped:
+        typer.echo(f"audit: skipped (already filed): {v.probe}:{v.target}", err=as_json)
+    if outcome.errors:
+        for key, err in outcome.errors.items():
+            typer.echo(f"audit: ERROR filing {key}: {err}", err=True)
+        return 1
+    return 0
+
+
 def _cmd_record_session(args: SimpleNamespace) -> int:
     from forge_loop._testing.recorder import SessionRecorder
     from forge_loop.worker import make_brief
@@ -1597,6 +1723,21 @@ def cmd_brainstorm(
     _exit(_cmd_brainstorm(SimpleNamespace(apply=apply)))
 
 
+@app.command(
+    "audit",
+    help="Codebase-state audit (issue #156). Dry-run by default; --apply files tickets.",
+)
+def cmd_audit(
+    apply: bool = typer.Option(False, "--apply", help="File one ticket per violation (idempotent)."),
+    json_: bool = typer.Option(False, "--json", help="Emit the report as JSON (for scripts/dashboards)."),
+) -> None:
+    # Typer entry point: pack the flags into the namespace ``_cmd_audit``
+    # reads (``apply`` / ``json``) and hand its return value to ``_exit``.
+    parsed = SimpleNamespace(apply=apply, json=json_)
+    status = _cmd_audit(parsed)
+    _exit(status)
+
+
 @app.command("record-session", help="Record a real SDK session to a JSONL fixture.")
 def cmd_record_session(
     issue: int | None = typer.Option(None, "--issue"),