Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/forge_loop/audit_probes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Codebase-audit probes (issue #156).

Each probe is a small module exposing one class that satisfies the
:class:`forge_loop.codebase_audit.Probe` Protocol. Keep probes:

* Pure (filesystem in, ``Violation`` objects out).
* Independently importable — the framework can disable a broken probe
without bringing the rest down.
* Named with a stable identifier (the probe label uses it).
"""

from forge_loop.audit_probes.file_size import FileSizeProbe

# Public surface of the probes package: the probe classes re-exported above.
__all__ = ["FileSizeProbe"]
138 changes: 138 additions & 0 deletions src/forge_loop/audit_probes/file_size.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""File-size probe — first state-rule probe (issue #156).

The quality manifesto carries soft caps for module size per language.
This probe walks the repository, measures LOC per source file
(non-blank, non-comment-only lines), and yields a :class:`Violation`
for every file that exceeds its language's threshold.

Why this is the first probe
===========================

The motivating example is ``src/forge_loop/cli.py`` at >1700 LOC, 3.4×
the Python soft-cap of 500. The per-PR critic never flagged it because
cli.py grew 50-100 LOC per PR — each diff was reasonable, the cumulative
state-violation was invisible. This probe makes the cumulative
violation visible AS a violation, on the same cadence as the
maintenance daemon.

Thresholds
==========

Defaults match the manifesto:

* Python: 500 LOC
* TypeScript / TSX / JS: 400 LOC
* Java: 600 LOC

Operators override via ``FileSizeProbe(thresholds=...)``. The
``hard_threshold_multiplier`` knob escalates severity once a file
crosses N× its soft cap — the cli.py case fires P1, not P2.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable

from forge_loop.codebase_audit import Violation, walk_source_files


# Soft caps on significant lines of code, keyed by file suffix (leading dot
# included). ``FileSizeProbe`` copies this mapping as its default
# ``thresholds``; a suffix that is absent here is not measured by default.
DEFAULT_THRESHOLDS: dict[str, int] = {
    ".py": 500,  # Python
    ".ts": 400,  # TypeScript
    ".tsx": 400,  # TSX
    ".js": 400,  # JavaScript
    ".java": 600,  # Java
}


def _count_significant_lines(path: Path) -> int:
"""Count non-blank, non-comment-only lines.

Pragmatic — no AST. A line that's entirely whitespace or starts
(after lstrip) with ``#`` / ``//`` / ``/*`` / ``*`` is dropped. Good
enough to distinguish a 1700-LOC business-logic file from a 1700-
LOC docstring (none of which exist in the codebase today).

Decode failures fall back to 0 — a binary file shouldn't trip the
probe just because its suffix matched.
"""
try:
text = path.read_text(encoding="utf-8", errors="strict")
except (OSError, UnicodeDecodeError):
return 0
n = 0
for raw in text.splitlines():
line = raw.strip()
if not line:
continue
if line.startswith(("#", "//", "/*", "*")):
continue
n += 1
return n


@dataclass
class FileSizeProbe:
    """Yield one :class:`Violation` per oversized file.

    Construction parameters are kept simple — production wires defaults,
    tests override per-case.

    Attributes:
        name: Probe identifier, copied into every ``Violation.probe``.
        thresholds: Soft cap in significant LOC per file suffix. Defaults
            to a private copy of ``DEFAULT_THRESHOLDS`` so instances do
            not share (or mutate) the module-level mapping.
        hard_threshold_multiplier: A file whose LOC is at least this many
            times its soft cap is reported at severity 1 instead of 2.
    """

    name: str = "file-size"
    thresholds: dict[str, int] = field(default_factory=lambda: dict(DEFAULT_THRESHOLDS))
    hard_threshold_multiplier: float = 2.0

    def scan(self, repo: Path) -> Iterable[Violation]:
        """Walk ``repo`` and yield a violation for each file over its cap.

        Args:
            repo: Root directory handed to ``walk_source_files``.

        Yields:
            One :class:`Violation` per file whose significant-line count
            is strictly greater than the threshold for its suffix.
        """
        suffixes = tuple(self.thresholds)
        # Loop-invariant: resolve the root once rather than per violation.
        resolved_root = repo.resolve()
        for path in walk_source_files(repo, suffixes=suffixes):
            soft = self.thresholds.get(path.suffix)
            if soft is None:
                continue
            loc = _count_significant_lines(path)
            if loc <= soft:
                continue
            rel_str = self._relative_target(path, repo, resolved_root)
            yield self._build_violation(rel_str, path.suffix, loc, soft)

    @staticmethod
    def _relative_target(path: Path, repo: Path, resolved_root: Path) -> str:
        """Return ``path`` relative to the repo root, with ``/`` separators.

        An absolute path is tried against the resolved root first, then
        the root as given. If it lies under neither (``relative_to``
        raises ``ValueError``), the path is reported unchanged so a single
        odd path cannot abort the whole scan. Relative paths pass through.
        """
        rel = path
        if path.is_absolute():
            for base in (resolved_root, repo):
                try:
                    rel = path.relative_to(base)
                except ValueError:
                    continue
                break
        return str(rel).replace("\\", "/")

    def _build_violation(self, rel_str: str, suffix: str, loc: int, soft: int) -> Violation:
        """Assemble the ticket-ready :class:`Violation` for one oversized file."""
        # A zero cap cannot be divided by; treat any content as infinitely over.
        multiplier = loc / soft if soft else float("inf")
        severity = 1 if multiplier >= self.hard_threshold_multiplier else 2
        ratio_str = f"{multiplier:.1f}×"
        title = (
            f"refactor({rel_str}): split oversized module "
            f"({loc} LOC > {soft} cap, {ratio_str})"
        )
        rationale = (
            f"`{rel_str}` is {loc} significant LOC, which exceeds the "
            f"quality-manifesto soft cap of {soft} for `{suffix}` "
            f"files ({ratio_str}).\n\n"
            f"The per-PR critic does not catch this class of accumulation "
            f"violation — modules grow 50-100 LOC at a time and each "
            f"individual diff looks reasonable. The audit probe (issue "
            f"#156) is the state-based gate that the manifesto rule "
            f"\"state-based rules need a state-based gate\" calls for.\n\n"
            f"Why it matters: oversized modules are correlated with the "
            f"boiling-frog failures the manifesto cites (#147 stringly-"
            f"typed boundaries, #128 silent fallthroughs) — once a "
            f"module is hard to read end-to-end, single-character bugs "
            f"survive review."
        )
        acceptance = [
            f"`{rel_str}` is split into ≥2 focused modules, none exceeding {soft} LOC.",
            "Public import surface preserved (no downstream breakage).",
            "Tests for the split modules remain green; no test logic moved.",
            "Manifesto rationale is honoured: each new module has a single, nameable responsibility.",
        ]
        return Violation(
            probe=self.name,
            target=rel_str,
            severity=severity,
            title=title,
            rationale=rationale,
            acceptance=acceptance,
            metrics={"loc": loc, "soft_cap": soft, "ratio": round(multiplier, 2)},
        )


# Public API of this module: the probe class and its default threshold table.
__all__ = ["FileSizeProbe", "DEFAULT_THRESHOLDS"]
141 changes: 141 additions & 0 deletions src/forge_loop/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,132 @@ def _render_ticket_body(ticket: ProposedTicket, parent: int | None) -> str:
return 0


def _echo_audit_report(report, *, as_json: bool) -> None:  # noqa: ANN001 — internal
    """Print an audit report: a JSON document, or a human-readable summary."""
    if as_json:
        import json as _json

        payload = {
            "probes_run": report.probes_run,
            "violations": [
                {
                    "probe": v.probe,
                    "target": v.target,
                    "severity": v.severity,
                    "title": v.title,
                    "metrics": v.metrics,
                }
                for v in report.violations
            ],
            "errors": report.errors,
        }
        typer.echo(_json.dumps(payload, indent=2, sort_keys=True))
        return

    typer.echo(
        f"audit: probes_run={report.probes_run} "
        f"violations={len(report.violations)} errors={list(report.errors)}"
    )
    for v in report.violations:
        typer.echo(f" [P{v.severity}] {v.probe}: {v.target}")
        typer.echo(f" {v.title}")
    for probe_name, err in report.errors.items():
        typer.echo(f" ! probe {probe_name} crashed: {err}", err=True)


def _cmd_audit(args: SimpleNamespace) -> int:
    """`forge-loop audit` — codebase-state audit (issue #156).

    Contract:
    * Default (no flags): walk the repo, run every default probe,
      print a human-readable summary, exit 0. NO GitHub calls.
    * --apply: file one ticket per violation (idempotent — existing
      open tickets for the same probe+target are skipped).
    * --json: emit the report as JSON for scripting / dashboards. When
      combined with --apply, the per-ticket status lines go to stderr so
      stdout stays a single parseable JSON document.

    Errors:
    * No probe failure can cause exit != 0 on the dry-run path —
      ``audit.errors`` is surfaced as a warning but isn't a gate.
    * ``--apply`` exits 2 when no ``owner/repo`` could be read from the
      config, and 1 if the GitHub client cannot be created or ANY
      ticket-create call raised.

    Args:
        args: Namespace read for the optional ``json`` and ``apply``
            flags; a missing attribute is treated as ``False``.

    Returns:
        The process exit code (0, 1 or 2 as described above).
    """
    from forge_loop.codebase_audit import audit, file_violations

    as_json = bool(getattr(args, "json", False))

    repo_path = Path.cwd()
    owner = ""
    repo_name = ""
    extra_labels: tuple[str, ...] = ()
    # Bound up front so later reads never depend on ``load()`` having succeeded.
    cfg = None
    try:
        cfg = load()
        if getattr(cfg, "repo", None):
            repo_path = Path(cfg.repo).resolve()
        gh_repo = getattr(cfg, "github_repo", "") or ""
        if "/" in gh_repo:
            owner, repo_name = gh_repo.split("/", 1)
    except Exception:  # noqa: BLE001 — audit must work even without a config
        pass

    def _status(message: str) -> None:
        # In JSON mode stdout carries only the report; progress goes to stderr.
        typer.echo(message, err=as_json)

    report = audit(repo_path)
    _echo_audit_report(report, as_json=as_json)

    if not getattr(args, "apply", False):
        return 0

    if not owner or not repo_name:
        typer.echo(
            "audit: --apply requires github_repo configured (owner/repo)",
            err=True,
        )
        return 2

    if report.is_clean:
        _status("audit: clean — nothing to file.")
        return 0

    try:
        gh_client = _gh_client_factory()
    except Exception as exc:  # noqa: BLE001
        typer.echo(f"audit: gh client init failed: {exc}", err=True)
        return 1

    # Wire events file from the resolved config (best-effort).
    events_file: Path | None
    try:
        events_file = getattr(cfg, "events_file", None)
    except Exception:  # noqa: BLE001
        events_file = None

    def _emit_filed(v, number):  # noqa: ANN001 — internal
        if events_file is None:
            return
        try:
            from forge_loop.events import AuditViolationFiledEvent, emit

            emit(
                events_file,
                AuditViolationFiledEvent(
                    probe=v.probe,
                    target=v.target,
                    severity=v.severity,
                    issue_number=number,
                    title=v.title,
                ),
            )
        except Exception:  # noqa: BLE001 — never let event emit kill --apply
            pass

    outcome = file_violations(
        report,
        gh_client,
        owner=owner,
        repo=repo_name,
        extra_labels=extra_labels,
        emit_filed=_emit_filed,
    )
    for v, number in outcome.filed:
        _status(f"audit: filed #{number}: {v.title}")
    for v in outcome.skipped:
        _status(f"audit: skipped (already filed): {v.probe}:{v.target}")
    if outcome.errors:
        for key, err in outcome.errors.items():
            typer.echo(f"audit: ERROR filing {key}: {err}", err=True)
        return 1
    return 0


def _cmd_record_session(args: SimpleNamespace) -> int:
from forge_loop._testing.recorder import SessionRecorder
from forge_loop.worker import make_brief
Expand Down Expand Up @@ -1597,6 +1723,21 @@ def cmd_brainstorm(
_exit(_cmd_brainstorm(SimpleNamespace(apply=apply)))


@app.command(
    "audit",
    help="Codebase-state audit (issue #156). Dry-run by default; --apply files tickets.",
)
def cmd_audit(
    apply: bool = typer.Option(
        False,
        "--apply",
        help="File one ticket per violation (idempotent).",
    ),
    json_: bool = typer.Option(
        False,
        "--json",
        help="Emit the report as JSON (for scripts/dashboards).",
    ),
) -> None:
    """CLI entry point for ``audit``: pack the flags and delegate to ``_cmd_audit``."""
    # ``json_`` avoids shadowing the ``json`` module name; the handler reads it as ``json``.
    namespace = SimpleNamespace(apply=apply, json=json_)
    exit_code = _cmd_audit(namespace)
    _exit(exit_code)


@app.command("record-session", help="Record a real SDK session to a JSONL fixture.")
def cmd_record_session(
issue: int | None = typer.Option(None, "--issue"),
Expand Down
Loading
Loading