From 72040a2e03afb7f8f37840f25efdc2772e8b65a2 Mon Sep 17 00:00:00 2001
From: kmajdoub <majdoub.khalid@gmail.com>
Date: Thu, 28 May 2026 19:09:58 +0200
Subject: [PATCH] feat(manifestos): seed quality+testing manifestos for
 forge-loop (#135)

Dogfood the manifestos system on forge-loop itself by writing the seed
quality and testing manifestos that every future forge-loop change is
gated against.

quality-manifesto.md codifies five rules drawn from this week's
persistent-worker work: no shared module-level state (#100), typed
Protocol+Fake at every I/O boundary (#104), single Settings source of
truth (#98), typed events instead of untyped **fields (#99), and no
subprocess.run for SDK-able services (#103, #105). Each rule names the
concrete issue it came from so future contributors know the *why*.

testing-manifesto.md codifies six rules drawn from this week's
iteration-probe bugs: one test per state-machine edge plus a fallthrough
adversarial (would have caught #97/#120/#128), an adversarial test for
the false case of every external-dep assumption, both ==0 and !=0
branches for every subprocess.returncode (specifically #128), a
contract test pinning every Fake to its Real, hypothesis property
tests on >4-branch / user-input functions (#102), and an adversarial
test that every infinite-loop guard actually fires.

tests/test_manifestos_discovery.py is the meta-validation gate: it
discovers and parses both files, asserts each rule has a rationale,
asserts the spec-mandated issue references are present, and includes
adversarial tests that stubs and missing files are detectable. 22
tests, all pass.
---
 .forge/quality-manifesto.md        |  75 ++++++++++++++++
 .forge/testing-manifesto.md        |  88 +++++++++++++++++++
 .gitignore                         |   3 +
 tests/test_manifestos_discovery.py | 132 +++++++++++++++++++++++++++++
 4 files changed, 298 insertions(+)
 create mode 100644 .forge/quality-manifesto.md
 create mode 100644 .forge/testing-manifesto.md
 create mode 100644 tests/test_manifestos_discovery.py

diff --git a/.forge/quality-manifesto.md b/.forge/quality-manifesto.md
new file mode 100644
index 0000000..f533a06
--- /dev/null
+++ b/.forge/quality-manifesto.md
@@ -0,0 +1,75 @@
+# forge-loop quality manifesto (seed)
+
+This is the dogfood quality manifesto for forge-loop itself. Every future
+forge-loop change is gated by these rules. Each rule is followed by a
+rationale that names the iteration-probe bug or persistent-worker work
+that motivated it, so future contributors understand *why* the rule
+exists, not just what it says.
+
+## Rules
+
+### Q1. No shared mutable module-level state. Use a Container or per-instance State.
+
+Module-level globals (caches, counters, "current run" dicts, singleton
+queues) make tests order-dependent and make the worker un-restartable in
+process. All mutable state MUST hang off a `Container` or a per-instance
+`State`/`Runner` object that can be re-created cheaply.
+
+**Rationale:** see #100 (Runner class). The iteration-probe leaked state
+between sprints because the runner held onto module-level dicts; the
+persistent-worker refactor wedged that into per-instance state and the
+class of bug went away.
+
+### Q2. Every external I/O boundary lives behind a typed Protocol with a Fake for tests.
+
+If forge-loop talks to the network, the filesystem, a subprocess, an LLM
+API, or `gh`, the call MUST go through a `typing.Protocol` adapter with a
+companion `Fake*` implementation in `forge_loop/_testing/`. Tests use the
+Fake. Production wires the Real. No ad-hoc `httpx.get` / `subprocess.run`
+calls scattered through business logic.
+
+**Rationale:** see #104 (adapters). Untyped boundaries are exactly where
+mocks drift away from reality and where flaky tests breed.
+
+### Q3. Single config source of truth via `Settings`. No `os.environ.get` outside `settings.py`.
+
+Configuration is read in exactly one place: `forge_loop/config.py`'s
+`Settings` (or its successor `settings.py`). Reaching for
+`os.environ.get(...)` anywhere else is a lint-level violation. Tests
+override config by constructing a `Settings` instance, not by mutating
+`os.environ`.
+
+**Rationale:** see #98. The iteration probe found two different code
+paths reading the same env var with different defaults, producing
+divergent behaviour between the CLI and the worker.
+
+### Q4. Typed events for every state change. No untyped `**fields` for kinds that have a registered model.
+
+If an event `kind` has a registered pydantic / dataclass model, the
+event MUST be constructed via that model. Passing arbitrary `**fields`
+into a generic `emit()` for a known kind is forbidden — it defeats
+schema validation and lets typos through silently.
+
+**Rationale:** see #99. A misspelled field (`attempt_no` vs `attempt`)
+silently fell through `**fields` and produced empty dashboard rows for
+two days before anyone noticed.
+
+### Q5. No `subprocess.run` for SDK-able external services.
+
+If a service has a first-class Python SDK (Anthropic, GitHub), use the
+SDK. Shelling out to `claude` / `gh` from inside forge-loop is
+forbidden in new code. Existing shell-outs must migrate (#103 critic
+SDK, #105 GhClient).
+
+**Rationale:** subprocess wrappers hide returncode handling bugs
+(#128), eat structured errors, and make tests painful (you end up
+mocking argv strings instead of method calls).
+
+## How to apply this manifesto
+
+* When reviewing a forge-loop PR, scan the diff for each rule. A
+  violation is a blocking review comment, not a nit.
+* When the critic agent reviews a forge-loop PR, it loads this file and
+  flags violations as P0 findings.
+* When adding a new rule, add a rationale paragraph that names the
+  concrete issue or incident. No rationale ⇒ no rule.
diff --git a/.forge/testing-manifesto.md b/.forge/testing-manifesto.md
new file mode 100644
index 0000000..8179c50
--- /dev/null
+++ b/.forge/testing-manifesto.md
@@ -0,0 +1,88 @@
+# forge-loop testing manifesto (seed)
+
+This is the dogfood testing manifesto for forge-loop itself. It exists
+because three of this week's bugs (#97, #120, #128) all shared the same
+root cause: tests covered the happy edge of a transition and silently
+ignored the default / fallthrough branch. The rules below would have
+caught all three.
+
+## Rules
+
+### T1. State machine ⇒ ONE TEST PER EDGE + ONE fallthrough adversarial test per default-branch.
+
+If a function is a state machine — `match`, `if/elif/elif/else`, a
+dispatch table over an enum — there MUST be one test per edge AND one
+adversarial test for each `else` / default arm that asserts the
+fallthrough is reached on at least one realistic input that doesn't
+match any explicit branch.
+
+**Rationale:** would have caught #97, #120, AND #128. All three shipped
+because the test suite covered the explicit cases and assumed the
+default arm was unreachable.
+
+### T2. External-dep assumption ⇒ ONE adversarial test for the false case.
+
+Any time the production code asks an external system a yes/no question
+— "does `origin/<branch>` exist?", "is the `gh` CLI alive?", "is this
+file readable?" — there MUST be a test asserting correct behaviour when
+the answer is **no**. The happy "yes" test alone is not sufficient.
+
+**Rationale:** the iteration probe found ~6 places where forge-loop
+assumed an external check returned the answer it wanted, with no test
+for the negative branch.
+
+### T3. `subprocess.returncode` handling ⇒ test BOTH `==0` and `!=0` branches.
+
+Any call site that inspects `CompletedProcess.returncode` MUST have
+explicit tests for both the success (`==0`) and the failure (`!=0`)
+branches. Stderr-only failure modes (returncode 0 but stderr non-empty)
+also get a test if the production code looks at stderr.
+
+**Rationale:** see #128 specifically. The bug was a silent `!=0` branch
+that fell through to "success" because the test only exercised the
+returncode=0 path.
+
+### T4. Every Protocol Fake ⇒ a regression test that asserts the Real impl returns the same shape on representative inputs.
+
+For every `Fake*` adapter under `forge_loop/_testing/`, there MUST be a
+contract test that runs the **real** implementation on a representative
+input (a tmpdir, a tiny mock server, a recorded fixture) and asserts
+its output shape matches what the Fake produces. This keeps Fakes from
+drifting away from reality.
+
+**Rationale:** Fakes that diverge from Reals produce tests that pass
+locally and fail in production. The contract test is cheap insurance.
+
+### T5. Property-based tests on any function with >4 branches OR any function consuming user input.
+
+If a pure function has more than four distinct branches, OR if it
+consumes any user-supplied string / bytes (CLI arg, env var, file
+content, network payload), it MUST have a `hypothesis` property-based
+test. The property need not be deep — "does not raise on any
+`st.text()`" is acceptable — but it MUST exist.
+
+**Rationale:** see #102. A property test caught a surrogate-codepoint
+crash in the fingerprint function that no example-based test would
+have surfaced.
+
+### T6. Every infinite-loop guard ⇒ adversarial test that the guard actually fires.
+
+Any `while True`, recursive descent, retry loop, or polling loop with a
+max-iteration / max-attempts / deadline guard MUST have an adversarial
+test that drives the function past the guard and asserts the guard
+fires (raises, breaks, returns) instead of looping forever. The test
+runs under a wall-clock timeout so a regression hangs the test, not
+CI.
+
+**Rationale:** "trust me, this loop terminates" has cost the project
+two outages. The guard is part of the contract; test it.
+
+## How to apply this manifesto
+
+* Every new test file is reviewed against these six rules. Missing
+  coverage of a rule that applies to the diff is a blocking review
+  comment.
+* The critic agent loads this file when reviewing forge-loop PRs and
+  flags missing branches as P0 findings.
+* New rules require a named incident or issue number in the rationale,
+  same as the quality manifesto.
diff --git a/.gitignore b/.gitignore
index cb9a0cd..36b080e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,5 +24,8 @@ docs/ops/loop-runner.force-retry.json
 # Per-operator overrides + multi-repo dir + worker-planted settings
 forge-loop.local.yaml
 .forge/
+# ...except the seed manifestos, which ARE committed (dogfood, #135).
+!.forge/quality-manifesto.md
+!.forge/testing-manifesto.md
 .claude/settings.json
 .claude/settings.local.json
diff --git a/tests/test_manifestos_discovery.py b/tests/test_manifestos_discovery.py
new file mode 100644
index 0000000..b4e6ee3
--- /dev/null
+++ b/tests/test_manifestos_discovery.py
@@ -0,0 +1,132 @@
+"""Tests that the seed quality + testing manifestos exist, are
+discoverable from the repo root, and parse as well-formed markdown
+with the rule structure the rest of the system expects.
+
+These tests are the meta-validation gate for issue #135 — they ensure
+the manifestos themselves remain present and structurally sound. If
+someone deletes or guts the manifestos, this test fails.
+"""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+import pytest
+
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+FORGE_DIR = REPO_ROOT / ".forge"
+
+QUALITY_PATH = FORGE_DIR / "quality-manifesto.md"
+TESTING_PATH = FORGE_DIR / "testing-manifesto.md"
+
+
+# ---------------------------------------------------------------------------
+# Discovery — happy path
+# ---------------------------------------------------------------------------
+
+def test_forge_dir_exists() -> None:
+    assert FORGE_DIR.is_dir(), f"missing {FORGE_DIR}"
+
+
+def test_quality_manifesto_discoverable() -> None:
+    assert QUALITY_PATH.is_file(), f"missing {QUALITY_PATH}"
+
+
+def test_testing_manifesto_discoverable() -> None:
+    assert TESTING_PATH.is_file(), f"missing {TESTING_PATH}"
+
+
+# ---------------------------------------------------------------------------
+# Parsing — both files have an H1 title and at least one rule heading
+# ---------------------------------------------------------------------------
+
+H1_RE = re.compile(r"^# .+", re.MULTILINE)
+RULE_HEADING_RE = re.compile(r"^### [QT]\d+\. ", re.MULTILINE)
+
+
+@pytest.mark.parametrize("path", [QUALITY_PATH, TESTING_PATH])
+def test_manifesto_has_h1(path: Path) -> None:
+    text = path.read_text(encoding="utf-8")
+    assert H1_RE.search(text), f"{path} missing H1 title"
+
+
+@pytest.mark.parametrize(
+    "path,min_rules",
+    [(QUALITY_PATH, 5), (TESTING_PATH, 6)],
+)
+def test_manifesto_has_rules(path: Path, min_rules: int) -> None:
+    text = path.read_text(encoding="utf-8")
+    rules = RULE_HEADING_RE.findall(text)
+    assert len(rules) >= min_rules, (
+        f"{path} expected ≥{min_rules} rule headings, got {len(rules)}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Content — every rule has a rationale section, per the contract in
+# both manifestos ("No rationale ⇒ no rule").
+# ---------------------------------------------------------------------------
+
+@pytest.mark.parametrize("path", [QUALITY_PATH, TESTING_PATH])
+def test_every_rule_has_rationale(path: Path) -> None:
+    text = path.read_text(encoding="utf-8")
+    # Split on rule headings, drop preamble.
+    chunks = re.split(r"(?m)^### [QT]\d+\. ", text)[1:]
+    assert chunks, f"{path} parsed zero rule chunks"
+    missing = [c.splitlines()[0] for c in chunks if "Rationale" not in c]
+    assert not missing, (
+        f"{path} rules missing rationale: {missing}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Content — quality manifesto names the issues from the spec.
+# ---------------------------------------------------------------------------
+
+REQUIRED_QUALITY_REFS = ["#100", "#104", "#98", "#99", "#103", "#105"]
+
+
+@pytest.mark.parametrize("ref", REQUIRED_QUALITY_REFS)
+def test_quality_manifesto_references_spec_issue(ref: str) -> None:
+    text = QUALITY_PATH.read_text(encoding="utf-8")
+    assert ref in text, f"quality manifesto missing reference to {ref}"
+
+
+# ---------------------------------------------------------------------------
+# Content — testing manifesto names the iteration-probe bugs from the spec.
+# ---------------------------------------------------------------------------
+
+REQUIRED_TESTING_REFS = ["#97", "#120", "#128", "#102"]
+
+
+@pytest.mark.parametrize("ref", REQUIRED_TESTING_REFS)
+def test_testing_manifesto_references_iteration_probe_bug(ref: str) -> None:
+    text = TESTING_PATH.read_text(encoding="utf-8")
+    assert ref in text, f"testing manifesto missing reference to {ref}"
+
+
+# ---------------------------------------------------------------------------
+# Adversarial — manifestos are non-empty and not stub placeholders.
+# ---------------------------------------------------------------------------
+
+@pytest.mark.parametrize("path", [QUALITY_PATH, TESTING_PATH])
+def test_manifesto_is_substantive(path: Path) -> None:
+    text = path.read_text(encoding="utf-8")
+    assert len(text) > 1000, f"{path} too short to be a real manifesto ({len(text)} bytes)"
+    lowered = text.lower()
+    for stub in ("tbd", "todo", "lorem ipsum", "coming soon"):
+        assert stub not in lowered, f"{path} contains stub marker: {stub!r}"
+
+
+# ---------------------------------------------------------------------------
+# Adversarial — discovery must not silently succeed on a missing file.
+# This guards the discovery helpers we'd build on top.
+# ---------------------------------------------------------------------------
+
+def test_missing_manifesto_path_is_detectable(tmp_path: Path) -> None:
+    fake = tmp_path / ".forge" / "does-not-exist.md"
+    assert not fake.is_file()
+    with pytest.raises(FileNotFoundError):
+        fake.read_text(encoding="utf-8")