From 8c7069c690f20eb942c4de26381505463351a8c7 Mon Sep 17 00:00:00 2001 From: kmajdoub Date: Thu, 28 May 2026 17:46:12 +0200 Subject: [PATCH] hot-fix(iteration): two reliability bugs caught dogfooding the brainstormer epic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CTO observed: "the loop is not reliable. How come maintenance didn't detect the issue?" — two bugs ganged up on forge-loop #125/#126 to make the dispatcher re-pick the same broken issue every tick. ## Bug 1: probe misclassifies "never pushed" as PUSHED_NO_PR Pre-fix flow when origin/ doesn't exist: - ``git rev-list origin/branch..HEAD`` returns non-zero exit code - The fallback ``ahead = 0`` (NOT the -1 sentinel) was triggered - ``ahead == 0`` returned PUSHED_NO_PR → ``open_pr`` brief - Worker tried to ``gh pr create`` on a branch GitHub couldn't see - Iteration loop spun until exhausted Fix: explicitly verify the remote ref exists via ``git rev-parse --verify --quiet refs/remotes/origin/`` BEFORE the ahead-count. If origin/ isn't present, the probe now routes to the COMMITTED_NOT_PUSHED fallback (which dispatches the ``push`` brief — the correct next step). ## Bug 2: escalate_to_human added loop:needs-human WITHOUT removing loop:ready, so the dispatcher kept picking the same exhausted issue every subsequent tick. Fix: one gh issue edit call now carries both ``--add-label loop:needs-human --remove-label loop:ready``. ## Tests: tests/test_iteration_pushed_no_pr_fix.py — 5 new - ``test_no_origin_branch_returns_committed_not_pushed_not_pushed_no_pr`` is the headline regression pin. - ``test_origin_branch_exists_with_local_match_returns_pushed_no_pr`` pins the legitimate PUSHED_NO_PR path still works. - ``test_origin_branch_exists_with_local_ahead_returns_committed_not_pushed`` pins the legitimate COMMITTED_NOT_PUSHED path. - ``test_escalate_removes_loop_ready_in_same_call`` pins both labels move together. - ``test_escalate_returns_false_on_subprocess_error`` pins the failure mode (network down) doesn't crash the iteration loop. Updated tests for the new probe call: - tests/test_iteration_probe.py — add ``git rev-parse --verify`` mock output where the test asserts on origin-exists path; update the gh-failure test to assert the new (safer) CLEAN_NOTHING verdict instead of the old buggy PUSHED_NO_PR. - tests/test_runner_multi_iteration.py — Scripted mock now scans for ``--add-label X`` (the label the test cares about) instead of ``args[-1]`` (which the new dual-label call moved off the end). Full suite: 783 passed (was 766), 14 pre-existing failures unchanged. ## NOT in scope (follow-up) The maintenance daemon today is an LLM-based backlog groomer — it does NOT scan for stuck issues. A separate ticket should add a "stuck-issue sweep" that demotes any loop:ready issue with N consecutive ``worker_iterations_exhausted`` events. The CTO question "how come maintenance didn't detect the issue?" exposes a real gap that needs its own ticket — file as a forge-loop epic-or-bug. --- src/forge_loop/runner/iteration.py | 43 +++++- tests/test_iteration_probe.py | 16 +- tests/test_iteration_pushed_no_pr_fix.py | 180 +++++++++++++++++++++++ tests/test_runner_multi_iteration.py | 10 +- 4 files changed, 238 insertions(+), 11 deletions(-) create mode 100644 tests/test_iteration_pushed_no_pr_fix.py diff --git a/src/forge_loop/runner/iteration.py b/src/forge_loop/runner/iteration.py index 59c11d4..a92e8a0 100644 --- a/src/forge_loop/runner/iteration.py +++ b/src/forge_loop/runner/iteration.py @@ -188,16 +188,37 @@ def probe_worker_state( pass # 4. Local commits ahead of origin? - ahead = 0 + # First: does origin/ actually exist? If not, the branch + # was never pushed and we must NOT fall through to a "0 ahead" + # verdict. Before this check the probe misclassified locally- + # committed-but-not-pushed branches as PUSHED_NO_PR, driving the + # iteration loop into an open_pr brief on a branch GitHub can't + # see. Dogfood-caught on forge-loop #125 and #126. + origin_exists = False try: - r = run( - ["git", "rev-list", "--count", f"origin/{branch}..HEAD"], + r_remote = run( + ["git", "rev-parse", "--verify", "--quiet", f"refs/remotes/origin/{branch}"], worktree, ) - ahead = int(r.stdout.strip() or "0") if r.returncode == 0 else 0 - except (subprocess.SubprocessError, ValueError, OSError): - # Likely no remote-tracking branch yet (push never happened). - ahead = -1 # sentinel: branch may not exist on origin + origin_exists = r_remote.returncode == 0 and bool(r_remote.stdout.strip()) + except (subprocess.SubprocessError, OSError): + origin_exists = False + + ahead = 0 + if origin_exists: + try: + r = run( + ["git", "rev-list", "--count", f"origin/{branch}..HEAD"], + worktree, + ) + ahead = int(r.stdout.strip() or "0") if r.returncode == 0 else 0 + except (subprocess.SubprocessError, ValueError, OSError): + ahead = -1 # transient git failure — treat as unknown + else: + # Branch isn't on origin yet — sentinel = -1 routes to the + # local-commits-no-upstream fallback further down (which returns + # COMMITTED_NOT_PUSHED). PUSHED_NO_PR cannot be reached here. + ahead = -1 # 5. With a PR — classify the PR-side state. if pr_view and pr_view.get("state") == "OPEN": @@ -529,6 +550,12 @@ def escalate_to_human( Best-effort. Returns True if at least the label landed; the comment is cosmetic. Never raises. """ + # Add loop:needs-human AND remove loop:ready in one call so the + # dispatcher stops re-picking the issue on the next tick. Before + # this, escalated issues kept reappearing in `top_issues` because + # only the needs-human label was added — loop:ready survived. + # Dogfood-caught on forge-loop #125/#126 (CTO observed loop "not + # reliable" — same stuck issue served on every tick). label_ok = False try: r = run( @@ -541,6 +568,8 @@ def escalate_to_human( repo, "--add-label", "loop:needs-human", + "--remove-label", + "loop:ready", ], worktree, ) diff --git a/tests/test_iteration_probe.py b/tests/test_iteration_probe.py index 5933bae..735e796 100644 --- a/tests/test_iteration_probe.py +++ b/tests/test_iteration_probe.py @@ -199,6 +199,9 @@ def test_committed_not_pushed(worktree: Path) -> None: run = _make_fake_run( { ("gh", "pr", "list"): _completed("[]"), + # origin/ exists (new probe added by the pushed_no_pr + # misclassification hot-fix); rev-list returns 2 ahead. + ("git", "rev-parse", "--verify"): _completed("abc123\n"), ("git", "status", "--porcelain"): _completed(""), ("git", "rev-list", "--count"): _completed("2\n"), } @@ -211,6 +214,8 @@ def test_pushed_no_pr(worktree: Path) -> None: run = _make_fake_run( { ("gh", "pr", "list"): _completed("[]"), + # origin/ exists AND local matches it (ahead=0). + ("git", "rev-parse", "--verify"): _completed("abc123\n"), ("git", "status", "--porcelain"): _completed(""), ("git", "rev-list", "--count"): _completed("0\n"), } @@ -228,7 +233,13 @@ def test_clean_nothing_when_worktree_missing(tmp_path: Path) -> None: def test_gh_failure_degrades_gracefully(worktree: Path) -> None: - """Adversarial: gh subprocess errors → fall through, don't crash.""" + """Adversarial: gh subprocess errors → fall through, don't crash. + + After the pushed_no_pr hot-fix, the probe is more conservative when + origin/ can't be verified. With no PR seen AND no proof of + a remote ref AND no local commits, we return CLEAN_NOTHING. This + is SAFER than the old "guess PUSHED_NO_PR" verdict — the worker + won't try to open a PR on a branch that doesn't exist remotely.""" def _run(args, _cwd): if args[0] == "gh": @@ -240,8 +251,7 @@ def _run(args, _cwd): return _completed("") state, _ = probe_worker_state(worktree, "b", "owner/r", 10, run=_run) - # gh died → no PR seen → fall through to pushed_no_pr (ahead == 0). - assert state == WorkerState.PUSHED_NO_PR + assert state == WorkerState.CLEAN_NOTHING # --------------------------------------------------------------------------- diff --git a/tests/test_iteration_pushed_no_pr_fix.py b/tests/test_iteration_pushed_no_pr_fix.py new file mode 100644 index 0000000..937c016 --- /dev/null +++ b/tests/test_iteration_pushed_no_pr_fix.py @@ -0,0 +1,180 @@ +"""Tests for the pushed_no_pr misclassification fix + the escalate +loop:ready-removal fix (hot-fix). + +Both bugs surfaced dogfooding the brainstormer epic on forge-loop's +own backlog. The CTO observed "loop is not reliable" — the iteration +probe was claiming branches were pushed when they weren't, AND the +escalate_to_human path added loop:needs-human without removing +loop:ready, so the dispatcher kept re-picking the same broken issue +every tick. +""" + +from __future__ import annotations + +import json +import subprocess +from pathlib import Path + +import pytest + +from forge_loop.runner.iteration import ( + WorkerState, + escalate_to_human, + probe_worker_state, +) + + +def _completed(stdout: str = "", returncode: int = 0) -> subprocess.CompletedProcess[str]: + return subprocess.CompletedProcess(args=[], returncode=returncode, stdout=stdout, stderr="") + + +@pytest.fixture() +def worktree(tmp_path: Path) -> Path: + wt = tmp_path / "wt" + wt.mkdir() + (wt / ".git").mkdir() + return wt + + +# --------------------------------------------------------------------------- +# Probe — origin/ existence check +# --------------------------------------------------------------------------- + + +def test_no_origin_branch_returns_committed_not_pushed_not_pushed_no_pr(worktree: Path) -> None: + """Headline regression pin — the probe must NOT return PUSHED_NO_PR + when origin/ doesn't exist. Pre-fix: rev-list failed + silently, ahead defaulted to 0, returned PUSHED_NO_PR → open_pr + brief → worker tried to open PR on a branch GitHub couldn't see → + iteration loop spun forever.""" + calls: list[list[str]] = [] + + def fake_run(args, _cwd): + calls.append(list(args)) + if args[:3] == ["gh", "pr", "list"]: + return _completed("[]") # no PR + if args[:2] == ["git", "fetch"]: + # Branch doesn't exist on origin — fetch fails. + return _completed("", returncode=128) + if args[:1] == ["git"] and "rev-parse" in args: + # The new ref-existence probe — origin ref missing. + return _completed("", returncode=128) + if args[:1] == ["git"] and "status" in args: + return _completed("") # clean worktree + if args[:1] == ["git"] and "log" in args: + return _completed("abc123def456") # has local commits + if args[:1] == ["git"] and "rev-list" in args: + # Defensive: if this were called (it shouldn't, because + # origin doesn't exist), it'd fail. + return _completed("", returncode=128) + return _completed("") + + state, _ = probe_worker_state(worktree, "loop/999-nonexistent", "o/r", 999, run=fake_run) + # Must be COMMITTED_NOT_PUSHED (the fallback for "local commits but + # no upstream"), NOT PUSHED_NO_PR. + assert state == WorkerState.COMMITTED_NOT_PUSHED, ( + f"got {state.value!r}; this is the exact regression that wedged the " + f"iteration loop on forge-loop #125/#126." + ) + + +def test_origin_branch_exists_with_local_match_returns_pushed_no_pr(worktree: Path) -> None: + """Sanity: when origin/ DOES exist and local matches it + (ahead=0), we still return PUSHED_NO_PR. The fix only narrows + the misclassification — it doesn't break the legitimate path.""" + def fake_run(args, _cwd): + if args[:3] == ["gh", "pr", "list"]: + return _completed("[]") + if args[:2] == ["git", "fetch"]: + return _completed("") + if args[:1] == ["git"] and "rev-parse" in args: + # origin/ exists + return _completed("abc123\n", returncode=0) + if args[:1] == ["git"] and "status" in args: + return _completed("") + if args[:1] == ["git"] and "rev-list" in args: + return _completed("0") # local matches origin + return _completed("") + + state, _ = probe_worker_state(worktree, "loop/1-real", "o/r", 1, run=fake_run) + assert state == WorkerState.PUSHED_NO_PR + + +def test_origin_branch_exists_with_local_ahead_returns_committed_not_pushed(worktree: Path) -> None: + """Sanity: real "ahead by N commits" still routes to push brief.""" + def fake_run(args, _cwd): + if args[:3] == ["gh", "pr", "list"]: + return _completed("[]") + if args[:2] == ["git", "fetch"]: + return _completed("") + if args[:1] == ["git"] and "rev-parse" in args: + return _completed("abc123\n", returncode=0) + if args[:1] == ["git"] and "status" in args: + return _completed("") + if args[:1] == ["git"] and "rev-list" in args: + return _completed("3") # 3 commits ahead + return _completed("") + + state, _ = probe_worker_state(worktree, "loop/1-real", "o/r", 1, run=fake_run) + assert state == WorkerState.COMMITTED_NOT_PUSHED + + +# --------------------------------------------------------------------------- +# escalate_to_human — must REMOVE loop:ready, not just add loop:needs-human +# --------------------------------------------------------------------------- + + +def test_escalate_removes_loop_ready_in_same_call(worktree: Path) -> None: + """The headline reliability fix — adding loop:needs-human without + removing loop:ready meant the dispatcher kept picking the same + broken issue every tick, forever. The CTO noticed: 'the loop is + not reliable.' This pin asserts the labels move together.""" + invocations: list[list[str]] = [] + + def fake_run(args, _cwd): + invocations.append(list(args)) + return _completed("ok", returncode=0) + + ok = escalate_to_human( + issue_n=42, + repo="o/r", + state=WorkerState.COMMITTED_NOT_PUSHED, + worktree=worktree, + pr_url=None, + run=fake_run, + ) + assert ok + + # The gh issue edit invocation must carry BOTH --add-label + # loop:needs-human AND --remove-label loop:ready in the same call, + # so the dispatcher sees a consistent state on the next tick. + edit_call = next( + (c for c in invocations if c[:3] == ["gh", "issue", "edit"]), + None, + ) + assert edit_call is not None, "escalate_to_human must call gh issue edit" + assert "--add-label" in edit_call + assert "loop:needs-human" in edit_call + assert "--remove-label" in edit_call, ( + "escalate_to_human MUST remove loop:ready — without this the " + "dispatcher re-picks the issue every tick." + ) + assert "loop:ready" in edit_call + + +def test_escalate_returns_false_on_subprocess_error(worktree: Path) -> None: + """Adversarial: a network/gh failure during escalation must not + crash the iteration loop. Returns False so the caller knows the + operator label wasn't applied.""" + def fake_run(args, _cwd): + raise subprocess.SubprocessError("network down") + + ok = escalate_to_human( + issue_n=42, + repo="o/r", + state=WorkerState.COMMITTED_NOT_PUSHED, + worktree=worktree, + pr_url=None, + run=fake_run, + ) + assert ok is False diff --git a/tests/test_runner_multi_iteration.py b/tests/test_runner_multi_iteration.py index 7a82732..a2e01b5 100644 --- a/tests/test_runner_multi_iteration.py +++ b/tests/test_runner_multi_iteration.py @@ -93,7 +93,15 @@ def _run(args, _cwd): self.automerge_calls.append(args[3]) return _cp("", 0) if args[0] == "gh" and args[1] == "issue" and args[2] == "edit": - self.label_calls.append((int(args[3]), args[-1])) + # The escalation hot-fix calls `--add-label X --remove-label Y` + # in a single invocation. Capture the ADDED label (the one + # the test cares about) by scanning for --add-label. + added = "" + for i, a in enumerate(args): + if a == "--add-label" and i + 1 < len(args): + added = args[i + 1] + break + self.label_calls.append((int(args[3]), added or args[-1])) return _cp("", 0) if args[0] == "git" and args[1] == "rev-parse": return _cp("loop/78-x\n", 0)