From 8c7069c690f20eb942c4de26381505463351a8c7 Mon Sep 17 00:00:00 2001
From: kmajdoub <majdoub.khalid@gmail.com>
Date: Thu, 28 May 2026 17:46:12 +0200
Subject: [PATCH] hot-fix(iteration): two reliability bugs caught dogfooding
 the brainstormer epic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CTO observed: "the loop is not reliable. How come maintenance didn't
detect the issue?" — two bugs ganged up on forge-loop #125/#126 to
make the dispatcher re-pick the same broken issue every tick.

## Bug 1: probe misclassifies "never pushed" as PUSHED_NO_PR

Pre-fix flow when origin/<branch> doesn't exist:
- ``git rev-list origin/branch..HEAD`` returns non-zero exit code
- The fallback ``ahead = 0`` (NOT the -1 sentinel) was triggered
- ``ahead == 0`` returned PUSHED_NO_PR → ``open_pr`` brief
- Worker tried to ``gh pr create`` on a branch GitHub couldn't see
- Iteration loop spun until exhausted

Fix: explicitly verify the remote ref exists via
``git rev-parse --verify --quiet refs/remotes/origin/<branch>``
BEFORE the ahead-count. If origin/<branch> isn't present, the probe
now routes to the COMMITTED_NOT_PUSHED fallback (which dispatches the
``push`` brief — the correct next step).

## Bug 2: escalate_to_human added loop:needs-human WITHOUT removing
loop:ready, so the dispatcher kept picking the same exhausted issue
every subsequent tick.

Fix: one gh issue edit call now carries both
``--add-label loop:needs-human --remove-label loop:ready``.

## Tests: tests/test_iteration_pushed_no_pr_fix.py — 5 new
- ``test_no_origin_branch_returns_committed_not_pushed_not_pushed_no_pr``
  is the headline regression pin.
- ``test_origin_branch_exists_with_local_match_returns_pushed_no_pr``
  pins the legitimate PUSHED_NO_PR path still works.
- ``test_origin_branch_exists_with_local_ahead_returns_committed_not_pushed``
  pins the legitimate COMMITTED_NOT_PUSHED path.
- ``test_escalate_removes_loop_ready_in_same_call`` pins both labels
  move together.
- ``test_escalate_returns_false_on_subprocess_error`` pins the failure
  mode (network down) doesn't crash the iteration loop.

Updated tests for the new probe call:
- tests/test_iteration_probe.py — add ``git rev-parse --verify`` mock
  output where the test asserts on origin-exists path; update the
  gh-failure test to assert the new (safer) CLEAN_NOTHING verdict
  instead of the old buggy PUSHED_NO_PR.
- tests/test_runner_multi_iteration.py — Scripted mock now scans for
  ``--add-label X`` (the label the test cares about) instead of
  ``args[-1]`` (which the new dual-label call moved off the end).

Full suite: 783 passed (was 766), 14 pre-existing failures unchanged.

## NOT in scope (follow-up)

The maintenance daemon today is an LLM-based backlog groomer — it does
NOT scan for stuck issues. A separate ticket should add a "stuck-issue
sweep" that demotes any loop:ready issue with N consecutive
``worker_iterations_exhausted`` events. The CTO question "how come
maintenance didn't detect the issue?" exposes a real gap that needs
its own ticket — file as a forge-loop epic-or-bug.
---
 src/forge_loop/runner/iteration.py       |  43 +++++-
 tests/test_iteration_probe.py            |  16 +-
 tests/test_iteration_pushed_no_pr_fix.py | 180 +++++++++++++++++++++++
 tests/test_runner_multi_iteration.py     |  10 +-
 4 files changed, 238 insertions(+), 11 deletions(-)
 create mode 100644 tests/test_iteration_pushed_no_pr_fix.py
diff --git a/src/forge_loop/runner/iteration.py b/src/forge_loop/runner/iteration.py
index 59c11d4..a92e8a0 100644
--- a/src/forge_loop/runner/iteration.py
+++ b/src/forge_loop/runner/iteration.py
@@ -188,16 +188,37 @@ def probe_worker_state(
         pass
 
     # 4. Local commits ahead of origin?
-    ahead = 0
+    # First: does origin/<branch> actually exist? If not, the branch
+    # was never pushed and we must NOT fall through to a "0 ahead"
+    # verdict. Before this check the probe misclassified locally-
+    # committed-but-not-pushed branches as PUSHED_NO_PR, driving the
+    # iteration loop into an open_pr brief on a branch GitHub can't
+    # see. Dogfood-caught on forge-loop #125 and #126.
+    origin_exists = False
     try:
-        r = run(
-            ["git", "rev-list", "--count", f"origin/{branch}..HEAD"],
+        r_remote = run(
+            ["git", "rev-parse", "--verify", "--quiet", f"refs/remotes/origin/{branch}"],
             worktree,
         )
-        ahead = int(r.stdout.strip() or "0") if r.returncode == 0 else 0
-    except (subprocess.SubprocessError, ValueError, OSError):
-        # Likely no remote-tracking branch yet (push never happened).
-        ahead = -1  # sentinel: branch may not exist on origin
+        origin_exists = r_remote.returncode == 0 and bool(r_remote.stdout.strip())
+    except (subprocess.SubprocessError, OSError):
+        origin_exists = False
+
+    ahead = 0
+    if origin_exists:
+        try:
+            r = run(
+                ["git", "rev-list", "--count", f"origin/{branch}..HEAD"],
+                worktree,
+            )
+            ahead = int(r.stdout.strip() or "0") if r.returncode == 0 else 0
+        except (subprocess.SubprocessError, ValueError, OSError):
+            ahead = -1  # transient git failure — treat as unknown
+    else:
+        # Branch isn't on origin yet — sentinel = -1 routes to the
+        # local-commits-no-upstream fallback further down (which returns
+        # COMMITTED_NOT_PUSHED). PUSHED_NO_PR cannot be reached here.
+        ahead = -1
 
     # 5. With a PR — classify the PR-side state.
     if pr_view and pr_view.get("state") == "OPEN":
@@ -529,6 +550,12 @@ def escalate_to_human(
     Best-effort. Returns True if at least the label landed; the comment is
     cosmetic. Never raises.
     """
+    # Add loop:needs-human AND remove loop:ready in one call so the
+    # dispatcher stops re-picking the issue on the next tick. Before
+    # this, escalated issues kept reappearing in `top_issues` because
+    # only the needs-human label was added — loop:ready survived.
+    # Dogfood-caught on forge-loop #125/#126 (CTO observed loop "not
+    # reliable" — same stuck issue served on every tick).
     label_ok = False
     try:
         r = run(
@@ -541,6 +568,8 @@ def escalate_to_human(
                 repo,
                 "--add-label",
                 "loop:needs-human",
+                "--remove-label",
+                "loop:ready",
             ],
             worktree,
         )
diff --git a/tests/test_iteration_probe.py b/tests/test_iteration_probe.py
index 5933bae..735e796 100644
--- a/tests/test_iteration_probe.py
+++ b/tests/test_iteration_probe.py
@@ -199,6 +199,9 @@ def test_committed_not_pushed(worktree: Path) -> None:
     run = _make_fake_run(
         {
             ("gh", "pr", "list"): _completed("[]"),
+            # origin/<branch> exists (new probe added by the pushed_no_pr
+            # misclassification hot-fix); rev-list returns 2 ahead.
+            ("git", "rev-parse", "--verify"): _completed("abc123\n"),
             ("git", "status", "--porcelain"): _completed(""),
             ("git", "rev-list", "--count"): _completed("2\n"),
         }
@@ -211,6 +214,8 @@ def test_pushed_no_pr(worktree: Path) -> None:
     run = _make_fake_run(
         {
             ("gh", "pr", "list"): _completed("[]"),
+            # origin/<branch> exists AND local matches it (ahead=0).
+            ("git", "rev-parse", "--verify"): _completed("abc123\n"),
             ("git", "status", "--porcelain"): _completed(""),
             ("git", "rev-list", "--count"): _completed("0\n"),
         }
@@ -228,7 +233,13 @@ def test_clean_nothing_when_worktree_missing(tmp_path: Path) -> None:
 
 
 def test_gh_failure_degrades_gracefully(worktree: Path) -> None:
-    """Adversarial: gh subprocess errors → fall through, don't crash."""
+    """Adversarial: gh subprocess errors → fall through, don't crash.
+
+    After the pushed_no_pr hot-fix, the probe is more conservative when
+    origin/<branch> can't be verified. With no PR seen AND no proof of
+    a remote ref AND no local commits, we return CLEAN_NOTHING. This
+    is SAFER than the old "guess PUSHED_NO_PR" verdict — the worker
+    won't try to open a PR on a branch that doesn't exist remotely."""
 
     def _run(args, _cwd):
         if args[0] == "gh":
@@ -240,8 +251,7 @@ def _run(args, _cwd):
         return _completed("")
 
     state, _ = probe_worker_state(worktree, "b", "owner/r", 10, run=_run)
-    # gh died → no PR seen → fall through to pushed_no_pr (ahead == 0).
-    assert state == WorkerState.PUSHED_NO_PR
+    assert state == WorkerState.CLEAN_NOTHING
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/test_iteration_pushed_no_pr_fix.py b/tests/test_iteration_pushed_no_pr_fix.py
new file mode 100644
index 0000000..937c016
--- /dev/null
+++ b/tests/test_iteration_pushed_no_pr_fix.py
@@ -0,0 +1,180 @@
+"""Tests for the pushed_no_pr misclassification fix + the escalate
+loop:ready-removal fix (hot-fix).
+
+Both bugs surfaced dogfooding the brainstormer epic on forge-loop's
+own backlog. The CTO observed "loop is not reliable" — the iteration
+probe was claiming branches were pushed when they weren't, AND the
+escalate_to_human path added loop:needs-human without removing
+loop:ready, so the dispatcher kept re-picking the same broken issue
+every tick.
+"""
+
+from __future__ import annotations
+
+import json
+import subprocess
+from pathlib import Path
+
+import pytest
+
+from forge_loop.runner.iteration import (
+    WorkerState,
+    escalate_to_human,
+    probe_worker_state,
+)
+
+
+def _completed(stdout: str = "", returncode: int = 0) -> subprocess.CompletedProcess[str]:
+    return subprocess.CompletedProcess(args=[], returncode=returncode, stdout=stdout, stderr="")
+
+
+@pytest.fixture()
+def worktree(tmp_path: Path) -> Path:
+    wt = tmp_path / "wt"
+    wt.mkdir()
+    (wt / ".git").mkdir()
+    return wt
+
+
+# ---------------------------------------------------------------------------
+# Probe — origin/<branch> existence check
+# ---------------------------------------------------------------------------
+
+
+def test_no_origin_branch_returns_committed_not_pushed_not_pushed_no_pr(worktree: Path) -> None:
+    """Headline regression pin — the probe must NOT return PUSHED_NO_PR
+    when origin/<branch> doesn't exist. Pre-fix: rev-list failed
+    silently, ahead defaulted to 0, returned PUSHED_NO_PR → open_pr
+    brief → worker tried to open PR on a branch GitHub couldn't see →
+    iteration loop spun forever."""
+    calls: list[list[str]] = []
+
+    def fake_run(args, _cwd):
+        calls.append(list(args))
+        if args[:3] == ["gh", "pr", "list"]:
+            return _completed("[]")  # no PR
+        if args[:2] == ["git", "fetch"]:
+            # Branch doesn't exist on origin — fetch fails.
+            return _completed("", returncode=128)
+        if args[:1] == ["git"] and "rev-parse" in args:
+            # The new ref-existence probe — origin ref missing.
+            return _completed("", returncode=128)
+        if args[:1] == ["git"] and "status" in args:
+            return _completed("")  # clean worktree
+        if args[:1] == ["git"] and "log" in args:
+            return _completed("abc123def456")  # has local commits
+        if args[:1] == ["git"] and "rev-list" in args:
+            # Defensive: if this were called (it shouldn't, because
+            # origin doesn't exist), it'd fail.
+            return _completed("", returncode=128)
+        return _completed("")
+
+    state, _ = probe_worker_state(worktree, "loop/999-nonexistent", "o/r", 999, run=fake_run)
+    # Must be COMMITTED_NOT_PUSHED (the fallback for "local commits but
+    # no upstream"), NOT PUSHED_NO_PR.
+    assert state == WorkerState.COMMITTED_NOT_PUSHED, (
+        f"got {state.value!r}; this is the exact regression that wedged the "
+        f"iteration loop on forge-loop #125/#126."
+    )
+
+
+def test_origin_branch_exists_with_local_match_returns_pushed_no_pr(worktree: Path) -> None:
+    """Sanity: when origin/<branch> DOES exist and local matches it
+    (ahead=0), we still return PUSHED_NO_PR. The fix only narrows
+    the misclassification — it doesn't break the legitimate path."""
+    def fake_run(args, _cwd):
+        if args[:3] == ["gh", "pr", "list"]:
+            return _completed("[]")
+        if args[:2] == ["git", "fetch"]:
+            return _completed("")
+        if args[:1] == ["git"] and "rev-parse" in args:
+            # origin/<branch> exists
+            return _completed("abc123\n", returncode=0)
+        if args[:1] == ["git"] and "status" in args:
+            return _completed("")
+        if args[:1] == ["git"] and "rev-list" in args:
+            return _completed("0")  # local matches origin
+        return _completed("")
+
+    state, _ = probe_worker_state(worktree, "loop/1-real", "o/r", 1, run=fake_run)
+    assert state == WorkerState.PUSHED_NO_PR
+
+
+def test_origin_branch_exists_with_local_ahead_returns_committed_not_pushed(worktree: Path) -> None:
+    """Sanity: real "ahead by N commits" still routes to push brief."""
+    def fake_run(args, _cwd):
+        if args[:3] == ["gh", "pr", "list"]:
+            return _completed("[]")
+        if args[:2] == ["git", "fetch"]:
+            return _completed("")
+        if args[:1] == ["git"] and "rev-parse" in args:
+            return _completed("abc123\n", returncode=0)
+        if args[:1] == ["git"] and "status" in args:
+            return _completed("")
+        if args[:1] == ["git"] and "rev-list" in args:
+            return _completed("3")  # 3 commits ahead
+        return _completed("")
+
+    state, _ = probe_worker_state(worktree, "loop/1-real", "o/r", 1, run=fake_run)
+    assert state == WorkerState.COMMITTED_NOT_PUSHED
+
+
+# ---------------------------------------------------------------------------
+# escalate_to_human — must REMOVE loop:ready, not just add loop:needs-human
+# ---------------------------------------------------------------------------
+
+
+def test_escalate_removes_loop_ready_in_same_call(worktree: Path) -> None:
+    """The headline reliability fix — adding loop:needs-human without
+    removing loop:ready meant the dispatcher kept picking the same
+    broken issue every tick, forever. The CTO noticed: 'the loop is
+    not reliable.' This pin asserts the labels move together."""
+    invocations: list[list[str]] = []
+
+    def fake_run(args, _cwd):
+        invocations.append(list(args))
+        return _completed("ok", returncode=0)
+
+    ok = escalate_to_human(
+        issue_n=42,
+        repo="o/r",
+        state=WorkerState.COMMITTED_NOT_PUSHED,
+        worktree=worktree,
+        pr_url=None,
+        run=fake_run,
+    )
+    assert ok
+
+    # The gh issue edit invocation must carry BOTH --add-label
+    # loop:needs-human AND --remove-label loop:ready in the same call,
+    # so the dispatcher sees a consistent state on the next tick.
+    edit_call = next(
+        (c for c in invocations if c[:3] == ["gh", "issue", "edit"]),
+        None,
+    )
+    assert edit_call is not None, "escalate_to_human must call gh issue edit"
+    assert "--add-label" in edit_call
+    assert "loop:needs-human" in edit_call
+    assert "--remove-label" in edit_call, (
+        "escalate_to_human MUST remove loop:ready — without this the "
+        "dispatcher re-picks the issue every tick."
+    )
+    assert "loop:ready" in edit_call
+
+
+def test_escalate_returns_false_on_subprocess_error(worktree: Path) -> None:
+    """Adversarial: a network/gh failure during escalation must not
+    crash the iteration loop. Returns False so the caller knows the
+    operator label wasn't applied."""
+    def fake_run(args, _cwd):
+        raise subprocess.SubprocessError("network down")
+
+    ok = escalate_to_human(
+        issue_n=42,
+        repo="o/r",
+        state=WorkerState.COMMITTED_NOT_PUSHED,
+        worktree=worktree,
+        pr_url=None,
+        run=fake_run,
+    )
+    assert ok is False
diff --git a/tests/test_runner_multi_iteration.py b/tests/test_runner_multi_iteration.py
index 7a82732..a2e01b5 100644
--- a/tests/test_runner_multi_iteration.py
+++ b/tests/test_runner_multi_iteration.py
@@ -93,7 +93,15 @@ def _run(args, _cwd):
                 self.automerge_calls.append(args[3])
                 return _cp("", 0)
             if args[0] == "gh" and args[1] == "issue" and args[2] == "edit":
-                self.label_calls.append((int(args[3]), args[-1]))
+                # The escalation hot-fix calls `--add-label X --remove-label Y`
+                # in a single invocation. Capture the ADDED label (the one
+                # the test cares about) by scanning for --add-label.
+                added = ""
+                for i, a in enumerate(args):
+                    if a == "--add-label" and i + 1 < len(args):
+                        added = args[i + 1]
+                        break
+                self.label_calls.append((int(args[3]), added or args[-1]))
                 return _cp("", 0)
             if args[0] == "git" and args[1] == "rev-parse":
                 return _cp("loop/78-x\n", 0)