Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 36 additions & 7 deletions src/forge_loop/runner/iteration.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,16 +188,37 @@ def probe_worker_state(
pass

# 4. Local commits ahead of origin?
ahead = 0
# First: does origin/<branch> actually exist? If not, the branch
# was never pushed and we must NOT fall through to a "0 ahead"
# verdict. Before this check the probe misclassified locally-
# committed-but-not-pushed branches as PUSHED_NO_PR, driving the
# iteration loop into an open_pr brief on a branch GitHub can't
# see. Dogfood-caught on forge-loop #125 and #126.
origin_exists = False
try:
r = run(
["git", "rev-list", "--count", f"origin/{branch}..HEAD"],
r_remote = run(
["git", "rev-parse", "--verify", "--quiet", f"refs/remotes/origin/{branch}"],
worktree,
)
ahead = int(r.stdout.strip() or "0") if r.returncode == 0 else 0
except (subprocess.SubprocessError, ValueError, OSError):
# Likely no remote-tracking branch yet (push never happened).
ahead = -1 # sentinel: branch may not exist on origin
origin_exists = r_remote.returncode == 0 and bool(r_remote.stdout.strip())
except (subprocess.SubprocessError, OSError):
origin_exists = False

ahead = 0
if origin_exists:
try:
r = run(
["git", "rev-list", "--count", f"origin/{branch}..HEAD"],
worktree,
)
ahead = int(r.stdout.strip() or "0") if r.returncode == 0 else 0
except (subprocess.SubprocessError, ValueError, OSError):
ahead = -1 # transient git failure — treat as unknown
else:
# Branch isn't on origin yet — sentinel = -1 routes to the
# local-commits-no-upstream fallback further down (which returns
# COMMITTED_NOT_PUSHED). PUSHED_NO_PR cannot be reached here.
ahead = -1

# 5. With a PR — classify the PR-side state.
if pr_view and pr_view.get("state") == "OPEN":
Expand Down Expand Up @@ -529,6 +550,12 @@ def escalate_to_human(
Best-effort. Returns True if at least the label landed; the comment is
cosmetic. Never raises.
"""
# Add loop:needs-human AND remove loop:ready in one call so the
# dispatcher stops re-picking the issue on the next tick. Before
# this, escalated issues kept reappearing in `top_issues` because
# only the needs-human label was added — loop:ready survived.
# Dogfood-caught on forge-loop #125/#126 (CTO observed loop "not
# reliable" — same stuck issue served on every tick).
label_ok = False
try:
r = run(
Expand All @@ -541,6 +568,8 @@ def escalate_to_human(
repo,
"--add-label",
"loop:needs-human",
"--remove-label",
"loop:ready",
],
worktree,
)
Expand Down
16 changes: 13 additions & 3 deletions tests/test_iteration_probe.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,9 @@ def test_committed_not_pushed(worktree: Path) -> None:
run = _make_fake_run(
{
("gh", "pr", "list"): _completed("[]"),
# origin/<branch> exists (new probe added by the pushed_no_pr
# misclassification hot-fix); rev-list returns 2 ahead.
("git", "rev-parse", "--verify"): _completed("abc123\n"),
("git", "status", "--porcelain"): _completed(""),
("git", "rev-list", "--count"): _completed("2\n"),
}
Expand All @@ -211,6 +214,8 @@ def test_pushed_no_pr(worktree: Path) -> None:
run = _make_fake_run(
{
("gh", "pr", "list"): _completed("[]"),
# origin/<branch> exists AND local matches it (ahead=0).
("git", "rev-parse", "--verify"): _completed("abc123\n"),
("git", "status", "--porcelain"): _completed(""),
("git", "rev-list", "--count"): _completed("0\n"),
}
Expand All @@ -228,7 +233,13 @@ def test_clean_nothing_when_worktree_missing(tmp_path: Path) -> None:


def test_gh_failure_degrades_gracefully(worktree: Path) -> None:
"""Adversarial: gh subprocess errors → fall through, don't crash."""
"""Adversarial: gh subprocess errors → fall through, don't crash.

After the pushed_no_pr hot-fix, the probe is more conservative when
origin/<branch> can't be verified. With no PR seen AND no proof of
a remote ref AND no local commits, we return CLEAN_NOTHING. This
is SAFER than the old "guess PUSHED_NO_PR" verdict — the worker
won't try to open a PR on a branch that doesn't exist remotely."""

def _run(args, _cwd):
if args[0] == "gh":
Expand All @@ -240,8 +251,7 @@ def _run(args, _cwd):
return _completed("")

state, _ = probe_worker_state(worktree, "b", "owner/r", 10, run=_run)
# gh died → no PR seen → fall through to pushed_no_pr (ahead == 0).
assert state == WorkerState.PUSHED_NO_PR
assert state == WorkerState.CLEAN_NOTHING


# ---------------------------------------------------------------------------
Expand Down
180 changes: 180 additions & 0 deletions tests/test_iteration_pushed_no_pr_fix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
"""Tests for the pushed_no_pr misclassification fix + the escalate
loop:ready-removal fix (hot-fix).

Both bugs surfaced dogfooding the brainstormer epic on forge-loop's
own backlog. The CTO observed "loop is not reliable" — the iteration
probe was claiming branches were pushed when they weren't, AND the
escalate_to_human path added loop:needs-human without removing
loop:ready, so the dispatcher kept re-picking the same broken issue
every tick.
"""

from __future__ import annotations

import json
import subprocess
from pathlib import Path

import pytest

from forge_loop.runner.iteration import (
WorkerState,
escalate_to_human,
probe_worker_state,
)


def _completed(stdout: str = "", returncode: int = 0) -> subprocess.CompletedProcess[str]:
return subprocess.CompletedProcess(args=[], returncode=returncode, stdout=stdout, stderr="")


@pytest.fixture()
def worktree(tmp_path: Path) -> Path:
wt = tmp_path / "wt"
wt.mkdir()
(wt / ".git").mkdir()
return wt


# ---------------------------------------------------------------------------
# Probe — origin/<branch> existence check
# ---------------------------------------------------------------------------


def test_no_origin_branch_returns_committed_not_pushed_not_pushed_no_pr(worktree: Path) -> None:
"""Headline regression pin — the probe must NOT return PUSHED_NO_PR
when origin/<branch> doesn't exist. Pre-fix: rev-list failed
silently, ahead defaulted to 0, returned PUSHED_NO_PR → open_pr
brief → worker tried to open PR on a branch GitHub couldn't see →
iteration loop spun forever."""
calls: list[list[str]] = []

def fake_run(args, _cwd):
calls.append(list(args))
if args[:3] == ["gh", "pr", "list"]:
return _completed("[]") # no PR
if args[:2] == ["git", "fetch"]:
# Branch doesn't exist on origin — fetch fails.
return _completed("", returncode=128)
if args[:1] == ["git"] and "rev-parse" in args:
# The new ref-existence probe — origin ref missing.
return _completed("", returncode=128)
if args[:1] == ["git"] and "status" in args:
return _completed("") # clean worktree
if args[:1] == ["git"] and "log" in args:
return _completed("abc123def456") # has local commits
if args[:1] == ["git"] and "rev-list" in args:
# Defensive: if this were called (it shouldn't, because
# origin doesn't exist), it'd fail.
return _completed("", returncode=128)
return _completed("")

state, _ = probe_worker_state(worktree, "loop/999-nonexistent", "o/r", 999, run=fake_run)
# Must be COMMITTED_NOT_PUSHED (the fallback for "local commits but
# no upstream"), NOT PUSHED_NO_PR.
assert state == WorkerState.COMMITTED_NOT_PUSHED, (
f"got {state.value!r}; this is the exact regression that wedged the "
f"iteration loop on forge-loop #125/#126."
)


def test_origin_branch_exists_with_local_match_returns_pushed_no_pr(worktree: Path) -> None:
"""Sanity: when origin/<branch> DOES exist and local matches it
(ahead=0), we still return PUSHED_NO_PR. The fix only narrows
the misclassification — it doesn't break the legitimate path."""
def fake_run(args, _cwd):
if args[:3] == ["gh", "pr", "list"]:
return _completed("[]")
if args[:2] == ["git", "fetch"]:
return _completed("")
if args[:1] == ["git"] and "rev-parse" in args:
# origin/<branch> exists
return _completed("abc123\n", returncode=0)
if args[:1] == ["git"] and "status" in args:
return _completed("")
if args[:1] == ["git"] and "rev-list" in args:
return _completed("0") # local matches origin
return _completed("")

state, _ = probe_worker_state(worktree, "loop/1-real", "o/r", 1, run=fake_run)
assert state == WorkerState.PUSHED_NO_PR


def test_origin_branch_exists_with_local_ahead_returns_committed_not_pushed(worktree: Path) -> None:
"""Sanity: real "ahead by N commits" still routes to push brief."""
def fake_run(args, _cwd):
if args[:3] == ["gh", "pr", "list"]:
return _completed("[]")
if args[:2] == ["git", "fetch"]:
return _completed("")
if args[:1] == ["git"] and "rev-parse" in args:
return _completed("abc123\n", returncode=0)
if args[:1] == ["git"] and "status" in args:
return _completed("")
if args[:1] == ["git"] and "rev-list" in args:
return _completed("3") # 3 commits ahead
return _completed("")

state, _ = probe_worker_state(worktree, "loop/1-real", "o/r", 1, run=fake_run)
assert state == WorkerState.COMMITTED_NOT_PUSHED


# ---------------------------------------------------------------------------
# escalate_to_human — must REMOVE loop:ready, not just add loop:needs-human
# ---------------------------------------------------------------------------


def test_escalate_removes_loop_ready_in_same_call(worktree: Path) -> None:
"""The headline reliability fix — adding loop:needs-human without
removing loop:ready meant the dispatcher kept picking the same
broken issue every tick, forever. The CTO noticed: 'the loop is
not reliable.' This pin asserts the labels move together."""
invocations: list[list[str]] = []

def fake_run(args, _cwd):
invocations.append(list(args))
return _completed("ok", returncode=0)

ok = escalate_to_human(
issue_n=42,
repo="o/r",
state=WorkerState.COMMITTED_NOT_PUSHED,
worktree=worktree,
pr_url=None,
run=fake_run,
)
assert ok

# The gh issue edit invocation must carry BOTH --add-label
# loop:needs-human AND --remove-label loop:ready in the same call,
# so the dispatcher sees a consistent state on the next tick.
edit_call = next(
(c for c in invocations if c[:3] == ["gh", "issue", "edit"]),
None,
)
assert edit_call is not None, "escalate_to_human must call gh issue edit"
assert "--add-label" in edit_call
assert "loop:needs-human" in edit_call
assert "--remove-label" in edit_call, (
"escalate_to_human MUST remove loop:ready — without this the "
"dispatcher re-picks the issue every tick."
)
assert "loop:ready" in edit_call


def test_escalate_returns_false_on_subprocess_error(worktree: Path) -> None:
"""Adversarial: a network/gh failure during escalation must not
crash the iteration loop. Returns False so the caller knows the
operator label wasn't applied."""
def fake_run(args, _cwd):
raise subprocess.SubprocessError("network down")

ok = escalate_to_human(
issue_n=42,
repo="o/r",
state=WorkerState.COMMITTED_NOT_PUSHED,
worktree=worktree,
pr_url=None,
run=fake_run,
)
assert ok is False
10 changes: 9 additions & 1 deletion tests/test_runner_multi_iteration.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,15 @@ def _run(args, _cwd):
self.automerge_calls.append(args[3])
return _cp("", 0)
if args[0] == "gh" and args[1] == "issue" and args[2] == "edit":
self.label_calls.append((int(args[3]), args[-1]))
# The escalation hot-fix calls `--add-label X --remove-label Y`
# in a single invocation. Capture the ADDED label (the one
# the test cares about) by scanning for --add-label.
added = ""
for i, a in enumerate(args):
if a == "--add-label" and i + 1 < len(args):
added = args[i + 1]
break
self.label_calls.append((int(args[3]), added or args[-1]))
return _cp("", 0)
if args[0] == "git" and args[1] == "rev-parse":
return _cp("loop/78-x\n", 0)
Expand Down
Loading