From 2688f3733c599e3c035a87ded35936b77d4f3a97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?=
 <pruiz@users.noreply.github.com>
Date: Thu, 18 Jun 2026 01:35:17 +0200
Subject: [PATCH 1/4] fix: harden phase completion diagnostics

---
 tests/test_event_recording.py           |   6 +-
 tests/test_findings_quality.py          |  76 +++++++++++
 tests/test_gate_check.py                |  15 +++
 tests/test_phase_artifacts_cli.py       |  82 ++++++++++++
 tests/test_phase_failure_state_reset.py |  77 +++++++++++
 tests/test_phases_completion.py         | 127 +++++++++++++++++-
 tools/codecome/harness.py               |  76 ++++++++++-
 tools/codecome/transcript.py            |   6 +-
 tools/findings/quality.py               | 165 ++++++++++++++++++++++++
 tools/phases/artifact_checks.py         |  77 ++++++++++-
 tools/phases/completion.py              |  68 +++++++++-
 tools/phases/gates.py                   |   6 +-
 12 files changed, 762 insertions(+), 19 deletions(-)
 create mode 100644 tests/test_findings_quality.py
 create mode 100644 tools/findings/quality.py

diff --git a/tests/test_event_recording.py b/tests/test_event_recording.py
index a4f85251..26177c33 100644
--- a/tests/test_event_recording.py
+++ b/tests/test_event_recording.py
@@ -1,4 +1,5 @@
 import sys
+import re
 from pathlib import Path
 sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "tools"))
 
@@ -77,4 +78,7 @@ def test_phase_transcript_does_not_truncate_existing_file(tmp_path, monkeypatch)
 
     assert existing.read_text(encoding="utf-8") == "keep me\n"
     assert transcript.path != existing
-    assert transcript.path.name.startswith("last-phase-1c-no-finding-attempt-1-")
+    assert re.fullmatch(
+        r"last-phase-1c-no-finding-attempt-1-\d{8}-\d{6}-pid\d+\.jsonl",
+        transcript.path.name,
+    )
diff --git a/tests/test_findings_quality.py b/tests/test_findings_quality.py
new file mode 100644
index 00000000..ef00277c
--- /dev/null
+++ b/tests/test_findings_quality.py
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "tools"))
+
+
+def _write_phase2_finding(path: Path, *, title: str, category: str, target_area: str, summary: str) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(
+        "---\n"
+        "id: \"CC-0099\"\n"
+        f"title: \"{title}\"\n"
+        "status: \"PENDING\"\n"
+        "severity: \"MEDIUM\"\n"
+        "cvss_v4:\n  vector: \"\"\n  score: 0.0\n  justification: \"\"\n"
+        "confidence: \"MEDIUM\"\n"
+        f"category: \"{category}\"\n"
+        "cwe: [\"CWE-287\"]\n"
+        "language: \"c\"\n"
+        f"target_area: \"{target_area}\"\n"
+        "files: [\"scheduler/ipp.c\"]\n"
+        "symbols: [\"ippReadIO\"]\n"
+        "entry_points: [\"IPP request parser\"]\n"
+        "sources: [\"network IPP request\"]\n"
+        "sinks: [\"authorization decision\"]\n"
+        "trust_boundary: \"remote client to scheduler\"\n"
+        "assets_at_risk: [\"scheduler authorization state\"]\n"
+        "validation:\n  status: \"NOT_STARTED\"\n  methods: []\n  evidence_dir: \"itemdb/evidence/CC-0099\"\n  summary: \"\"\n"
+        "exploitation:\n  status: \"NOT_STARTED\"\n  impact_demonstrated: \"\"\n  exploit_type: \"\"\n  severity_before: \"\"\n  severity_after: \"\"\n  artifacts_dir: \"itemdb/evidence/CC-0099/exploits\"\n  summary: \"\"\n"
+        "created_at: \"2026-06-18\"\n"
+        "updated_at: \"2026-06-18\"\n"
+        "---\n\n"
+        "# Summary\n\n"
+        f"{summary}\n\n"
+        "# Target context\n\n"
+        "The CUPS scheduler accepts IPP requests from remote clients and maps request metadata into authorization state.\n\n"
+        "# Affected code\n\n"
+        "The affected path is `scheduler/ipp.c` in the IPP request parser and authorization handoff.\n\n"
+        "# Vulnerability hypothesis\n\n"
+        "A remote client may control the user identity attribute that reaches an authorization decision without canonicalization.\n\n"
+        "# Source-to-sink reasoning\n\n"
+        "The source is a network IPP request. The parser copies the identity attribute into scheduler request state. The sink is an authorization check that trusts that state.\n\n"
+        "# Attackability / trigger conditions\n\n"
+        "An unauthenticated remote client can send a crafted IPP request before authorization is evaluated.\n\n"
+        "# Impact\n\n"
+        "Successful exploitation could bypass authorization checks and perform scheduler operations as a more privileged user.\n\n"
+        "# Validation plan\n\n"
+        "Send crafted IPP requests with controlled identity attributes and compare the scheduler authorization outcome against a baseline request.\n\n"
+        "# Counter-analysis\n\n"
+        "Review parser normalization, authentication layers, and later authorization checks to determine whether attacker control is removed before the sink.\n\n"
+        "# Validation result\n\n"
+        "Pending.\n\n"
+        "# Evidence\n\n"
+        "Pending.\n",
+        encoding="utf-8",
+    )
+
+
+def test_phase2_quality_rejects_test_template_artifact(tmp_path: Path) -> None:
+    from findings.quality import validate_phase2_finding_quality
+
+    finding = tmp_path / "itemdb" / "findings" / "PENDING" / "CC-0099-test-finding.md"
+    _write_phase2_finding(
+        finding,
+        title="Test finding to see template",
+        category="Test",
+        target_area="Testing",
+        summary="This is a test finding created to verify the template system. It does not represent an actual vulnerability.",
+    )
+
+    errors = validate_phase2_finding_quality(finding)
+
+    assert any("test/template artifact" in error for error in errors), errors
+    assert any("not an actual target vulnerability" in error for error in errors), errors
diff --git a/tests/test_gate_check.py b/tests/test_gate_check.py
index 4ab99826..b4e06f1c 100644
--- a/tests/test_gate_check.py
+++ b/tests/test_gate_check.py
@@ -127,3 +127,18 @@ def test_gate_phase_4_rejects_wrong_status(tmp_path, monkeypatch):
         assert exit_code == 1
     finally:
         gates_module.ROOT = original_root
+
+
+def test_gate_phase_3_no_pending_is_noop_success(tmp_path, capsys):
+    original_root = gates_module.ROOT
+    gates_module.ROOT = tmp_path
+    (tmp_path / "itemdb" / "findings" / "PENDING").mkdir(parents=True)
+
+    try:
+        exit_code = gates_module.gate_phase_3()
+    finally:
+        gates_module.ROOT = original_root
+
+    out = capsys.readouterr().out
+    assert exit_code == 0
+    assert "nothing to review" in out
diff --git a/tests/test_phase_artifacts_cli.py b/tests/test_phase_artifacts_cli.py
index b92bf7aa..bdba1bee 100644
--- a/tests/test_phase_artifacts_cli.py
+++ b/tests/test_phase_artifacts_cli.py
@@ -216,3 +216,85 @@ def test_has_valid_threat_model_returns_false_when_missing(tmp_path: Path) -> No
 
     with patch("phases.artifact_checks.ROOT", tmp_path):
         assert not has_valid_threat_model()
+
+
+def test_phase2_artifacts_accept_explicit_no_findings_summary(tmp_path: Path) -> None:
+    from phases.artifact_checks import check_phase_2_artifacts
+
+    runs = tmp_path / "runs"
+    runs.mkdir(parents=True)
+    (runs / "phase-2-summary-2026-06-16-120000.md").write_text(
+        "# Findings created\n\n"
+        "| ID | Title | Path |\n"
+        "|---|---|---|\n"
+        "| - | None. | - |\n",
+        encoding="utf-8",
+    )
+
+    with patch("phases.artifact_checks.ROOT", tmp_path):
+        assert check_phase_2_artifacts() == []
+
+
+def test_phase2_artifacts_reject_stub_finding(tmp_path: Path) -> None:
+    from phases.artifact_checks import check_phase_2_artifacts
+
+    runs = tmp_path / "runs"
+    pending = tmp_path / "itemdb" / "findings" / "PENDING"
+    runs.mkdir(parents=True)
+    pending.mkdir(parents=True)
+    (runs / "phase-2-summary-2026-06-16-120000.md").write_text(
+        "# Findings created\n\n"
+        "| ID | Title | Path |\n"
+        "|---|---|---|\n"
+        "| CC-0001 | Stub | itemdb/findings/PENDING/CC-0001-stub.md |\n",
+        encoding="utf-8",
+    )
+    (pending / "CC-0001-stub.md").write_text(
+        "---\n"
+        "id: \"CC-0001\"\n"
+        "title: \"Stub\"\n"
+        "status: \"PENDING\"\n"
+        "severity: \"MEDIUM\"\n"
+        "cvss_v4:\n  vector: \"\"\n  score: 0.0\n  justification: \"\"\n"
+        "confidence: \"LOW\"\ncategory: \"Unclassified\"\ncwe: []\nlanguage: \"unknown\"\ntarget_area: \"unknown\"\n"
+        "files: []\nsymbols: []\nentry_points: []\nsources: []\nsinks: []\ntrust_boundary: \"unknown\"\nassets_at_risk: []\n"
+        "validation:\n  status: \"NOT_STARTED\"\n  methods: []\n  evidence_dir: \"itemdb/evidence/CC-0001\"\n  summary: \"\"\n"
+        "exploitation:\n  status: \"NOT_STARTED\"\n  impact_demonstrated: \"\"\n  exploit_type: \"\"\n  severity_before: \"\"\n  severity_after: \"\"\n  artifacts_dir: \"itemdb/evidence/CC-0001/exploits\"\n  summary: \"\"\n"
+        "created_at: \"2026-06-16\"\nupdated_at: \"2026-06-16\"\n---\n\n# Summary\n\nPending.\n",
+        encoding="utf-8",
+    )
+
+    with patch("phases.artifact_checks.ROOT", tmp_path):
+        errors = check_phase_2_artifacts()
+
+    assert any("not a complete Phase 2 finding" in error for error in errors), errors
+
+
+def test_phase2_artifacts_report_all_quality_errors(tmp_path: Path, monkeypatch) -> None:
+    from phases.artifact_checks import check_phase_2_artifacts
+    from findings import quality as quality_mod
+
+    runs = tmp_path / "runs"
+    pending = tmp_path / "itemdb" / "findings" / "PENDING"
+    runs.mkdir(parents=True)
+    pending.mkdir(parents=True)
+    (runs / "phase-2-summary-2026-06-18-120000.md").write_text(
+        "# Findings created\n\n"
+        "| ID | Title | Path |\n"
+        "|---|---|---|\n"
+        "| CC-0099 | Many | itemdb/findings/PENDING/CC-0099-many-errors.md |\n",
+        encoding="utf-8",
+    )
+    (pending / "CC-0099-many-errors.md").write_text("placeholder", encoding="utf-8")
+    monkeypatch.setattr(
+        quality_mod,
+        "validate_phase2_finding_quality",
+        lambda _path: [f"artifact-error-{i}" for i in range(7)],
+    )
+
+    with patch("phases.artifact_checks.ROOT", tmp_path):
+        errors = check_phase_2_artifacts()
+
+    joined = "\n".join(errors)
+    for i in range(7):
+        assert f"artifact-error-{i}" in joined
diff --git a/tests/test_phase_failure_state_reset.py b/tests/test_phase_failure_state_reset.py
index 24425772..e70ad83a 100644
--- a/tests/test_phase_failure_state_reset.py
+++ b/tests/test_phase_failure_state_reset.py
@@ -117,6 +117,83 @@ def fake_resume_prompt(*_args, failure_details=None, **_kw):
     assert captured == [["stale failure from previous attempt"], None]
 
 
+def test_phase_mode_terminal_stop_missing_artifacts_auto_resumes(monkeypatch):
+    from codecome import harness as harness_mod
+    from codecome import runner as runner_mod
+
+    transcript = harness_mod.ROOT / "tmp" / "fake.jsonl"
+    attempts = iter([
+        (0, "ses_test", _terminal_result(), transcript),
+        (0, "ses_test", _terminal_result(), transcript),
+    ])
+    completion_results = iter([
+        (False, ["runs/phase-2-summary*.md was not updated during this run"]),
+        (True, []),
+    ])
+    captured: list[list[str] | None] = []
+    prompts: list[str] = []
+
+    def fake_run_single_attempt(_args, _console, prompt, *_a, **_kw):
+        prompts.append(prompt)
+        return next(attempts)
+
+    def fake_resume_prompt(*_args, failure_details=None, **_kw):
+        captured.append(failure_details)
+        return "resume prompt"
+
+    monkeypatch.setattr(harness_mod, "ServerRunner", lambda: _FakeServerRunner())
+    monkeypatch.setenv("CODECOME_MAX_ITERATION_RETRIES", "1")
+    monkeypatch.setattr(harness_mod, "load_prompt", lambda *_a, **_kw: "initial prompt")
+    monkeypatch.setattr(harness_mod, "resolve_runtime_config", lambda _agent: _FakeRuntimeConfig())
+    monkeypatch.setattr(harness_mod, "configure_rendering", lambda *_a, **_kw: None)
+    monkeypatch.setattr(runner_mod, "_run_single_attempt", fake_run_single_attempt)
+    monkeypatch.setattr(
+        harness_mod,
+        "check_phase_graceful_completion",
+        lambda *_a, **_kw: next(completion_results),
+    )
+    monkeypatch.setattr(harness_mod, "build_phase_resume_prompt", fake_resume_prompt)
+
+    from findings import checks_entry
+    monkeypatch.setattr(checks_entry, "run_frontmatter_validation", lambda: (0, ""))
+
+    rc = harness_mod.run_phase_mode(_args())
+
+    assert rc == 0
+    assert prompts == ["initial prompt", "resume prompt"]
+    assert captured == [["runs/phase-2-summary*.md was not updated during this run"]]
+
+
+def test_phase_mode_final_failure_prints_gate_failures(monkeypatch, capsys):
+    from codecome import harness as harness_mod
+    from codecome import runner as runner_mod
+
+    transcript = harness_mod.ROOT / "tmp" / "fake.jsonl"
+
+    monkeypatch.setattr(harness_mod, "ServerRunner", lambda: _FakeServerRunner())
+    monkeypatch.setenv("CODECOME_MAX_ITERATION_RETRIES", "0")
+    monkeypatch.setattr(harness_mod, "load_prompt", lambda *_a, **_kw: "initial prompt")
+    monkeypatch.setattr(harness_mod, "resolve_runtime_config", lambda _agent: _FakeRuntimeConfig())
+    monkeypatch.setattr(harness_mod, "configure_rendering", lambda *_a, **_kw: None)
+    monkeypatch.setattr(
+        runner_mod,
+        "_run_single_attempt",
+        lambda *_a, **_kw: (0, "ses_test", _terminal_result(), transcript),
+    )
+    monkeypatch.setattr(
+        harness_mod,
+        "check_phase_graceful_completion",
+        lambda *_a, **_kw: (False, ["Invalid: CC-0015 still contains template guidance"]),
+    )
+
+    rc = harness_mod.run_phase_mode(_args())
+    output = capsys.readouterr().out
+
+    assert rc == 2
+    assert "remaining gate failures" in output
+    assert "CC-0015 still contains template guidance" in output
+
+
 def test_phase1_subphase_does_not_reuse_previous_attempt_failures(monkeypatch):
     from codecome import phase_1 as p1
 
diff --git a/tests/test_phases_completion.py b/tests/test_phases_completion.py
index 22d650a9..219540ee 100644
--- a/tests/test_phases_completion.py
+++ b/tests/test_phases_completion.py
@@ -213,8 +213,8 @@ def test_phase1_path_diagnostics_handle_notes_root_outside_root(self, tmp_path):
             completion_mod.SANDBOX_PLAN_PATH = orig_sandbox_plan
             completion_mod.ROOT = orig_root
 
-    def test_phase2_accepts_summary_with_no_new_findings(self, tmp_path):
-        """Phase 2 should pass when only the run summary is fresh (no new findings)."""
+    def test_phase2_accepts_explicit_no_findings_summary(self, tmp_path):
+        """Phase 2 may pass with zero findings when the summary says so."""
         import os
         import phases.completion as completion_mod
 
@@ -224,7 +224,13 @@ def test_phase2_accepts_summary_with_no_new_findings(self, tmp_path):
         completion_mod.FINDINGS_ROOT = tmp_path / "itemdb" / "findings"
         (tmp_path / "runs").mkdir(parents=True, exist_ok=True)
         summary = tmp_path / "runs" / "phase-2-summary-2026-06-05-143022.md"
-        summary.write_text("", encoding="utf-8")
+        summary.write_text(
+            "# Findings created\n\n"
+            "| ID | Title | Path |\n"
+            "|---|---|---|\n"
+            "| - | None. | - |\n",
+            encoding="utf-8",
+        )
         run_start = time.time() - 60
         os.utime(summary, (run_start + 60, run_start + 60))
 
@@ -236,6 +242,70 @@ def test_phase2_accepts_summary_with_no_new_findings(self, tmp_path):
             completion_mod.ROOT = orig_root
             completion_mod.FINDINGS_ROOT = orig_findings_root
 
+    def test_phase2_rejects_vague_no_finding_summary(self, tmp_path):
+        """A fresh summary without explicit no-finding wording is not enough."""
+        import os
+        import phases.completion as completion_mod
+
+        orig_root = completion_mod.ROOT
+        orig_findings_root = completion_mod.FINDINGS_ROOT
+        completion_mod.ROOT = tmp_path
+        completion_mod.FINDINGS_ROOT = tmp_path / "itemdb" / "findings"
+        (tmp_path / "runs").mkdir(parents=True, exist_ok=True)
+        summary = tmp_path / "runs" / "phase-2-summary-2026-06-05-143022.md"
+        summary.write_text("# Goal\n\nLooked around.\n", encoding="utf-8")
+        run_start = time.time() - 60
+        os.utime(summary, (run_start + 60, run_start + 60))
+
+        try:
+            ok, failures = completion_mod.check_phase_graceful_completion("2", None, run_start)
+            assert ok is False
+            assert any("does not explicitly state" in f for f in failures), failures
+        finally:
+            completion_mod.ROOT = orig_root
+            completion_mod.FINDINGS_ROOT = orig_findings_root
+
+    def test_phase2_rejects_fresh_stub_finding(self, tmp_path):
+        """A fresh template-like finding should not satisfy Phase 2 completion."""
+        import os
+        import phases.completion as completion_mod
+
+        orig_root = completion_mod.ROOT
+        orig_findings_root = completion_mod.FINDINGS_ROOT
+        completion_mod.ROOT = tmp_path
+        completion_mod.FINDINGS_ROOT = tmp_path / "itemdb" / "findings"
+        pending = tmp_path / "itemdb" / "findings" / "PENDING"
+        pending.mkdir(parents=True, exist_ok=True)
+        (tmp_path / "runs").mkdir(parents=True, exist_ok=True)
+        summary = tmp_path / "runs" / "phase-2-summary-2026-06-05-143022.md"
+        summary.write_text("# Findings created\n\n| ID | Title | Path |\n|---|---|---|\n| CC-0001 | Stub | itemdb/findings/PENDING/CC-0001-stub.md |\n", encoding="utf-8")
+        finding = pending / "CC-0001-stub.md"
+        finding.write_text(
+            "---\n"
+            "id: \"CC-0001\"\n"
+            "title: \"Stub\"\n"
+            "status: \"PENDING\"\n"
+            "severity: \"MEDIUM\"\n"
+            "cvss_v4:\n  vector: \"\"\n  score: 0.0\n  justification: \"\"\n"
+            "confidence: \"LOW\"\ncategory: \"Unclassified\"\ncwe: []\nlanguage: \"unknown\"\ntarget_area: \"unknown\"\n"
+            "files: []\nsymbols: []\nentry_points: []\nsources: []\nsinks: []\ntrust_boundary: \"unknown\"\nassets_at_risk: []\n"
+            "validation:\n  status: \"NOT_STARTED\"\n  methods: []\n  evidence_dir: \"itemdb/evidence/CC-0001\"\n  summary: \"\"\n"
+            "exploitation:\n  status: \"NOT_STARTED\"\n  impact_demonstrated: \"\"\n  exploit_type: \"\"\n  severity_before: \"\"\n  severity_after: \"\"\n  artifacts_dir: \"itemdb/evidence/CC-0001/exploits\"\n  summary: \"\"\n"
+            "created_at: \"2026-06-16\"\nupdated_at: \"2026-06-16\"\n---\n\n# Summary\n\nPending.\n",
+            encoding="utf-8",
+        )
+        run_start = time.time() - 60
+        for path in (summary, finding):
+            os.utime(path, (run_start + 60, run_start + 60))
+
+        try:
+            ok, failures = completion_mod.check_phase_graceful_completion("2", None, run_start)
+            assert ok is False
+            assert any("Phase 2 finding is incomplete" in f for f in failures), failures
+        finally:
+            completion_mod.ROOT = orig_root
+            completion_mod.FINDINGS_ROOT = orig_findings_root
+
     def test_phase2_failure_details_mention_run_summary(self, tmp_path):
         """Phase 2 should report missing run summary when nothing is freshened."""
         import phases.completion as completion_mod
@@ -340,7 +410,45 @@ def test_resume_prompt_with_failure_details_lists_missing_artifacts(self):
         )
         assert "Missing required artifacts:" in prompt
         assert "runs/phase-2-summary*.md" in prompt
-        assert "Fix only these missing items." in prompt
+        assert "Fix every listed item." in prompt
+        assert "authoritative completion-gate output" in prompt
+        assert "Do not claim validation passed" in prompt
+
+    def test_phase2_reports_all_quality_errors_without_truncation(self, tmp_path, monkeypatch):
+        import os
+        import phases.completion as completion_mod
+        from findings import quality as quality_mod
+
+        orig_root = completion_mod.ROOT
+        orig_findings_root = completion_mod.FINDINGS_ROOT
+        completion_mod.ROOT = tmp_path
+        completion_mod.FINDINGS_ROOT = tmp_path / "itemdb" / "findings"
+        pending = tmp_path / "itemdb" / "findings" / "PENDING"
+        pending.mkdir(parents=True, exist_ok=True)
+        (tmp_path / "runs").mkdir(parents=True, exist_ok=True)
+        summary = tmp_path / "runs" / "phase-2-summary-2026-06-18-120000.md"
+        finding = pending / "CC-0099-many-errors.md"
+        summary.write_text("# Findings created\n\n| ID | Title | Path |\n|---|---|---|\n| CC-0099 | Many | itemdb/findings/PENDING/CC-0099-many-errors.md |\n", encoding="utf-8")
+        finding.write_text("placeholder", encoding="utf-8")
+        run_start = time.time() - 60
+        for path in (summary, finding):
+            os.utime(path, (run_start + 60, run_start + 60))
+        monkeypatch.setattr(
+            quality_mod,
+            "validate_phase2_finding_quality",
+            lambda _path: [f"error-{i}" for i in range(7)],
+        )
+
+        try:
+            ok, failures = completion_mod.check_phase_graceful_completion("2", None, run_start)
+        finally:
+            completion_mod.ROOT = orig_root
+            completion_mod.FINDINGS_ROOT = orig_findings_root
+
+        assert ok is False
+        joined = "\n".join(failures)
+        for i in range(7):
+            assert f"error-{i}" in joined
 
     def test_resume_prompt_without_failure_details_uses_generic_wording(self):
         """Resume prompt should use the generic reassess wording when no failure details given."""
@@ -792,7 +900,13 @@ def test_sweep_accepts_phase2_summary_sweep(self, tmp_path):
         completion_mod.FINDINGS_ROOT = tmp_path / "itemdb" / "findings"
         (tmp_path / "runs").mkdir(parents=True, exist_ok=True)
         summary = tmp_path / "runs" / "phase-2-summary-sweep-src-foo-php-2026-06-12-143022.md"
-        summary.write_text("", encoding="utf-8")
+        summary.write_text(
+            "# Findings created\n\n"
+            "| ID | Title | Path |\n"
+            "|---|---|---|\n"
+            "| - | None. | - |\n",
+            encoding="utf-8",
+        )
         run_start = time.time() - 60
         os.utime(summary, (run_start + 60, run_start + 60))
 
@@ -849,7 +963,8 @@ def test_sweep_resume_prompt_uses_phase2_gate(self):
             ],
         )
         assert "runs/phase-2-summary*.md" in prompt
-        assert "Fix only these missing items." in prompt
+        assert "Fix every listed item." in prompt
+        assert "authoritative completion-gate output" in prompt
 
 
 class TestPhase3ChecklistMentionsRunSummary:
diff --git a/tools/codecome/harness.py b/tools/codecome/harness.py
index 28fc300c..2bc6c911 100644
--- a/tools/codecome/harness.py
+++ b/tools/codecome/harness.py
@@ -16,6 +16,7 @@
 import os
 import signal
 import time
+from datetime import datetime
 from pathlib import Path
 from typing import Any, Optional
 
@@ -33,6 +34,65 @@
 )
 
 
+def _pending_finding_count() -> int:
+    pending_dir = ROOT / "itemdb" / "findings" / "PENDING"
+    if not pending_dir.exists():
+        return 0
+    return len([p for p in pending_dir.glob("CC-*.md") if p.is_file()])
+
+
+def _write_phase3_noop_summary() -> Path:
+    runs_dir = ROOT / "runs"
+    runs_dir.mkdir(parents=True, exist_ok=True)
+    timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+    path = runs_dir / f"phase-3-summary-{timestamp}.md"
+    path.write_text(
+        "# CodeCome Run Summary\n\n"
+        f"Date: {datetime.now().date().isoformat()}  \n"
+        "Phase: counter_analysis  \n"
+        "Agent: reviewer  \n"
+        "Target path: `./src`\n\n"
+        "# Goal\n\n"
+        "Review pending findings.\n\n"
+        "# Prompt\n\n"
+        "Phase 3 was requested, but there were no PENDING findings to review.\n\n"
+        "# Files read\n\n"
+        "- `itemdb/findings/PENDING/`\n\n"
+        "# Files created\n\n"
+        f"- `runs/{path.name}`\n\n"
+        "# Files modified\n\n"
+        "None.\n\n"
+        "# Findings created\n\n"
+        "| ID | Title | Path |\n"
+        "|---|---|---|\n"
+        "| - | None. | - |\n\n"
+        "# Findings moved\n\n"
+        "| ID | From | To | Reason |\n"
+        "|---|---|---|---|\n"
+        "| - | - | - | None. |\n\n"
+        "# Findings updated\n\n"
+        "| ID | Update summary |\n"
+        "|---|---|\n"
+        "| - | None. |\n\n"
+        "# Evidence created\n\n"
+        "None.\n\n"
+        "# Important observations\n\n"
+        "No PENDING findings were available, so Phase 3 had nothing to review.\n\n"
+        "# Assumptions\n\n"
+        "None.\n\n"
+        "# Open questions for the user\n\n"
+        "None.\n\n"
+        "# Re-run prompt hints\n\n"
+        "None.\n\n"
+        "# Limitations\n\n"
+        "No counter-analysis was performed because there were no PENDING findings.\n\n"
+        "# Recommended next step\n\n"
+        "Run Phase 2 if additional vulnerability hypotheses are needed.\n",
+        encoding="utf-8",
+    )
+    return path
+
+
 def run_phase_mode(args: argparse.Namespace) -> int:
     """Run a single phase with auto-retry/resume.
 
@@ -125,6 +185,14 @@ def _p1_forward_signal(signum: int, _frame: Any) -> None:
     if console is None:
         out.warn("rich is not installed; using plain structured output fallback")
 
+    if str(args.phase) == "3" and _pending_finding_count() == 0:
+        summary_path = _write_phase3_noop_summary()
+        out.separator(tone=T.SUCCESS)
+        out.success("Phase 3 completed successfully", symbol=True)
+        out.detail("  no PENDING findings; nothing to review")
+        out.detail(f"  summary: {summary_path.relative_to(ROOT)}")
+        return 0
+
     attempt_number = 0
     last_session_id: str = ""
     last_finish_reason: Optional[str] = None
@@ -169,6 +237,7 @@ def _forward_signal(signum: int, _frame: Any) -> None:
             attempt_number += 1
             phase_failures = []
             phase_ok = False
+            finish_warning = None
             # Clear per-session dedup state so retries don't suppress updates.
             _reset_subagent_state()
             returncode, session_id, run_result, transcript_path = _run_single_attempt(
@@ -315,7 +384,8 @@ def _forward_signal(signum: int, _frame: Any) -> None:
                             out.error(msg)
                             print(validation_output)
                         break
-                break
+                if returncode == 0:
+                    break
 
             if returncode == 2 and (
                 last_finish_reason in _FINISH_MID_TURN
@@ -367,6 +437,10 @@ def _forward_signal(signum: int, _frame: Any) -> None:
         )
         if finish_warning:
             out.error(f"  reason: {finish_warning}", strong=False)
+        if phase_failures:
+            out.error("  remaining gate failures:", strong=False)
+            for failure in phase_failures:
+                out.detail(f"    - {failure}")
         out.detail(f"  transcript: {transcript_path.relative_to(ROOT)}")
         out.warn(
             "  hint: the run is likely partial; rerun the phase or "
diff --git a/tools/codecome/transcript.py b/tools/codecome/transcript.py
index a3316e58..dd9ce14c 100644
--- a/tools/codecome/transcript.py
+++ b/tools/codecome/transcript.py
@@ -70,7 +70,11 @@ def for_phase(cls, phase: str, finding: str | None) -> Transcript:
             counter = _ATTEMPT_COUNTER.get(key, 1)
             _ATTEMPT_COUNTER[key] = counter + 1
 
-        path = _unique_transcript_path(_transcript_dir() / f"last-phase-{phase}-{finding_tag}-attempt-{counter}.jsonl")
+        stamp = time.strftime("%Y%m%d-%H%M%S")
+        path = _unique_transcript_path(
+            _transcript_dir()
+            / f"last-phase-{phase}-{finding_tag}-attempt-{counter}-{stamp}-pid{os.getpid()}.jsonl"
+        )
         return cls(path, path.open("x", encoding="utf-8", buffering=1))
 
     @classmethod
diff --git a/tools/findings/quality.py b/tools/findings/quality.py
new file mode 100644
index 00000000..4b0d5c6f
--- /dev/null
+++ b/tools/findings/quality.py
@@ -0,0 +1,165 @@
+# Copyright (C) 2025-2026 Pablo Ruiz García <pablo.ruiz@gmail.com>
+# SPDX-License-Identifier: GPL-3.0-or-later OR AGPL-3.0-or-later
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from findings.checks import load_sections, validate_finding
+from findings.frontmatter import load_frontmatter_strict
+
+
+PHASE2_CORE_SECTIONS = [
+    "Summary",
+    "Target context",
+    "Affected code",
+    "Vulnerability hypothesis",
+    "Source-to-sink reasoning",
+    "Attackability / trigger conditions",
+    "Impact",
+    "Validation plan",
+]
+
+PHASE2_REQUIRED_SECTIONS = PHASE2_CORE_SECTIONS + [
+    "Counter-analysis",
+    "Validation result",
+    "Evidence",
+]
+
+_PLACEHOLDER_TEXT = {
+    "",
+    "pending.",
+    "todo.",
+    "tbd.",
+    "not applicable.",
+}
+
+_TEMPLATE_MARKERS = [
+    "Briefly describe the suspected vulnerability.",
+    "Describe the relevant target type",
+    "List the relevant files",
+    "Explain the suspected vulnerability.",
+    "This section must clearly distinguish what is known from what is assumed.",
+    "Describe the path from attacker-controlled",
+    "Explain how an attacker",
+    "Explain the realistic security impact.",
+    "Describe exactly how to prove or disprove the finding.",
+    "Try to disprove the finding.",
+]
+
+_NON_FINDING_MARKERS = [
+    "does not represent an actual vulnerability",
+    "does not describe a real vulnerability",
+    "does not contain a vulnerability",
+    "no actual vulnerability",
+    "this is a test finding",
+    "test finding created to verify",
+    "template system itself does not contain a vulnerability",
+]
+
+
+def _is_placeholder(value: Any) -> bool:
+    if not isinstance(value, str):
+        return False
+    return value.strip().lower() in _PLACEHOLDER_TEXT
+
+
+def _contains_template_marker(value: str) -> bool:
+    return any(marker in value for marker in _TEMPLATE_MARKERS)
+
+
+def _contains_non_finding_marker(value: str) -> bool:
+    lowered = value.lower()
+    return any(marker in lowered for marker in _NON_FINDING_MARKERS)
+
+
+def _list_is_populated(data: dict[str, Any], key: str) -> bool:
+    value = data.get(key)
+    return isinstance(value, list) and any(str(item).strip() for item in value)
+
+
+def _scalar_is_populated(data: dict[str, Any], key: str) -> bool:
+    value = data.get(key)
+    if not isinstance(value, str):
+        return False
+    return value.strip().lower() not in _PLACEHOLDER_TEXT | {"unknown", "unclassified"}
+
+
+def validate_phase2_finding_quality(path: Path) -> list[str]:
+    """Return Phase 2 quality errors for a candidate finding.
+
+    This is intentionally stricter than frontmatter validation. A PENDING
+    finding can be syntactically valid while still being unusable as a Phase 2
+    durable artifact if it is mostly an untouched template.
+    """
+    errors: list[str] = []
+
+    errors.extend(validate_finding(path))
+    try:
+        data = load_frontmatter_strict(path)
+    except Exception as exc:
+        return errors or [str(exc)]
+
+    if data.get("status") != "PENDING":
+        errors.append("Phase 2 findings must remain in PENDING status")
+
+    title = str(data.get("title", "")).strip().lower()
+    if title.startswith("test finding") or title.startswith("template test"):
+        errors.append("Phase 2 findings must describe a target vulnerability, not a test/template artifact")
+
+    for key in ("category", "target_area"):
+        value = str(data.get(key, "")).strip().lower()
+        if value in {"test", "testing", "template"}:
+            errors.append(f"Phase 2 frontmatter field {key} describes a test/template artifact")
+
+    for key in ("category", "language", "target_area", "trust_boundary"):
+        if not _scalar_is_populated(data, key):
+            errors.append(f"Phase 2 requires populated frontmatter field: {key}")
+
+    for key in ("files", "sources", "sinks", "assets_at_risk"):
+        if not _list_is_populated(data, key):
+            errors.append(f"Phase 2 requires non-empty frontmatter list: {key}")
+
+    if not (_list_is_populated(data, "symbols") or _list_is_populated(data, "entry_points")):
+        errors.append("Phase 2 requires at least one symbol or entry point")
+
+    sections = load_sections(path)
+    combined_sections = "\n".join(str(value) for value in sections.values())
+    if _contains_non_finding_marker(combined_sections):
+        errors.append("Phase 2 finding text says it is not an actual target vulnerability")
+
+    for section in PHASE2_REQUIRED_SECTIONS:
+        body = sections.get(section)
+        if body is None:
+            errors.append(f"Phase 2 requires #{section} section")
+            continue
+        if section in PHASE2_CORE_SECTIONS:
+            if _is_placeholder(body):
+                errors.append(f"Phase 2 requires populated #{section} section")
+            elif _contains_template_marker(body):
+                errors.append(f"Phase 2 #{section} still contains template guidance")
+
+    counter = sections.get("Counter-analysis", "")
+    if counter and not _is_placeholder(counter) and _contains_template_marker(counter):
+        errors.append("Phase 2 #Counter-analysis still contains template guidance")
+
+    return errors
+
+
+def phase2_summary_declares_no_findings(path: Path) -> bool:
+    try:
+        content = path.read_text(encoding="utf-8")
+    except OSError:
+        return False
+    lowered = content.lower()
+    if "# findings created" not in lowered:
+        return False
+    no_finding_markers = [
+        "| - | none.",
+        "| - | none |",
+        "no findings created",
+        "no new findings",
+        "zero findings",
+    ]
+    return any(marker in lowered for marker in no_finding_markers)
diff --git a/tools/phases/artifact_checks.py b/tools/phases/artifact_checks.py
index 2b57a743..13b5a367 100644
--- a/tools/phases/artifact_checks.py
+++ b/tools/phases/artifact_checks.py
@@ -283,6 +283,71 @@ def _validate_risk_index() -> list[str]:
     return validate_file_risk_index()
 
 
+# -- Phase 2 -------------------------------------------------------------------
+
+
+def _phase2_summaries() -> list[Path]:
+    runs = ROOT / "runs"
+    if not runs.exists():
+        return []
+    return sorted(
+        [path for path in runs.glob("phase-2-summary*.md") if path.is_file()],
+        key=lambda p: p.stat().st_mtime,
+        reverse=True,
+    )
+
+
+def _phase2_pending_findings() -> list[Path]:
+    pending = ROOT / "itemdb" / "findings" / "PENDING"
+    if not pending.exists():
+        return []
+    return sorted(path for path in pending.glob("CC-*.md") if path.is_file())
+
+
+def check_phase_2_artifacts(allow_missing_generated: bool = False) -> list[str]:
+    """Validate Phase 2 artifacts.
+
+    A correct Phase 2 run may create zero findings, but that must be explicit in
+    the run summary. If findings exist, they must be reviewable Phase 2 artifacts
+    rather than untouched templates.
+    """
+    from findings.quality import (
+        phase2_summary_declares_no_findings,
+        validate_phase2_finding_quality,
+    )
+
+    errors: list[str] = []
+    summaries = _phase2_summaries()
+    if not summaries:
+        if not allow_missing_generated:
+            errors.append("Missing artifact: runs/phase-2-summary*.md")
+        return errors
+
+    if phase2_summary_declares_no_findings(summaries[0]):
+        return errors
+
+    findings = _phase2_pending_findings()
+    if not findings:
+        errors.append(
+            "Phase 2 summary does not explicitly state that no findings were found, "
+            "and no PENDING findings exist"
+        )
+        return errors
+
+    for finding in findings:
+        quality_errors = validate_phase2_finding_quality(finding)
+        if quality_errors:
+            try:
+                rel = finding.relative_to(ROOT)
+            except ValueError:
+                rel = finding
+            errors.append(
+                f"{rel} is not a complete Phase 2 finding: "
+                + "; ".join(quality_errors)
+            )
+    return errors
+
+
 # -- Dispatch ------------------------------------------------------------------
 
 
@@ -308,10 +373,10 @@ def check_phase_artifacts(
     phases_to_check: list[str]
 
     if phase == "all":
-        phases_to_check = ["1a", "1b", "1c"]
+        phases_to_check = ["1a", "1b", "1c", "2"]
     elif phase == "1":
         phases_to_check = ["1a", "1b", "1c"]
-    elif phase in ("2", "3", "4", "5", "6"):
+    elif phase in ("3", "4", "5", "6"):
         print(C.info(f"Phase {phase} artifact checks not yet implemented; nothing to do."))
         return 0
     else:
@@ -364,8 +429,16 @@ def _check_phase_1c_with_ct(
     return check_phase_1c_artifacts(allow_missing_generated=allow_missing_generated)
 
 
+def _check_phase_2_with_ct(
+    allow_missing_generated: bool = False,
+    phase_1a_start_time: float | None = None,
+) -> list[str]:
+    return check_phase_2_artifacts(allow_missing_generated=allow_missing_generated)
+
+
 _CHECKERS = {
     "1a": _check_phase_1a_with_ct,
     "1b": _check_phase_1b_with_ct,
     "1c": _check_phase_1c_with_ct,
+    "2": _check_phase_2_with_ct,
 }
diff --git a/tools/phases/completion.py b/tools/phases/completion.py
index 11bc7f25..b2970929 100644
--- a/tools/phases/completion.py
+++ b/tools/phases/completion.py
@@ -91,6 +91,16 @@ def _run_summary_is_fresh(phase_id: str, run_start_time: float) -> bool:
     return any(Path(p).stat().st_mtime >= run_start_time for p in matches)
 
 
+def _fresh_run_summaries(phase_id: str, run_start_time: float) -> list[Path]:
+    import glob as _glob
+    matches = [Path(p) for p in _glob.glob(str(ROOT / "runs" / f"phase-{phase_id}-summary*.md"))]
+    return sorted(
+        [p for p in matches if p.is_file() and p.stat().st_mtime >= run_start_time],
+        key=lambda p: p.stat().st_mtime,
+        reverse=True,
+    )
+
+
 def _append_run_summary_check(
     failures: list[str], phase_id: str, run_start_time: float
 ) -> None:
@@ -111,6 +121,29 @@ def _iter_files(root: Path) -> Iterator[Path]:
             yield path
 
 
+def _fresh_pending_findings(run_start_time: float) -> list[Path]:
+    pending_dir = FINDINGS_ROOT / "PENDING"
+    if not pending_dir.exists():
+        return []
+    return sorted(
+        [
+            path for path in pending_dir.glob("CC-*.md")
+            if path.is_file() and path.stat().st_mtime >= run_start_time
+        ],
+        key=lambda p: p.stat().st_mtime,
+        reverse=True,
+    )
+
+
+def _latest_summary_declares_no_phase2_findings(run_start_time: float) -> bool:
+    from findings.quality import phase2_summary_declares_no_findings
+
+    summaries = _fresh_run_summaries("2", run_start_time)
+    if not summaries:
+        return False
+    return phase2_summary_declares_no_findings(summaries[0])
+
+
 def _find_finding_file(status_dir: Path, finding_id: str, run_start_time: float) -> Path | None:
     """Find a finding file matching the ID in the status directory.
 
@@ -256,14 +289,34 @@ def check_phase_graceful_completion(phase: str, finding: str | None, run_start_t
                 )
             return (len(failures) == 0, failures)
         elif phase_key in ("2", "sweep"):
-            import glob as _glob
-            run_summaries = _glob.glob(str(ROOT / "runs" / "phase-2-summary*.md"))
-            summary_fresh = any(Path(p).stat().st_mtime >= run_start_time for p in run_summaries)
-            if not summary_fresh:
+            from findings.quality import validate_phase2_finding_quality
+
+            fresh_summaries = _fresh_run_summaries("2", run_start_time)
+            if not fresh_summaries:
                 failures.append(
                     "Missing: runs/phase-2-summary*.md — run summary was not "
                     "created or updated"
                 )
+
+            fresh_findings = _fresh_pending_findings(run_start_time)
+            if fresh_findings:
+                for finding_path in fresh_findings:
+                    quality_errors = validate_phase2_finding_quality(finding_path)
+                    if quality_errors:
+                        try:
+                            rel = finding_path.relative_to(ROOT)
+                        except ValueError:
+                            rel = finding_path
+                        failures.append(
+                            f"Invalid: {rel} — Phase 2 finding is incomplete: "
+                            + "; ".join(quality_errors)
+                        )
+            elif fresh_summaries and not _latest_summary_declares_no_phase2_findings(run_start_time):
+                failures.append(
+                    "Missing: itemdb/findings/PENDING/ — no finding was created or updated during this run, "
+                    "and the fresh Phase 2 summary does not explicitly state that no findings were found"
+                )
+
             return (len(failures) == 0, failures)
         elif phase_key == "3":
             import glob as _glob
@@ -497,7 +550,8 @@ def build_phase_resume_prompt(
             lines.append(f"- {detail}")
         lines.append("")
         lines.append(
-            "Fix only these missing items. Do not redo completed work."
+            "Fix every listed item. Do not redo completed work. The list above is CodeCome's "
+            "authoritative completion-gate output; do not ignore any item."
         )
     else:
         lines.append("")
@@ -516,6 +570,10 @@ def build_phase_resume_prompt(
         "Before ending, verify that the required durable artifacts for "
         "this phase exist, are updated, and are internally consistent."
     )
+    lines.append(
+        "Do not claim validation passed unless the validation tool call actually completed successfully. "
+        "If a validation or shell tool call fails, fix the invocation or report that validation could not be run."
+    )
 
     return "\n".join(lines)
 
diff --git a/tools/phases/gates.py b/tools/phases/gates.py
index e92c6845..8e886827 100644
--- a/tools/phases/gates.py
+++ b/tools/phases/gates.py
@@ -177,10 +177,10 @@ def gate_phase_3() -> int:
 
     nv_count = count_findings("PENDING")
     if nv_count == 0:
-        print(fail("No findings in PENDING."))
+        print(warn("No findings in PENDING."))
         print()
-        print(info("Run Phase 2 first: make phase-2"))
-        return 1
+        print(info("Phase 3 has nothing to review and will complete as a no-op."))
+        return 0
 
     print(ok(f"{nv_count} finding(s) in PENDING."))
     print()

From e2f35c3c112a2b125dc9aacde0985bdbc05c9ed0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?=
 <pruiz@users.noreply.github.com>
Date: Fri, 19 Jun 2026 11:45:21 +0200
Subject: [PATCH 2/4] fix: address phase completion diagnostics review feedback

- Use generator counting in _pending_finding_count()
- Capture datetime.now() once in _write_phase3_noop_summary()
- Remove hardcoded Target path from Phase 3 noop summary
- Make _contains_template_marker() case-insensitive
- Avoid redundant _fresh_run_summaries call in completion gate
- Add test for lowercase template guidance detection
---
 tests/test_findings_quality.py | 23 +++++++++++++++++++++++
 tools/codecome/harness.py      | 10 +++++-----
 tools/findings/quality.py      |  3 ++-
 tools/phases/completion.py     |  9 ++++-----
 4 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/tests/test_findings_quality.py b/tests/test_findings_quality.py
index ef00277c..b156fac2 100644
--- a/tests/test_findings_quality.py
+++ b/tests/test_findings_quality.py
@@ -74,3 +74,26 @@ def test_phase2_quality_rejects_test_template_artifact(tmp_path: Path) -> None:
 
     assert any("test/template artifact" in error for error in errors), errors
     assert any("not an actual target vulnerability" in error for error in errors), errors
+
+
+def test_phase2_quality_rejects_case_insensitive_template_markers(tmp_path: Path) -> None:
+    from findings.quality import validate_phase2_finding_quality
+
+    finding = tmp_path / "itemdb" / "findings" / "PENDING" / "CC-0099-case-variant.md"
+    _write_phase2_finding(
+        finding,
+        title="IPP identity bypass in request parser",
+        category="Auth",
+        target_area="Scheduler",
+        summary="A remote client may inject identity attributes.",
+    )
+    content = finding.read_text(encoding="utf-8")
+    content = content.replace(
+        "A remote client may control the user identity attribute",
+        "briefly describe the suspected vulnerability. the parser accepts unvalidated identity attributes.",
+    )
+    finding.write_text(content, encoding="utf-8")
+
+    errors = validate_phase2_finding_quality(finding)
+
+    assert any("contains template guidance" in error for error in errors), errors
diff --git a/tools/codecome/harness.py b/tools/codecome/harness.py
index 2bc6c911..98249e41 100644
--- a/tools/codecome/harness.py
+++ b/tools/codecome/harness.py
@@ -38,20 +38,20 @@ def _pending_finding_count() -> int:
     pending_dir = ROOT / "itemdb" / "findings" / "PENDING"
     if not pending_dir.exists():
         return 0
-    return len([p for p in pending_dir.glob("CC-*.md") if p.is_file()])
+    return sum(1 for p in pending_dir.glob("CC-*.md") if p.is_file())
 
 
 def _write_phase3_noop_summary() -> Path:
     runs_dir = ROOT / "runs"
     runs_dir.mkdir(parents=True, exist_ok=True)
-    timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+    now = datetime.now()
+    timestamp = now.strftime("%Y-%m-%d-%H%M%S")
     path = runs_dir / f"phase-3-summary-{timestamp}.md"
     path.write_text(
         "# CodeCome Run Summary\n\n"
-        f"Date: {datetime.now().date().isoformat()}  \n"
+        f"Date: {now.date().isoformat()}  \n"
         "Phase: counter_analysis  \n"
-        "Agent: reviewer  \n"
-        "Target path: `./src`\n\n"
+        "Agent: reviewer  \n\n"
         "# Goal\n\n"
         "Review pending findings.\n\n"
         "# Prompt\n\n"
diff --git a/tools/findings/quality.py b/tools/findings/quality.py
index 4b0d5c6f..ef1c490d 100644
--- a/tools/findings/quality.py
+++ b/tools/findings/quality.py
@@ -66,7 +66,8 @@ def _is_placeholder(value: Any) -> bool:
 
 
 def _contains_template_marker(value: str) -> bool:
-    return any(marker in value for marker in _TEMPLATE_MARKERS)
+    lowered = value.lower()
+    return any(marker.lower() in lowered for marker in _TEMPLATE_MARKERS)
 
 
 def _contains_non_finding_marker(value: str) -> bool:
diff --git a/tools/phases/completion.py b/tools/phases/completion.py
index b2970929..be71f93d 100644
--- a/tools/phases/completion.py
+++ b/tools/phases/completion.py
@@ -135,13 +135,12 @@ def _fresh_pending_findings(run_start_time: float) -> list[Path]:
     )
 
 
-def _latest_summary_declares_no_phase2_findings(run_start_time: float) -> bool:
+def _latest_summary_declares_no_phase2_findings(fresh_summaries: list[Path]) -> bool:
     from findings.quality import phase2_summary_declares_no_findings
 
-    summaries = _fresh_run_summaries("2", run_start_time)
-    if not summaries:
+    if not fresh_summaries:
         return False
-    return phase2_summary_declares_no_findings(summaries[0])
+    return phase2_summary_declares_no_findings(fresh_summaries[0])
 
 
 def _find_finding_file(status_dir: Path, finding_id: str, run_start_time: float) -> Path | None:
@@ -311,7 +310,7 @@ def check_phase_graceful_completion(phase: str, finding: str | None, run_start_t
                             f"Invalid: {rel} — Phase 2 finding is incomplete: "
                             + "; ".join(quality_errors)
                         )
-            elif fresh_summaries and not _latest_summary_declares_no_phase2_findings(run_start_time):
+            elif fresh_summaries and not _latest_summary_declares_no_phase2_findings(fresh_summaries):
                 failures.append(
                     "Missing: itemdb/findings/PENDING/ — no finding was created or updated during this run, "
                     "and the fresh Phase 2 summary does not explicitly state that no findings were found"

From bea6ace67c57601c61765d23592f2ca7e1ee6d60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?=
 <pruiz@users.noreply.github.com>
Date: Fri, 19 Jun 2026 16:09:23 +0200
Subject: [PATCH 3/4] fix: auto-resume on output-budget exhaustion (length,
 max_tokens)

Split _FINISH_FAILURE into _FINISH_BUDGET (length, max_tokens) and
_FINISH_HARD_FAILURE (content-filter, content_filter, error).

Budget-exhaustion reasons now trigger check_phase_graceful_completion
and auto-resume, matching the terminal-stop + missing-artifacts path.
Added regression test for length auto-resume.

Also updated phase_1._run_subphase to mirror the same behaviour.
---
 tests/test_phase_failure_state_reset.py | 55 +++++++++++++++++++++++++
 tools/codecome/harness.py               | 38 +++++++++++------
 tools/codecome/phase_1.py               | 41 ++++++++++++------
 tools/phases/completion.py              |  6 +++
 tools/rendering/events/__init__.py      |  4 ++
 tools/rendering/events/base.py          | 13 +++++-
 tools/rendering/events/step_finish.py   |  6 +--
 7 files changed, 135 insertions(+), 28 deletions(-)

diff --git a/tests/test_phase_failure_state_reset.py b/tests/test_phase_failure_state_reset.py
index e70ad83a..3c153219 100644
--- a/tests/test_phase_failure_state_reset.py
+++ b/tests/test_phase_failure_state_reset.py
@@ -194,6 +194,61 @@ def test_phase_mode_final_failure_prints_gate_failures(monkeypatch, capsys):
     assert "CC-0015 still contains template guidance" in output
 
 
+def _budget_result() -> RunResult:
+    return RunResult(
+        last_finish_reason="length",
+        any_step_finish_seen=True,
+        step_finish_count=1,
+    )
+
+
+def test_phase_mode_budget_exhaustion_auto_resumes(monkeypatch):
+    from codecome import harness as harness_mod
+    from codecome import runner as runner_mod
+
+    transcript = harness_mod.ROOT / "tmp" / "fake.jsonl"
+    attempts = iter([
+        (0, "ses_test", _budget_result(), transcript),
+        (0, "ses_test", _budget_result(), transcript),
+    ])
+    completion_results = iter([
+        (False, ["runs/phase-2-summary*.md was not updated during this run"]),
+        (True, []),
+    ])
+    captured: list[list[str] | None] = []
+    prompts: list[str] = []
+
+    def fake_run_single_attempt(_args, _console, prompt, *_a, **_kw):
+        prompts.append(prompt)
+        return next(attempts)
+
+    def fake_resume_prompt(*_args, failure_details=None, **_kw):
+        captured.append(failure_details)
+        return "resume prompt"
+
+    monkeypatch.setattr(harness_mod, "ServerRunner", lambda: _FakeServerRunner())
+    monkeypatch.setenv("CODECOME_MAX_ITERATION_RETRIES", "1")
+    monkeypatch.setattr(harness_mod, "load_prompt", lambda *_a, **_kw: "initial prompt")
+    monkeypatch.setattr(harness_mod, "resolve_runtime_config", lambda _agent: _FakeRuntimeConfig())
+    monkeypatch.setattr(harness_mod, "configure_rendering", lambda *_a, **_kw: None)
+    monkeypatch.setattr(runner_mod, "_run_single_attempt", fake_run_single_attempt)
+    monkeypatch.setattr(
+        harness_mod,
+        "check_phase_graceful_completion",
+        lambda *_a, **_kw: next(completion_results),
+    )
+    monkeypatch.setattr(harness_mod, "build_phase_resume_prompt", fake_resume_prompt)
+
+    from findings import checks_entry
+    monkeypatch.setattr(checks_entry, "run_frontmatter_validation", lambda: (0, ""))
+
+    rc = harness_mod.run_phase_mode(_args())
+
+    assert rc == 0
+    assert prompts == ["initial prompt", "resume prompt"]
+    assert captured == [["runs/phase-2-summary*.md was not updated during this run"]]
+
+
 def test_phase1_subphase_does_not_reuse_previous_attempt_failures(monkeypatch):
     from codecome import phase_1 as p1
 
diff --git a/tools/codecome/harness.py b/tools/codecome/harness.py
index 98249e41..fcaf0895 100644
--- a/tools/codecome/harness.py
+++ b/tools/codecome/harness.py
@@ -25,7 +25,7 @@
 from codecome.console import build_console, _emit_fatal_error
 from rendering.dispatch import _get_rendering_ctx, configure_rendering, render_event
 from rendering.output import get_output, T
-from rendering.events import _FINISH_TERMINAL_OK, _FINISH_MID_TURN, _FINISH_FAILURE
+from rendering.events import _FINISH_TERMINAL_OK, _FINISH_MID_TURN, _FINISH_BUDGET, _FINISH_FAILURE
 from codecome.config import ROOT, resolve_color_mode, load_prompt, resolve_runtime_config
 from phases.completion import (
     check_phase_graceful_completion,
@@ -295,6 +295,12 @@ def _forward_signal(signum: int, _frame: Any) -> None:
                     "CodeCome observed a step_finish event without a finish reason, so the model/provider completion "
                     "state is ambiguous. Treating the run as incomplete."
                 )
+            elif last_finish_reason in _FINISH_BUDGET:
+                finish_warning = (
+                    f"CodeCome observed output budget exhaustion (finish reason '{last_finish_reason}'). "
+                    f"The model/provider stopped after {step_finish_count} completed loops before producing all "
+                    "required artifacts. Treating the run as incomplete."
+                )
             elif last_finish_reason in _FINISH_FAILURE:
                 finish_warning = (
                     f"CodeCome observed finish reason '{last_finish_reason}', which means the model/provider stopped "
@@ -320,22 +326,29 @@ def _forward_signal(signum: int, _frame: Any) -> None:
 
             if finish_warning is not None:
                 if (
-                    last_finish_reason in _FINISH_MID_TURN
+                    (last_finish_reason in _FINISH_MID_TURN or last_finish_reason in _FINISH_BUDGET)
                     and last_permission_error is None
                 ):
                     phase_ok, phase_failures = check_phase_graceful_completion(
                         args.phase, args.finding, RUN_START_TIME)
-                    if phase_ok:
-                        msg = (
-                            f"CodeCome observed a mid-turn model/provider cutoff for Phase {args.phase} after {step_finish_count} "
-                            "completed loops, but expected durable artifacts were written during "
-                            "the run. Treating the phase as complete enough to run validation and auto-repair."
-                        )
-                        out.success(msg)
-                        finish_warning = None
-                        last_finish_reason = "graceful_forgiveness"
+                    if last_finish_reason in _FINISH_MID_TURN:
+                        if phase_ok:
+                            msg = (
+                                f"CodeCome observed a mid-turn model/provider cutoff for Phase {args.phase} after {step_finish_count} "
+                                "completed loops, but expected durable artifacts were written during "
+                                "the run. Treating the phase as complete enough to run validation and auto-repair."
+                            )
+                            out.success(msg)
+                            finish_warning = None
+                            last_finish_reason = "graceful_forgiveness"
+                        else:
+                            returncode = 2
                     else:
-                        returncode = 2
+                        # _FINISH_BUDGET: fail only if artifacts are incomplete
+                        if not phase_ok:
+                            returncode = 2
+                        else:
+                            finish_warning = None
                 else:
                     returncode = 2
 
@@ -389,6 +402,7 @@ def _forward_signal(signum: int, _frame: Any) -> None:
 
             if returncode == 2 and (
                 last_finish_reason in _FINISH_MID_TURN
+                or last_finish_reason in _FINISH_BUDGET
                 or (last_finish_reason in _FINISH_TERMINAL_OK and not phase_ok)
             ):
                 max_iteration_retries = int(os.environ.get("CODECOME_MAX_ITERATION_RETRIES", "3"))
diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py
index 703e4fc5..de054d6f 100644
--- a/tools/codecome/phase_1.py
+++ b/tools/codecome/phase_1.py
@@ -33,6 +33,7 @@
 from rendering.events import (
     _FINISH_TERMINAL_OK,
     _FINISH_MID_TURN,
+    _FINISH_BUDGET,
     _FINISH_FAILURE,
     _reset_subagent_state,
 )
@@ -453,6 +454,12 @@ def _run_subphase(
                 "CodeCome observed a step_finish event without a finish reason, so the model/provider completion "
                 "state is ambiguous. Treating the run as incomplete."
             )
+        elif last_finish_reason in _FINISH_BUDGET:
+            finish_warning = (
+                f"CodeCome observed output budget exhaustion (finish reason '{last_finish_reason}'). "
+                f"The model/provider stopped after {step_finish_count} completed loops before producing all "
+                "required artifacts. Treating the subphase as incomplete."
+            )
         elif last_finish_reason in _FINISH_FAILURE:
             finish_warning = (
                 f"CodeCome observed finish reason '{last_finish_reason}', which means the model/provider stopped "
@@ -478,22 +485,29 @@ def _run_subphase(
 
         if finish_warning is not None:
             if (
-                (not any_step_finish_seen or last_finish_reason in _FINISH_MID_TURN)
+                (last_finish_reason in _FINISH_MID_TURN or last_finish_reason in _FINISH_BUDGET)
                 and last_permission_error is None
             ):
                 phase_ok, phase_failures = check_phase_graceful_completion(
                     phase_id, finding, subphase_start_time)
-                if phase_ok:
-                    msg = (
-                        f"CodeCome observed an incomplete model/provider completion signal for Phase {phase_id} after "
-                        f"{step_finish_count} completed loops, but expected durable artifacts were written during "
-                        "the run. Treating the subphase as complete enough to run validation and auto-repair."
-                    )
-                    out.success(msg)
-                    finish_warning = None
-                    last_finish_reason = "graceful_forgiveness"
+                if last_finish_reason in _FINISH_MID_TURN:
+                    if phase_ok:
+                        msg = (
+                            f"CodeCome observed an incomplete model/provider completion signal for Phase {phase_id} after "
+                            f"{step_finish_count} completed loops, but expected durable artifacts were written during "
+                            "the run. Treating the subphase as complete enough to run validation and auto-repair."
+                        )
+                        out.success(msg)
+                        finish_warning = None
+                        last_finish_reason = "graceful_forgiveness"
+                    else:
+                        returncode = 2
                 else:
-                    returncode = 2
+                    # _FINISH_BUDGET: fail only if artifacts are incomplete
+                    if not phase_ok:
+                        returncode = 2
+                    else:
+                        finish_warning = None
             else:
                 returncode = 2
 
@@ -573,7 +587,10 @@ def _run_subphase(
 
             break
 
-        if returncode == 2 and last_finish_reason in _FINISH_MID_TURN:
+        if returncode == 2 and (
+            last_finish_reason in _FINISH_MID_TURN
+            or last_finish_reason in _FINISH_BUDGET
+        ):
             import os
             max_iteration_retries = int(os.environ.get("CODECOME_MAX_ITERATION_RETRIES", "1"))
             if iteration_retry_count < max_iteration_retries:
diff --git a/tools/phases/completion.py b/tools/phases/completion.py
index be71f93d..5245bbc9 100644
--- a/tools/phases/completion.py
+++ b/tools/phases/completion.py
@@ -496,6 +496,7 @@ def _resume_opener_for_reason(reason: str) -> str:
       the gate still failed, so required artifacts are missing.
     """
     from rendering.events import (
+        _FINISH_BUDGET,
         _FINISH_FAILURE,
         _FINISH_MID_TURN,
         _FINISH_TERMINAL_OK,
@@ -508,6 +509,11 @@ def _resume_opener_for_reason(reason: str) -> str:
             f"Your previous run was cut off mid-turn (finish reason '{reason}') "
             "before completing all required artifacts."
         )
+    if reason in _FINISH_BUDGET:
+        return (
+            f"Your previous run exhausted its output budget (finish reason '{reason}') "
+            "before completing all required artifacts."
+        )
     if reason in _FINISH_FAILURE:
         return (
             f"Your previous run stopped with a failure finish reason '{reason}' "
diff --git a/tools/rendering/events/__init__.py b/tools/rendering/events/__init__.py
index 63bec010..706c883d 100644
--- a/tools/rendering/events/__init__.py
+++ b/tools/rendering/events/__init__.py
@@ -15,6 +15,8 @@
     EventRenderer,
     _FINISH_TERMINAL_OK,
     _FINISH_MID_TURN,
+    _FINISH_BUDGET,
+    _FINISH_HARD_FAILURE,
     _FINISH_FAILURE,
     _SUBAGENT_LAST_STATE,
     _reset_subagent_state,
@@ -40,6 +42,8 @@
     "EventRenderer",
     "_FINISH_TERMINAL_OK",
     "_FINISH_MID_TURN",
+    "_FINISH_BUDGET",
+    "_FINISH_HARD_FAILURE",
     "_FINISH_FAILURE",
     "_SUBAGENT_LAST_STATE",
     "_reset_subagent_state",
diff --git a/tools/rendering/events/base.py b/tools/rendering/events/base.py
index c5d6e9c2..d810a218 100644
--- a/tools/rendering/events/base.py
+++ b/tools/rendering/events/base.py
@@ -27,7 +27,18 @@
 # incomplete signal that warrants retry/resume handling.
 _FINISH_MID_TURN = {"tool-calls", "tool_use", "unknown"}
 
-_FINISH_FAILURE = {"content-filter", "content_filter", "length", "max_tokens", "error"}
+# Budget-exhaustion — the model/provider stopped because the output budget was
+# consumed (token limit reached).  The session is still viable; auto-resume can
+# let the model continue from where it was cut off.
+_FINISH_BUDGET = {"length", "max_tokens"}
+
+# Hard failure — the model/provider stopped with a non-recoverable error
+# (safety filter, internal error).  Retrying the same session is unlikely to
+# succeed.
+_FINISH_HARD_FAILURE = {"content-filter", "content_filter", "error"}
+
+# Legacy alias; kept for backward compatibility.
+_FINISH_FAILURE = _FINISH_BUDGET | _FINISH_HARD_FAILURE
 
 # Per-session dedup state for subagent update events.
 _SUBAGENT_LAST_STATE: dict[str, tuple[dict[str, Any], float]] = {}
diff --git a/tools/rendering/events/step_finish.py b/tools/rendering/events/step_finish.py
index 4050733a..867d620b 100644
--- a/tools/rendering/events/step_finish.py
+++ b/tools/rendering/events/step_finish.py
@@ -7,7 +7,7 @@
 
 from typing import Any
 
-from rendering.events.base import EventRenderer, _FINISH_FAILURE, _clear_hidden_reasoning_state
+from rendering.events.base import EventRenderer, _FINISH_FAILURE, _FINISH_BUDGET, _clear_hidden_reasoning_state
 import _colors as C
 
 
@@ -21,13 +21,13 @@ def render(self, event: dict[str, Any]) -> bool:
         tokens = self._format_tokens(part.get("tokens", {}))
         suffix = f" ({tokens})" if tokens else ""
         style = "dim"
-        if reason in _FINISH_FAILURE:
+        if reason in _FINISH_FAILURE or reason in _FINISH_BUDGET:
             style = "bold red"
         if self.rich:
             from rich.text import Text
             self.sink.write(Text(f"step finished: {reason}{suffix}", style=style))
         elif self.plain:
-            if reason in _FINISH_FAILURE:
+            if reason in _FINISH_FAILURE or reason in _FINISH_BUDGET:
                 self.sink.write_text(C.fail(f"step finished: {reason}{suffix}"))
             else:
                 self.sink.write_text(f"step finished: {reason}{suffix}")

From bbd49a22b22b5fbb9ce49ad332ecb3666337c586 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?=
 <pruiz@users.noreply.github.com>
Date: Fri, 19 Jun 2026 19:12:11 +0200
Subject: [PATCH 4/4] fix: differentiate budget-exhaustion from mid-turn in
 auto-resume messages

- Auto-resume [budget] now says output budget exhaustion instead of mid-turn
- No-session-ID fallback messages use generic wording for both cases
- Docstring updated: length moved from _FINISH_MID_TURN to _FINISH_BUDGET
- step_finish.py: removed redundant _FINISH_BUDGET check (_FINISH_FAILURE
  already covers it)
---
 tools/codecome/harness.py             | 21 +++++++++++++++------
 tools/codecome/phase_1.py             | 19 +++++++++++++------
 tools/phases/completion.py            |  9 ++++++---
 tools/rendering/events/step_finish.py |  6 +++---
 4 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/tools/codecome/harness.py b/tools/codecome/harness.py
index fcaf0895..7e420e1e 100644
--- a/tools/codecome/harness.py
+++ b/tools/codecome/harness.py
@@ -408,10 +408,18 @@ def _forward_signal(signum: int, _frame: Any) -> None:
                 max_iteration_retries = int(os.environ.get("CODECOME_MAX_ITERATION_RETRIES", "3"))
                 if iteration_retry_count < max_iteration_retries:
                     iteration_retry_count += 1
-                    msg = (
-                        "\n[Auto-Resume] CodeCome observed an incomplete run and will resume the same "
-                        f"session once to let the model finish the interrupted work (retry {iteration_retry_count}/{max_iteration_retries})."
-                    )
+                    if last_finish_reason in _FINISH_BUDGET:
+                        msg = (
+                            "\n[Auto-Resume] CodeCome observed output budget exhaustion and will resume the same "
+                            f"session once to let the model finish the interrupted work "
+                            f"(retry {iteration_retry_count}/{max_iteration_retries})."
+                        )
+                    else:
+                        msg = (
+                            "\n[Auto-Resume] CodeCome observed an incomplete run and will resume the same "
+                            f"session once to let the model finish the interrupted work "
+                            f"(retry {iteration_retry_count}/{max_iteration_retries})."
+                        )
                     out.warn(msg)
                     if last_session_id and last_session_id != "id":
                         prompt = build_phase_resume_prompt(
@@ -421,8 +429,9 @@ def _forward_signal(signum: int, _frame: Any) -> None:
                         continue
                     else:
                         finish_warning = (
-                            "CodeCome correctly detected that the model/provider stopped mid-turn, but it could not determine "
-                            "a session ID for automatic continuation. Treating the phase as incomplete."
+                            "CodeCome correctly detected that the model/provider stopped before completing "
+                            "the phase, but it could not determine a session ID for automatic continuation. "
+                            "Treating the phase as incomplete."
                         )
                         out.error("Could not determine session ID to resume.", strong=False)
                 break
diff --git a/tools/codecome/phase_1.py b/tools/codecome/phase_1.py
index de054d6f..bb953231 100644
--- a/tools/codecome/phase_1.py
+++ b/tools/codecome/phase_1.py
@@ -595,10 +595,16 @@ def _run_subphase(
             max_iteration_retries = int(os.environ.get("CODECOME_MAX_ITERATION_RETRIES", "1"))
             if iteration_retry_count < max_iteration_retries:
                 iteration_retry_count += 1
-                msg = (
-                    "\n[Auto-Resume] CodeCome observed a mid-turn model/provider cutoff and will resume the same "
-                    f"session once to let the model finish the interrupted work (retry {iteration_retry_count}/{max_iteration_retries})."
-                )
+                if last_finish_reason in _FINISH_BUDGET:
+                    msg = (
+                        "\n[Auto-Resume] CodeCome observed output budget exhaustion and will resume the same "
+                        f"session once to let the model finish the interrupted work (retry {iteration_retry_count}/{max_iteration_retries})."
+                    )
+                else:
+                    msg = (
+                        "\n[Auto-Resume] CodeCome observed a mid-turn model/provider cutoff and will resume the same "
+                        f"session once to let the model finish the interrupted work (retry {iteration_retry_count}/{max_iteration_retries})."
+                    )
                 out.warn(msg)
                 if last_session_id and last_session_id != "id":
                     prompt = build_phase_resume_prompt(
@@ -608,8 +614,9 @@ def _run_subphase(
                     continue
                 else:
                     finish_warning = (
-                        "CodeCome correctly detected that the model/provider stopped mid-turn, but it could not determine "
-                        "a session ID for automatic continuation. Treating the subphase as incomplete."
+                        "CodeCome correctly detected that the model/provider stopped before completing "
+                        "the phase, but it could not determine a session ID for automatic continuation. "
+                        "Treating the subphase as incomplete."
                     )
                     out.error("Could not determine session ID to resume.", strong=False)
             break
diff --git a/tools/phases/completion.py b/tools/phases/completion.py
index 5245bbc9..b0c4b4b9 100644
--- a/tools/phases/completion.py
+++ b/tools/phases/completion.py
@@ -486,9 +486,12 @@ def _resume_opener_for_reason(reason: str) -> str:
 
     - ``"infrastructure_error"`` — harness fatal-retry path.
     - A finish reason from ``rendering.events._FINISH_MID_TURN`` (e.g.
-      ``"length"``, ``"tool_use"``) — the model/provider cut off mid-turn.
-    - A finish reason from ``rendering.events._FINISH_FAILURE`` — the
-      model/provider reported a failure finish reason.
+      ``"tool_use"``, ``"unknown"``) — the model/provider cut off mid-turn.
+    - A finish reason from ``rendering.events._FINISH_BUDGET`` (e.g.
+      ``"length"``, ``"max_tokens"``) — the model/provider exhausted its
+      output budget before completing all required artifacts.
+    - A finish reason from ``rendering.events._FINISH_HARD_FAILURE`` — the
+      model/provider reported a non-recoverable failure finish reason.
     - ``"graceful_forgiveness"`` — synthesized by the harness when the
       mid-turn cutoff happened but partial artifacts were written.
     - A finish reason from ``rendering.events._FINISH_TERMINAL_OK`` (e.g.
diff --git a/tools/rendering/events/step_finish.py b/tools/rendering/events/step_finish.py
index 867d620b..4050733a 100644
--- a/tools/rendering/events/step_finish.py
+++ b/tools/rendering/events/step_finish.py
@@ -7,7 +7,7 @@
 
 from typing import Any
 
-from rendering.events.base import EventRenderer, _FINISH_FAILURE, _FINISH_BUDGET, _clear_hidden_reasoning_state
+from rendering.events.base import EventRenderer, _FINISH_FAILURE, _clear_hidden_reasoning_state
 import _colors as C
 
 
@@ -21,13 +21,13 @@ def render(self, event: dict[str, Any]) -> bool:
         tokens = self._format_tokens(part.get("tokens", {}))
         suffix = f" ({tokens})" if tokens else ""
         style = "dim"
-        if reason in _FINISH_FAILURE or reason in _FINISH_BUDGET:
+        if reason in _FINISH_FAILURE:
             style = "bold red"
         if self.rich:
             from rich.text import Text
             self.sink.write(Text(f"step finished: {reason}{suffix}", style=style))
         elif self.plain:
-            if reason in _FINISH_FAILURE or reason in _FINISH_BUDGET:
+            if reason in _FINISH_FAILURE:
                 self.sink.write_text(C.fail(f"step finished: {reason}{suffix}"))
             else:
                 self.sink.write_text(f"step finished: {reason}{suffix}")