From a98e1ec0ef42978a87880c71cfd504dc527dfa04 Mon Sep 17 00:00:00 2001 From: eshulman2 Date: Thu, 2 Jul 2026 18:15:04 +0300 Subject: [PATCH] feat: route local review feedback through implementation --- .../prompts/v1/implement-review-feedback.md | 17 +++ src/forge/prompts/v1/local-review-feature.md | 23 ++++ src/forge/workflow/bug/graph.py | 3 + src/forge/workflow/feature/graph.py | 34 ++++- src/forge/workflow/feature/state.py | 12 ++ src/forge/workflow/nodes/implementation.py | 102 +++++++++++++++ src/forge/workflow/nodes/local_reviewer.py | 123 +++++++++++------- tests/unit/workflow/feature/test_workflow.py | 27 ++++ .../workflow/nodes/test_implementation.py | 80 +++++++++++- .../test_local_review_pass_tracking_errors.py | 21 ++- ...al_review_status_comments_comprehensive.py | 34 ++--- .../workflow/nodes/test_local_reviewer.py | 118 ++++++++++++----- 12 files changed, 487 insertions(+), 107 deletions(-) create mode 100644 src/forge/prompts/v1/implement-review-feedback.md create mode 100644 src/forge/prompts/v1/local-review-feature.md diff --git a/src/forge/prompts/v1/implement-review-feedback.md b/src/forge/prompts/v1/implement-review-feedback.md new file mode 100644 index 00000000..bc6d883d --- /dev/null +++ b/src/forge/prompts/v1/implement-review-feedback.md @@ -0,0 +1,17 @@ +Address the automated local review feedback for ticket {ticket_key}. + +## Review Feedback + +{review_feedback} + +## Context + +{context} + +## Instructions + +1. Inspect the existing changes and the feedback. +2. Make the smallest code and test changes needed to address the feedback. +3. Run the relevant tests and lint commands for the touched area. +4. Commit your changes with a concise message. +5. Do not broaden the scope beyond the review feedback. diff --git a/src/forge/prompts/v1/local-review-feature.md b/src/forge/prompts/v1/local-review-feature.md new file mode 100644 index 00000000..baf60c32 --- /dev/null +++ b/src/forge/prompts/v1/local-review-feature.md @@ -0,0 +1,23 @@ +Review the code changes on this branch for breaking issues. Do not modify files. + +## Workspace + +{workspace_path} + +## Specification + +{spec_content} + +## Project Guidelines + +{guardrails} + +Run `git diff origin/main...HEAD` to understand what changed. You may run tests, lint, and read files, but you must not edit, create, delete, stage, or commit files. + +Output your verdict as one of: +- `verdict: adequate` +- `verdict: tests_incomplete` + +Followed by `feedback: `. + +Use `adequate` only if the implementation appears correct and relevant tests/lint pass or are reasonably covered. Use `tests_incomplete` if tests/lint fail, coverage is missing, behavior is incomplete, or the implementation needs changes. diff --git a/src/forge/workflow/bug/graph.py b/src/forge/workflow/bug/graph.py index 01eaf5ae..78bb4501 100644 --- a/src/forge/workflow/bug/graph.py +++ b/src/forge/workflow/bug/graph.py @@ -293,6 +293,9 @@ def _route_after_local_review(state: BugState) -> str: """Route after local_review considering qualitative verdict and retry count.""" from forge.workflow.nodes.local_reviewer import _QUALITATIVE_CAP, MAX_REVIEW_ATTEMPTS + if state.get("last_error"): + return "update_documentation" + verdict = state.get("local_review_verdict") retry_count = state.get("qualitative_retry_count", 0) diff --git a/src/forge/workflow/feature/graph.py b/src/forge/workflow/feature/graph.py index a51f010e..cbc1d299 100644 --- a/src/forge/workflow/feature/graph.py +++ b/src/forge/workflow/feature/graph.py @@ -328,6 +328,30 @@ def _route_implementation( return "implement_task" +def _route_after_local_review( + state: FeatureState, +) -> Literal["implement_task", "update_documentation"]: + """Route after one read-only local review pass.""" + from forge.workflow.nodes.local_reviewer import MAX_REVIEW_ATTEMPTS + + if state.get("last_error"): + return "update_documentation" + + if state.get("local_review_max_attempts_reached"): + return "update_documentation" + + if state.get("local_review_verdict") == "adequate": + return "update_documentation" + + if ( + state.get("local_review_has_unfixed_issues") + and state.get("local_review_attempts", 0) < MAX_REVIEW_ATTEMPTS + ): + return "implement_task" + + return "update_documentation" + + def _route_after_pr_creation( state: FeatureState, ) -> Literal["teardown_workspace", "escalate_blocked"]: @@ -411,8 +435,8 @@ def build_feature_graph() -> StateGraph: 14. task_router -> setup_workspace (or parallel fan-out) 15. setup_workspace -> implement_task 16. implement_task (all tasks done) -> local_review - 17. local_review: reviews git diff vs main, fixes breaking issues in-place (up to 2 passes) - 18. local_review -> create_pr + 17. local_review: reviews git diff vs main and emits feedback + 18. failed review under cap -> implement_task; adequate/cap -> create_pr 19. create_pr -> teardown_workspace 20. teardown_workspace -> wait_for_ci_gate (pause) or next repo 21. wait_for_ci_gate: resumes on GitHub CI webhook @@ -460,7 +484,7 @@ def build_feature_graph() -> StateGraph: graph.add_node("create_pr", create_pull_request) graph.add_node("teardown_workspace", teardown_and_route) - # Local code review node (pre-PR, fixes breaking issues in-place) + # Local code review node (pre-PR, read-only feedback gate) graph.add_node("local_review", local_review_changes) # Documentation update node (pre-PR, updates stale docs) @@ -697,8 +721,8 @@ def build_feature_graph() -> StateGraph: ) graph.add_conditional_edges( "local_review", - lambda s: s.get("current_node", "create_pr"), - {"local_review": "local_review", "create_pr": "update_documentation"}, + _route_after_local_review, + {"implement_task": "implement_task", "update_documentation": "update_documentation"}, ) graph.add_edge("update_documentation", "create_pr") graph.add_conditional_edges( diff --git a/src/forge/workflow/feature/state.py b/src/forge/workflow/feature/state.py index d67e84d9..4449496f 100644 --- a/src/forge/workflow/feature/state.py +++ b/src/forge/workflow/feature/state.py @@ -40,6 +40,14 @@ class FeatureState( parallel_branch_id: int | None parallel_total_branches: int | None + # Local review loop + local_review_attempts: int + local_review_pass_number: int + local_review_has_unfixed_issues: bool + local_review_max_attempts_reached: bool + local_review_verdict: str | None + qualitative_feedback: str | None + # Q&A mode # List of {question, answer, artifact_type, timestamp} qa_history: list[dict[str, str]] @@ -101,6 +109,10 @@ def create_initial_feature_state(ticket_key: str, **kwargs: Any) -> FeatureState "parallel_execution_enabled": True, "parallel_branch_id": None, "parallel_total_branches": None, + "local_review_has_unfixed_issues": False, + "local_review_max_attempts_reached": False, + "local_review_verdict": None, + "qualitative_feedback": None, "ci_failed_checks": [], "ci_skipped_checks": [], "ci_fix_attempt": 0, diff --git a/src/forge/workflow/nodes/implementation.py b/src/forge/workflow/nodes/implementation.py index 596e06ab..d73561ab 100644 --- a/src/forge/workflow/nodes/implementation.py +++ b/src/forge/workflow/nodes/implementation.py @@ -17,6 +17,7 @@ from forge.config import get_settings from forge.integrations.jira.client import JiraClient from forge.models.workflow import TicketType +from forge.prompts import load_prompt from forge.sandbox import ContainerRunner from forge.workflow.feature.state import FeatureState as WorkflowState from forge.workflow.nodes.error_handler import notify_error @@ -70,6 +71,13 @@ async def implement_task(state: WorkflowState) -> WorkflowState: current_task = task_key break + if not current_task and _has_review_feedback_to_fix(state): + return await _implement_review_feedback( + state, + workspace_path=workspace_path, + implementation_node=implementation_node, + ) + if not current_task: logger.info(f"All tasks implemented for {ticket_key}") @@ -203,6 +211,100 @@ def _implementation_node_name(state: WorkflowState) -> str: return "implement_bug_fix" if state.get("ticket_type") == TicketType.BUG else "implement_task" +def _has_review_feedback_to_fix(state: WorkflowState) -> bool: + """Return True when local review asked implementation to make another pass.""" + verdict = state.get("local_review_verdict") + return bool(verdict and verdict != "adequate") + + +async def _implement_review_feedback( + state: WorkflowState, + *, + workspace_path: str, + implementation_node: str, +) -> WorkflowState: + """Run the implementation container to address local review feedback.""" + ticket_key = state["ticket_key"] + current_repo = state.get("current_repo", "") + branch_name = state.get("context", {}).get("branch_name", "") + feedback = state.get("qualitative_feedback") or ( + f"Local review returned verdict {state.get('local_review_verdict')} without details." + ) + + logger.info(f"Addressing local review feedback for {ticket_key}") + + settings = get_settings() + try: + context_parts = [] + if state.get("spec_content"): + context_parts.extend(["## Specification", state.get("spec_content", "")]) + if state.get("plan_content"): + context_parts.extend(["## Approved Plan", state.get("plan_content", "")]) + if state.get("rca_content"): + context_parts.extend(["## Root Cause Analysis", state.get("rca_content", "")]) + + task_description = load_prompt( + "implement-review-feedback", + ticket_key=ticket_key, + review_feedback=feedback, + context="\n\n".join(context_parts) or "No additional context provided.", + ) + + runner = ContainerRunner(settings) + result = await runner.run( + workspace_path=Path(workspace_path), + task_summary=f"Address local review feedback for {ticket_key}", + task_description=task_description, + ticket_key=ticket_key, + task_key=f"{ticket_key}-review-fix", + repo_name=current_repo, + previous_task_keys=list(state.get("implemented_tasks", [])), + trace_context=_build_implementation_trace_context( + state, + implementation_node=implementation_node, + current_repo=current_repo, + ), + ) + + if not result.success: + error_msg = result.error_message or "Unknown container error" + logger.error(f"Review feedback fix failed for {ticket_key}: {error_msg}") + raise RuntimeError(error_msg) + + git = GitOperations( + Workspace( + path=Path(workspace_path), + repo_name=current_repo, + branch_name=branch_name, + ticket_key=ticket_key, + ) + ) + if git.has_uncommitted_changes(): + git.stage_all() + git.commit(f"[{ticket_key}] fix: address local review feedback") + + return update_state_timestamp( + { + **state, + "current_node": implementation_node, + "current_task_key": None, + "local_review_verdict": None, + "qualitative_feedback": None, + "last_error": None, + "retry_count": 0, + } + ) + except Exception as e: + logger.error(f"Failed to address local review feedback for {ticket_key}: {e}") + await notify_error(state, str(e), implementation_node) + return { + **state, + "last_error": str(e), + "current_node": implementation_node, + "retry_count": state.get("retry_count", 0) + 1, + } + + def _build_implementation_trace_context( state: WorkflowState, *, diff --git a/src/forge/workflow/nodes/local_reviewer.py b/src/forge/workflow/nodes/local_reviewer.py index ffeef0cb..06cfa73c 100644 --- a/src/forge/workflow/nodes/local_reviewer.py +++ b/src/forge/workflow/nodes/local_reviewer.py @@ -1,4 +1,4 @@ -"""Local code review node — reviews and fixes breaking issues before PR creation.""" +"""Local code review node — reviews implemented changes before PR creation.""" import logging import re @@ -20,6 +20,7 @@ MAX_REVIEW_ATTEMPTS = 2 _QUALITATIVE_CAP = 2 _VALID_VERDICTS = {"adequate", "tests_incomplete", "symptom_only"} +_VALID_FEATURE_VERDICTS = {"adequate", "tests_incomplete"} def _validate_pass_number(value: int | None) -> int | None: @@ -74,6 +75,40 @@ def _parse_bug_verdict(output: str) -> tuple[str, str]: return verdict, feedback +def _parse_feature_verdict(output: str) -> tuple[str, str]: + """Parse verdict and feedback from feature local review output.""" + verdict = "tests_incomplete" + feedback = "" + + verdict_match = re.search(r"verdict:\s*`?([a-zA-Z_]+)", output, re.IGNORECASE) + if verdict_match: + candidate = verdict_match.group(1).strip().lower() + if candidate in _VALID_FEATURE_VERDICTS: + verdict = candidate + else: + logger.warning( + f"Unrecognized feature review verdict '{candidate}', " + "defaulting to tests_incomplete" + ) + + feedback_match = re.search(r"feedback:\s*(.*)", output, re.IGNORECASE | re.DOTALL) + if feedback_match: + feedback = feedback_match.group(1).strip() + + return verdict, feedback + + +def _discard_reviewer_changes(git: GitOperations, ticket_key: str) -> None: + """Discard any file changes made by a read-only review container.""" + if not git.has_uncommitted_changes(): + return + + logger.warning( + f"Local review container modified files for {ticket_key}; discarding reviewer changes" + ) + git.reset_hard() + + def route_local_review(state: WorkflowState) -> str: """Route from local_review based on bug verdict and retry count. @@ -86,20 +121,21 @@ def route_local_review(state: WorkflowState) -> str: state: Current workflow state after local_review_changes ran. Returns: - Next node name: 'create_pr' or 'implement_bug_fix'. + Next node name recorded by the graph-specific local review router. """ return state.get("current_node", "create_pr") async def local_review_changes(state: WorkflowState) -> WorkflowState: - """Review implemented changes locally and fix breaking issues before PR creation. + """Review implemented changes locally before PR creation. For bug tickets: runs qualitative review (local-review-bug.md) that checks - root-cause alignment and test coverage. Parses verdict; routes to - implement_bug_fix on non-adequate verdicts (up to 2 retries), then create_pr. + root-cause alignment and test coverage. Parses verdict and records retry + metrics; graph routing decides whether to re-enter implementation. - For other tickets: runs mechanical review (local-review prompt) to find and - fix breaking issues in-place. + For other tickets: runs a read-only local review that evaluates the + implementation and emits feedback. Graph routing decides whether to + re-enter implementation for a fix pass. Args: state: Current workflow state. @@ -163,9 +199,7 @@ async def _run_bug_review(state: WorkflowState) -> WorkflowState: ) ) - if git.has_uncommitted_changes(): - git.stage_all() - git.commit(f"[{ticket_key}] fix: address review feedback") + _discard_reviewer_changes(git, ticket_key) output = (result.stdout or "") + (result.stderr or "") verdict, feedback = _parse_bug_verdict(output) @@ -180,7 +214,7 @@ async def _run_bug_review(state: WorkflowState) -> WorkflowState: "local_review_verdict": verdict, "qualitative_feedback": feedback or None, "qualitative_retry_count": qualitative_retry_count, - "current_node": "create_pr", + "current_node": "local_review", "last_error": None, } ) @@ -198,7 +232,7 @@ async def _run_bug_review(state: WorkflowState) -> WorkflowState: "qualitative_feedback": feedback or None, "qualitative_retry_count": new_retry_count, "qualitative_review_failed": True, - "current_node": "create_pr", + "current_node": "local_review", "last_error": None, } ) @@ -207,18 +241,15 @@ async def _run_bug_review(state: WorkflowState) -> WorkflowState: f"Bug qualitative review: verdict={verdict} for {ticket_key}, " f"retry {new_retry_count}/{_QUALITATIVE_CAP}" ) - linked_task_keys = state.get("linked_task_keys") or state.get("task_keys") or [] return update_state_timestamp( { **state, "local_review_verdict": verdict, "qualitative_feedback": feedback or None, "qualitative_retry_count": new_retry_count, - "current_node": "implement_bug_fix", + "current_node": "local_review", "last_error": None, - # Reset so implement_task re-runs the container instead of seeing "all done" - "implemented_tasks": [], - "current_task_key": linked_task_keys[0] if linked_task_keys else None, + "current_task_key": None, } ) @@ -228,14 +259,15 @@ async def _run_bug_review(state: WorkflowState) -> WorkflowState: { **state, "local_review_verdict": None, - "current_node": "create_pr", + "qualitative_feedback": None, + "current_node": "local_review", "last_error": str(e), } ) async def _run_feature_review(state: WorkflowState) -> WorkflowState: - """Run mechanical local review for non-bug tickets (existing behavior).""" + """Run read-only local review for non-bug tickets.""" ticket_key = state["ticket_key"] workspace_path = state["workspace_path"] review_attempts = state.get("local_review_attempts", 0) @@ -283,8 +315,12 @@ async def _run_feature_review(state: WorkflowState) -> WorkflowState: return update_state_timestamp( { **state, + "local_review_has_unfixed_issues": False, + "local_review_max_attempts_reached": True, "local_review_attempts": 0, - "current_node": "create_pr", + "local_review_verdict": state.get("local_review_verdict"), + "qualitative_feedback": state.get("qualitative_feedback"), + "current_node": "local_review", } ) @@ -297,7 +333,7 @@ async def _run_feature_review(state: WorkflowState) -> WorkflowState: guardrails = state.get("context", {}).get("guardrails", "") task_description = load_prompt( - "local-review", + "local-review-feature", workspace_path=workspace_path, spec_content=spec_content[:3000] if spec_content else "Not available", guardrails=guardrails[:2000] if guardrails else "", @@ -307,7 +343,7 @@ async def _run_feature_review(state: WorkflowState) -> WorkflowState: runner = ContainerRunner(settings) result = await runner.run( workspace_path=Path(workspace_path), - task_summary="Local code review — fix breaking issues", + task_summary="Local code review", task_description=task_description, ticket_key=ticket_key, task_key=f"{ticket_key}-review", @@ -323,41 +359,42 @@ async def _run_feature_review(state: WorkflowState) -> WorkflowState: ) ) - if git.has_uncommitted_changes(): - git.stage_all() - git.commit(f"[{ticket_key}] fix: address breaking issues found in local review") - logger.info(f"Committed local review fixes for {ticket_key}") + _discard_reviewer_changes(git, ticket_key) output = (result.stdout or "") + (result.stderr or "") - has_unfixed = _has_unfixed_breaking_issues(output) + verdict, feedback = _parse_feature_verdict(output) + has_unfixed = verdict != "adequate" - if has_unfixed and review_attempts + 1 < MAX_REVIEW_ATTEMPTS: + if has_unfixed: logger.warning( - f"Breaking issues remain after review attempt {review_attempts + 1}, retrying" + f"Local review found issues after attempt {review_attempts + 1}" ) next_pass = (validated_pass or 1) + 1 return update_state_timestamp( { **state, + "local_review_has_unfixed_issues": True, + "local_review_max_attempts_reached": False, + "local_review_verdict": verdict, + "qualitative_feedback": feedback or None, "local_review_attempts": review_attempts + 1, "local_review_pass_number": next_pass, "current_node": "local_review", + "last_error": None, } ) - if has_unfixed: - logger.warning( - f"Could not fix all breaking issues after {MAX_REVIEW_ATTEMPTS} attempts " - f"for {ticket_key}, proceeding to PR" - ) - else: - logger.info(f"Local review passed for {ticket_key}") + logger.info(f"Local review passed for {ticket_key}") return update_state_timestamp( { **state, + "local_review_has_unfixed_issues": False, + "local_review_max_attempts_reached": False, + "local_review_verdict": verdict, + "qualitative_feedback": feedback or None, "local_review_attempts": 0, - "current_node": "create_pr", + "current_node": "local_review", "last_error": None, } ) @@ -367,14 +404,12 @@ async def _run_feature_review(state: WorkflowState) -> WorkflowState: return update_state_timestamp( { **state, + "local_review_has_unfixed_issues": False, + "local_review_max_attempts_reached": False, "local_review_attempts": 0, - "current_node": "create_pr", + "local_review_verdict": None, + "qualitative_feedback": None, + "current_node": "local_review", "last_error": None, } ) - - -def _has_unfixed_breaking_issues(output: str) -> bool: - """Check if the review output indicates unfixed breaking issues remain.""" - lower = output.lower() - return "unfixed" in lower and "breaking" in lower diff --git a/tests/unit/workflow/feature/test_workflow.py b/tests/unit/workflow/feature/test_workflow.py index aa4c46ae..133e2f73 100644 --- a/tests/unit/workflow/feature/test_workflow.py +++ b/tests/unit/workflow/feature/test_workflow.py @@ -7,6 +7,7 @@ from forge.workflow.feature.graph import ( _route_after_epic_regeneration, _route_after_epic_task_regeneration, + _route_after_local_review, _route_after_prd_regeneration, _route_after_single_epic_update, _route_after_single_task_update, @@ -89,6 +90,32 @@ def test_create_initial_state(self): assert state["ticket_type"] == TicketType.FEATURE assert state["prd_content"] == "" + def test_local_review_routes_to_implementation_when_issues_remain_under_cap(self): + state = { + "local_review_has_unfixed_issues": True, + "local_review_verdict": "tests_incomplete", + "local_review_attempts": 1, + } + + assert _route_after_local_review(state) == "implement_task" + + def test_local_review_routes_to_docs_when_review_passes(self): + state = { + "local_review_has_unfixed_issues": False, + "local_review_verdict": "adequate", + "local_review_attempts": 0, + } + + assert _route_after_local_review(state) == "update_documentation" + + def test_local_review_routes_to_docs_at_retry_cap(self): + state = { + "local_review_has_unfixed_issues": True, + "local_review_attempts": 2, + } + + assert _route_after_local_review(state) == "update_documentation" + def test_resume_regenerate_all_epics_stays_on_regeneration_node(self): """Retrying full plan regeneration should not restart raw epic decomposition.""" state = { diff --git a/tests/unit/workflow/nodes/test_implementation.py b/tests/unit/workflow/nodes/test_implementation.py index 7c673597..0d8046e4 100644 --- a/tests/unit/workflow/nodes/test_implementation.py +++ b/tests/unit/workflow/nodes/test_implementation.py @@ -13,6 +13,7 @@ def _make_state( current_task_key="TASK-456", workspace_path="/tmp/ws", current_repo="acme/backend", + task_keys=None, tasks_by_repo=None, implemented_tasks=None, ): @@ -26,7 +27,9 @@ def _make_state( "workspace_path": workspace_path, "current_task_key": current_task_key, "current_repo": current_repo, - "task_keys": [current_task_key] if current_task_key else [], + "task_keys": ( + task_keys if task_keys is not None else ([current_task_key] if current_task_key else []) + ), "tasks_by_repo": tasks_by_repo or {current_repo: [current_task_key]}, "implemented_tasks": implemented_tasks or [], "context": {"branch_name": "forge/BUG-123", "guardrails": ""}, @@ -211,6 +214,81 @@ async def test_bug_missing_workspace_keeps_bug_implementation_node(self): assert result["current_node"] == "implement_bug_fix" assert result["last_error"] == "Workspace not set up" + @pytest.mark.asyncio + async def test_feature_review_feedback_runs_implementation_fix_pass(self): + """Failed local review routes to implementation for a dedicated fix pass.""" + from forge.workflow.nodes.implementation import implement_task + + state = _make_state( + ticket_key="FEAT-123", + ticket_type=TicketType.FEATURE, + current_task_key=None, + task_keys=[], + tasks_by_repo={"acme/backend": ["TASK-1"]}, + implemented_tasks=["TASK-1"], + ) + state["local_review_verdict"] = "tests_incomplete" + state["qualitative_feedback"] = "Add regression coverage." + runner = _make_successful_runner() + mock_git = MagicMock() + mock_git.has_uncommitted_changes.return_value = True + mock_git.stage_all = MagicMock() + mock_git.commit = MagicMock() + + with ( + patch( + "forge.workflow.nodes.implementation.ContainerRunner", + return_value=runner, + ), + patch("forge.workflow.nodes.implementation.GitOperations", return_value=mock_git), + patch("forge.workflow.nodes.implementation.Workspace", return_value=MagicMock()), + patch("forge.workflow.nodes.implementation.get_settings"), + ): + result = await implement_task(state) + + assert runner.run.call_args.kwargs["task_key"] == "FEAT-123-review-fix" + assert "Add regression coverage." in runner.run.call_args.kwargs["task_description"] + mock_git.stage_all.assert_called_once() + mock_git.commit.assert_called_once_with("[FEAT-123] fix: address local review feedback") + assert result["current_node"] == "implement_task" + assert result["local_review_verdict"] is None + assert result["qualitative_feedback"] is None + assert result["retry_count"] == 0 + + @pytest.mark.asyncio + async def test_bug_review_feedback_runs_bug_implementation_fix_pass(self): + """Bug graph uses the same implementation fix pass but keeps bug node naming.""" + from forge.workflow.nodes.implementation import implement_task + + state = _make_state( + ticket_key="BUG-123", + ticket_type=TicketType.BUG, + current_task_key=None, + task_keys=[], + tasks_by_repo={"acme/backend": ["TASK-1"]}, + implemented_tasks=["TASK-1"], + ) + state["local_review_verdict"] = "symptom_only" + state["qualitative_feedback"] = "Fix only handles a symptom." + runner = _make_successful_runner() + mock_git = MagicMock() + mock_git.has_uncommitted_changes.return_value = False + + with ( + patch( + "forge.workflow.nodes.implementation.ContainerRunner", + return_value=runner, + ), + patch("forge.workflow.nodes.implementation.GitOperations", return_value=mock_git), + patch("forge.workflow.nodes.implementation.Workspace", return_value=MagicMock()), + patch("forge.workflow.nodes.implementation.get_settings"), + ): + result = await implement_task(state) + + assert runner.run.call_args.kwargs["task_key"] == "BUG-123-review-fix" + assert result["current_node"] == "implement_bug_fix" + assert result["local_review_verdict"] is None + @pytest.mark.asyncio async def test_feature_container_failure_uses_feature_implementation_node(self): """Feature container failures must not checkpoint bug workflow node names.""" diff --git a/tests/unit/workflow/nodes/test_local_review_pass_tracking_errors.py b/tests/unit/workflow/nodes/test_local_review_pass_tracking_errors.py index c4c73845..b2990b5c 100644 --- a/tests/unit/workflow/nodes/test_local_review_pass_tracking_errors.py +++ b/tests/unit/workflow/nodes/test_local_review_pass_tracking_errors.py @@ -1,7 +1,6 @@ """Unit tests for defensive pass number tracking error handling in local_reviewer.py.""" import logging -from pathlib import Path from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -110,8 +109,8 @@ async def test_none_pass_number_posts_generic_comment(self, caplog): "🔧 Local review found issues, applying fixes.", ) - # Verify workflow continued successfully - assert result["current_node"] == "create_pr" + # Verify workflow continued successfully and left routing to the graph + assert result["current_node"] == "local_review" @pytest.mark.asyncio async def test_workflow_continues_when_pass_number_unavailable(self): @@ -148,8 +147,8 @@ async def test_workflow_continues_when_pass_number_unavailable(self): # Verify container was executed despite pass tracking failure assert mock_runner.run.called - # Verify state transitioned to create_pr - assert result["current_node"] == "create_pr" + # Verify state remains on local_review for graph routing + assert result["current_node"] == "local_review" class TestInvalidPassNumberValues: @@ -202,8 +201,8 @@ async def test_negative_pass_number_detected_and_logged(self, caplog): "🔧 Local review found issues, applying fixes.", ) - # Verify workflow continued - assert result["current_node"] == "create_pr" + # Verify workflow continued and left routing to the graph + assert result["current_node"] == "local_review" @pytest.mark.asyncio async def test_non_integer_pass_number_detected_and_logged(self, caplog): @@ -250,7 +249,7 @@ async def test_non_integer_pass_number_detected_and_logged(self, caplog): "🔧 Local review found issues, applying fixes.", ) - assert result["current_node"] == "create_pr" + assert result["current_node"] == "local_review" @pytest.mark.asyncio async def test_zero_pass_number_rejected_with_generic_comment(self, caplog): @@ -296,7 +295,7 @@ async def test_zero_pass_number_rejected_with_generic_comment(self, caplog): "🔧 Local review found issues, applying fixes.", ) - assert result["current_node"] == "create_pr" + assert result["current_node"] == "local_review" class TestNormalPassNumberLogging: @@ -514,7 +513,7 @@ async def test_pass_number_increments_correctly_after_retry(self): mock_runner = MagicMock() mock_result = MagicMock() mock_result.success = True - mock_result.stdout = "unfixed breaking issues remain" + mock_result.stdout = "verdict: tests_incomplete\n\nfeedback: breaking issues remain" mock_result.stderr = "" mock_runner.run = AsyncMock(return_value=mock_result) @@ -551,7 +550,7 @@ async def test_pass_number_recovers_from_none_and_increments(self): mock_runner = MagicMock() mock_result = MagicMock() mock_result.success = True - mock_result.stdout = "unfixed breaking issues remain" + mock_result.stdout = "verdict: tests_incomplete\n\nfeedback: breaking issues remain" mock_result.stderr = "" mock_runner.run = AsyncMock(return_value=mock_result) diff --git a/tests/unit/workflow/nodes/test_local_review_status_comments_comprehensive.py b/tests/unit/workflow/nodes/test_local_review_status_comments_comprehensive.py index b8cad415..b2ecfb82 100644 --- a/tests/unit/workflow/nodes/test_local_review_status_comments_comprehensive.py +++ b/tests/unit/workflow/nodes/test_local_review_status_comments_comprehensive.py @@ -34,12 +34,11 @@ def create_mock_container_runner(has_unfixed_issues=False): mock_result.success = True mock_result.error_message = None - # Set stdout/stderr to indicate unfixed issues if requested if has_unfixed_issues: - mock_result.stdout = "unfixed breaking issues remain" + mock_result.stdout = "verdict: tests_incomplete\n\nfeedback: breaking issues remain" mock_result.stderr = "" else: - mock_result.stdout = "all issues fixed" + mock_result.stdout = "verdict: adequate\n\nfeedback: all issues fixed" mock_result.stderr = "" mock.run = AsyncMock(return_value=mock_result) @@ -52,12 +51,13 @@ def create_mock_git_operations(has_changes=False): mock.has_uncommitted_changes = MagicMock(return_value=has_changes) mock.stage_all = MagicMock() mock.commit = MagicMock() + mock.reset_hard = MagicMock() return mock class TestPassNumberOneCommentPosting: """Tests verifying initial comment posts only when pass_number == 1. - + Acceptance Criteria: Unit tests verify initial comment posts only when pass_number == 1 """ @@ -160,7 +160,7 @@ async def test_no_initial_comment_when_pass_number_greater_than_one(self): class TestPassNumberGreaterThanOneCommentPosting: """Tests verifying fix comments post only when pass_number > 1. - + Acceptance Criteria: Unit tests verify fix comments post only when pass_number > 1 """ @@ -262,8 +262,8 @@ async def test_no_fix_comment_when_pass_number_equals_one(self): class TestCorrectPassNumberInCommentText: """Tests verifying correct pass number appears in comment text. - - Acceptance Criteria: Unit tests verify correct pass number appears in comment text + + Acceptance Criteria: Unit tests verify correct pass number appears in comment text for passes 2, 3, 4, 5+ """ @@ -436,7 +436,7 @@ async def test_comment_shows_high_pass_number_correctly(self): class TestGracefulHandlingWhenPassNumberUnavailable: """Tests verifying graceful handling when pass_number unavailable. - + Acceptance Criteria: Unit tests verify graceful handling when pass_number unavailable """ @@ -505,7 +505,7 @@ async def test_workflow_completes_successfully_without_pass_number(self): # Verify workflow completed successfully assert result is not None - assert result["current_node"] == "create_pr" + assert result["current_node"] == "local_review" assert mock_jira.close.called @pytest.mark.asyncio @@ -532,12 +532,12 @@ async def test_no_error_when_pass_number_none(self): ) as mock_post_status, ): mock_post_status.return_value = AsyncMock() - + # Should not raise exception try: result = await local_review_changes(state) - # Verify workflow completed - assert result["current_node"] == "create_pr" + # Verify review state is ready for graph routing + assert result["current_node"] == "local_review" except Exception as e: pytest.fail(f"Should handle None pass_number gracefully but raised: {e}") @@ -565,12 +565,12 @@ async def test_handles_pass_number_zero_gracefully(self): ) as mock_post_status, ): mock_post_status.return_value = AsyncMock() - + # Should not raise exception result = await local_review_changes(state) - # Verify workflow completed - assert result["current_node"] == "create_pr" + # Verify review state is ready for graph routing + assert result["current_node"] == "local_review" # With pass_number=0 (invalid), should use generic fallback comment assert mock_post_status.call_count == 1 @@ -592,10 +592,10 @@ async def test_comment_posted_before_container_execution(self): call_order = [] - async def track_post_status(*args, **kwargs): + async def track_post_status(*_args, **_kwargs): call_order.append("post_status_comment") - async def track_container_run(*args, **kwargs): + async def track_container_run(*_args, **_kwargs): call_order.append("container_run") mock_result = MagicMock() mock_result.success = True diff --git a/tests/unit/workflow/nodes/test_local_reviewer.py b/tests/unit/workflow/nodes/test_local_reviewer.py index 7a78a0ff..e54a6d19 100644 --- a/tests/unit/workflow/nodes/test_local_reviewer.py +++ b/tests/unit/workflow/nodes/test_local_reviewer.py @@ -7,6 +7,7 @@ from forge.models.workflow import TicketType from forge.workflow.nodes.local_reviewer import ( _parse_bug_verdict, + _parse_feature_verdict, local_review_changes, route_local_review, ) @@ -73,6 +74,7 @@ def _make_mock_git(has_changes=False): git.has_uncommitted_changes.return_value = has_changes git.stage_all = MagicMock() git.commit = MagicMock() + git.reset_hard = MagicMock() return git @@ -121,6 +123,27 @@ def test_verdict_in_inline_code_backticks_parses_correctly(self): assert verdict == "adequate" +class TestParseFeatureVerdict: + """Tests for the _parse_feature_verdict helper.""" + + def test_parses_adequate(self): + output = "verdict: adequate\n\nfeedback: Everything is correct." + verdict, feedback = _parse_feature_verdict(output) + assert verdict == "adequate" + assert "Everything is correct" in feedback + + def test_parses_tests_incomplete(self): + output = "verdict: tests_incomplete\n\nfeedback: Missing coverage." + verdict, feedback = _parse_feature_verdict(output) + assert verdict == "tests_incomplete" + assert "Missing coverage" in feedback + + def test_unknown_verdict_defaults_to_tests_incomplete(self): + output = "verdict: maybe\n\nfeedback: unclear" + verdict, _ = _parse_feature_verdict(output) + assert verdict == "tests_incomplete" + + class TestLocalReviewBug: """Tests for bug-specific local review behavior.""" @@ -158,8 +181,8 @@ async def run(self, workspace_path, task_description="", **_kwargs): # noqa: AR assert "Password validator" in desc or "Fix regex" in desc or "validators.py" in desc @pytest.mark.asyncio - async def test_adequate_verdict_routes_to_create_pr(self, base_bug_review_state): - """'adequate' verdict → routes to create_pr.""" + async def test_adequate_verdict_records_review_state(self, base_bug_review_state): + """'adequate' verdict is recorded for graph routing.""" runner = _make_mock_runner("verdict: adequate\n\nfeedback: Looks good.") mock_git = _make_mock_git() mock_workspace = MagicMock() @@ -171,12 +194,12 @@ async def test_adequate_verdict_routes_to_create_pr(self, base_bug_review_state) ): result = await local_review_changes(base_bug_review_state) - assert result["current_node"] == "create_pr" + assert result["current_node"] == "local_review" assert result["local_review_verdict"] == "adequate" @pytest.mark.asyncio async def test_tests_incomplete_increments_retry(self, base_bug_review_state): - """'tests_incomplete' verdict → qualitative_retry_count incremented, routes to implement_bug_fix.""" + """'tests_incomplete' verdict increments qualitative_retry_count for graph routing.""" runner = _make_mock_runner( "verdict: tests_incomplete\n\nfeedback: Tests do not fail without fix." ) @@ -190,14 +213,14 @@ async def test_tests_incomplete_increments_retry(self, base_bug_review_state): ): result = await local_review_changes(base_bug_review_state) - assert result["current_node"] == "implement_bug_fix" + assert result["current_node"] == "local_review" assert result["qualitative_retry_count"] == 1 assert result["local_review_verdict"] == "tests_incomplete" assert "Tests do not fail" in (result["qualitative_feedback"] or "") @pytest.mark.asyncio async def test_symptom_only_increments_retry(self, base_bug_review_state): - """'symptom_only' verdict → qualitative_retry_count incremented, routes to implement_bug_fix.""" + """'symptom_only' verdict increments qualitative_retry_count for graph routing.""" runner = _make_mock_runner("verdict: symptom_only\n\nfeedback: Root cause not addressed.") mock_git = _make_mock_git() mock_workspace = MagicMock() @@ -209,12 +232,12 @@ async def test_symptom_only_increments_retry(self, base_bug_review_state): ): result = await local_review_changes(base_bug_review_state) - assert result["current_node"] == "implement_bug_fix" + assert result["current_node"] == "local_review" assert result["qualitative_retry_count"] == 1 @pytest.mark.asyncio - async def test_retry_uses_task_keys_when_linked_task_keys_empty(self, base_bug_review_state): - """When linked_task_keys is empty, task_keys is used to reset current_task_key on retry.""" + async def test_retry_leaves_task_key_empty_for_review_feedback_fix(self, base_bug_review_state): + """Failed review asks implementation for a feedback fix pass, not a task rerun.""" base_bug_review_state["linked_task_keys"] = [] base_bug_review_state["task_keys"] = ["TASK-789"] runner = _make_mock_runner("verdict: tests_incomplete\n\nfeedback: Missing tests.") @@ -228,11 +251,11 @@ async def test_retry_uses_task_keys_when_linked_task_keys_empty(self, base_bug_r ): result = await local_review_changes(base_bug_review_state) - assert result["current_task_key"] == "TASK-789" + assert result["current_task_key"] is None @pytest.mark.asyncio - async def test_cap_at_two_retries_routes_to_create_pr(self, base_bug_review_state): - """qualitative_retry_count >= 2 → routes to create_pr with qualitative_review_failed=True.""" + async def test_cap_at_two_retries_records_failed_review(self, base_bug_review_state): + """qualitative_retry_count >= 2 records qualitative_review_failed=True.""" base_bug_review_state["qualitative_retry_count"] = 1 # Already 1, will become 2 → cap runner = _make_mock_runner("verdict: tests_incomplete\n\nfeedback: Still missing tests.") mock_git = _make_mock_git() @@ -245,10 +268,28 @@ async def test_cap_at_two_retries_routes_to_create_pr(self, base_bug_review_stat ): result = await local_review_changes(base_bug_review_state) - assert result["current_node"] == "create_pr" + assert result["current_node"] == "local_review" assert result["qualitative_review_failed"] is True assert result["qualitative_retry_count"] == 2 + @pytest.mark.asyncio + async def test_reviewer_changes_are_discarded_not_committed(self, base_bug_review_state): + """Bug reviewer output is feedback only; file edits are discarded.""" + runner = _make_mock_runner("verdict: tests_incomplete\n\nfeedback: Missing tests.") + mock_git = _make_mock_git(has_changes=True) + mock_workspace = MagicMock() + + with ( + patch("forge.workflow.nodes.local_reviewer.ContainerRunner", return_value=runner), + patch("forge.workflow.nodes.local_reviewer.GitOperations", return_value=mock_git), + patch("forge.workflow.nodes.local_reviewer.Workspace", return_value=mock_workspace), + ): + await local_review_changes(base_bug_review_state) + + mock_git.reset_hard.assert_called_once() + mock_git.stage_all.assert_not_called() + mock_git.commit.assert_not_called() + class TestRouteLocalReview: """Tests for the route_local_review routing function.""" @@ -284,7 +325,7 @@ async def run(self, **_kwargs): ): result = await local_review_changes(base_bug_review_state) - assert result["current_node"] == "create_pr" + assert result["current_node"] == "local_review" assert result["last_error"] is not None assert "Container crashed" in result["last_error"] @@ -309,16 +350,16 @@ async def run(self, **_kwargs): ): result = await local_review_changes(base_bug_review_state) - assert result["current_node"] == "create_pr" + assert result["current_node"] == "local_review" assert result["local_review_verdict"] is None class TestLocalReviewFeature: - """Tests that feature tickets use existing non-bug behavior.""" + """Tests that feature tickets use read-only local review behavior.""" @pytest.mark.asyncio - async def test_feature_uses_existing_prompt(self, base_feature_review_state): - """Feature ticket → existing prompt behavior (no qualitative verdict parsing).""" + async def test_feature_uses_read_only_prompt(self, base_feature_review_state): + """Feature ticket → read-only prompt behavior with verdict parsing.""" captured_desc = [] class _CapturingRunner: @@ -327,7 +368,7 @@ async def run(self, workspace_path, task_description="", **_kwargs): # noqa: AR result = MagicMock() result.success = True result.exit_code = 0 - result.stdout = "No issues found." + result.stdout = "verdict: adequate\n\nfeedback: No issues found." result.stderr = "" return result @@ -344,13 +385,14 @@ async def run(self, workspace_path, task_description="", **_kwargs): # noqa: AR ): result = await local_review_changes(base_feature_review_state) - assert result["current_node"] == "create_pr" - assert result.get("local_review_verdict") is None + assert result["current_node"] == "local_review" + assert result["local_review_has_unfixed_issues"] is False + assert result["local_review_verdict"] == "adequate" @pytest.mark.asyncio - async def test_feature_no_qualitative_fields_set(self, base_feature_review_state): - """Feature ticket → qualitative_retry_count and verdict not modified.""" - runner = _make_mock_runner("verdict: adequate\n\nfeedback: Good.") + async def test_feature_tests_incomplete_records_feedback(self, base_feature_review_state): + """Feature reviewer feedback drives a later implementation fix pass.""" + runner = _make_mock_runner("verdict: tests_incomplete\n\nfeedback: Add regression tests.") mock_git = _make_mock_git() mock_workspace = MagicMock() @@ -361,8 +403,26 @@ async def test_feature_no_qualitative_fields_set(self, base_feature_review_state ): result = await local_review_changes(base_feature_review_state) - # Feature state should not have qualitative fields modified - assert ( - "qualitative_retry_count" not in result or result.get("qualitative_retry_count") is None - ) - assert "local_review_verdict" not in result or result.get("local_review_verdict") is None + assert result["local_review_has_unfixed_issues"] is True + assert result["local_review_verdict"] == "tests_incomplete" + assert "Add regression tests" in (result["qualitative_feedback"] or "") + + @pytest.mark.asyncio + async def test_feature_reviewer_changes_are_discarded_not_committed( + self, base_feature_review_state + ): + """Feature reviewer output is feedback only; file edits are discarded.""" + runner = _make_mock_runner("verdict: tests_incomplete\n\nfeedback: Missing tests.") + mock_git = _make_mock_git(has_changes=True) + mock_workspace = MagicMock() + + with ( + patch("forge.workflow.nodes.local_reviewer.ContainerRunner", return_value=runner), + patch("forge.workflow.nodes.local_reviewer.GitOperations", return_value=mock_git), + patch("forge.workflow.nodes.local_reviewer.Workspace", return_value=mock_workspace), + ): + await local_review_changes(base_feature_review_state) + + mock_git.reset_hard.assert_called_once() + mock_git.stage_all.assert_not_called() + mock_git.commit.assert_not_called()