diff --git a/containers/Containerfile b/containers/Containerfile
index 7ac1f211..23e87998 100644
--- a/containers/Containerfile
+++ b/containers/Containerfile
@@ -30,8 +30,9 @@ RUN pip install --no-cache-dir \
 # Create workspace directory (will be mounted at runtime)
 RUN mkdir -p /workspace && chmod 777 /workspace
 
-# Copy entrypoint script
+# Copy entrypoint and review module
 COPY entrypoint.py /usr/local/bin/forge-entrypoint.py
+COPY review.py /usr/local/bin/review.py
 RUN chmod +x /usr/local/bin/forge-entrypoint.py
 
 # Set working directory
diff --git a/containers/entrypoint.py b/containers/entrypoint.py
index ae737e8e..1ab6da97 100644
--- a/containers/entrypoint.py
+++ b/containers/entrypoint.py
@@ -19,9 +19,20 @@
 import os
 import subprocess
 import sys
+import time
+from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
 
+from review import (
+    ReviewCycleData,
+    Verdict,
+    detect_review_md,
+    parse_review_config,
+    parse_verdict,
+    write_cycle_file,
+)
+
 # Configure logging
 log_level = os.environ.get("LOG_LEVEL", "INFO").upper()
 logging.basicConfig(
@@ -31,10 +42,52 @@
 )
 logger = logging.getLogger(__name__)
 
+# TTY detection for terminal progress display
+_IS_TTY = sys.stdout.isatty()
+
+
+def _print_review_progress(
+    cycle: int,
+    max_cycles: int,
+    verdict: str,
+    feedback: str,
+) -> None:
+    """Print review cycle progress to terminal (TTY only).
+
+    Displays progress in format: "Review cycle N/M: VERDICT - [truncated feedback]"
+    Only prints when stdout is a TTY (local/terminal mode).
+
+    Args:
+        cycle: Current cycle number (1-indexed).
+        max_cycles: Maximum cycles allowed.
+        verdict: Review verdict (e.g., "approved", "rejected").
+        feedback: Reviewer feedback text to truncate.
+    """
+    if not _IS_TTY:
+        return
+
+    # Build progress message
+    verdict_upper = verdict.upper()
+    message = f"Review cycle {cycle}/{max_cycles}: {verdict_upper}"
+
+    # Append truncated feedback for rejections
+    if feedback:
+        max_feedback_len = 200
+        if len(feedback) > max_feedback_len:
+            truncated = feedback[:max_feedback_len] + "..."
+        else:
+            truncated = feedback
+        message = f"{message} - {truncated}"
+
+    # Print immediately (flush ensures display within 1 second of call)
+    print(message, flush=True)
+
+
 # Enable LangChain debug/verbose mode if requested
 if os.environ.get("LANGCHAIN_VERBOSE", "").lower() in ("true", "1", "yes"):
     try:
         from langchain_core.globals import set_debug, set_verbose
+
         set_verbose(True)
         set_debug(True)
         logger.info("LangChain verbose/debug mode enabled")
@@ -344,7 +397,9 @@ async def run_agent_task(
         trace_context: Workflow fields forwarded to Langfuse only.
     """
     # Support both new (LLM_MODEL) and legacy (CLAUDE_MODEL) env var names
-    model_name = os.environ.get("LLM_MODEL") or os.environ.get("CLAUDE_MODEL", "claude-sonnet-4-5@20250929")
+    model_name = os.environ.get("LLM_MODEL") or os.environ.get(
+        "CLAUDE_MODEL", "claude-sonnet-4-5@20250929"
+    )
     logger.info(f"Implementing task: {task_summary}")
     logger.info(f"Model: {model_name}")
 
@@ -487,9 +542,7 @@ async def run_agent_task(
 
         # Run the agent (with Langfuse session context if enabled)
         initial_message = {
-            "messages": [
-                {"role": "user", "content": f"Implement this task:\n\n{task_description}"}
-            ]
+            "messages": [{"role": "user", "content": f"Implement this task:\n\n{task_description}"}]
         }
 
         if langfuse_enabled:
@@ -550,6 +603,305 @@ async def run_agent_task(
         return False
 
 
+async def run_reviewer_agent(
+    workspace: Path,
+    review_instructions: str,
+    task_key: str,
+) -> str:
+    """Run reviewer agent with review.md instructions.
+
+    Args:
+        workspace: Path to the workspace directory.
+        review_instructions: Instructions from review.md body.
+        task_key: Jira task key for tracing.
+
+    Returns:
+        The reviewer agent's output text.
+    """
+    # Support both new (LLM_MODEL) and legacy (CLAUDE_MODEL) env var names
+    model_name = os.environ.get("LLM_MODEL") or os.environ.get(
+        "CLAUDE_MODEL", "claude-sonnet-4-5@20250929"
+    )
+    logger.info(f"Running reviewer agent for {task_key}")
+    logger.info(f"Model: {model_name}")
+
+    from deepagents import create_deep_agent
+    from deepagents.backends import LocalShellBackend
+
+    # Check for API credentials
+    api_key = os.environ.get("ANTHROPIC_API_KEY")
+    vertex_project = os.environ.get("ANTHROPIC_VERTEX_PROJECT_ID")
+
+    if not api_key and not vertex_project:
+        raise RuntimeError(
+            "No API credentials found (ANTHROPIC_API_KEY or ANTHROPIC_VERTEX_PROJECT_ID)"
+        )
+
+    # Create the agent with local shell backend (read-only access for review)
+    backend = LocalShellBackend(
+        root_dir=str(workspace),
+        inherit_env=True,
+        virtual_mode=False,
+        timeout=300,  # 5 minutes for review
+    )
+
+    # Build reviewer system prompt
+    system_prompt = f"""You are a code reviewer agent. Your job is to review the implementation and provide a verdict.
+
+## Review Instructions
+{review_instructions}
+
+## Verdict Format
+After reviewing the code, you MUST output your verdict as either:
+- APPROVED - if the implementation meets all requirements
+- REJECTED - followed by your feedback if the implementation needs changes
+
+Example outputs:
+- "The implementation looks good. APPROVED"
+- "REJECTED: The function is missing error handling. Please add try/except blocks."
+
+Be specific in your feedback if rejecting."""
+
+    # Determine model type (Gemini vs Claude)
+    is_gemini = model_name.lower().startswith(("gemini", "models/gemini"))
+
+    # Get max tokens from env (default 8192 for review)
+    max_tokens = int(os.environ.get("LLM_MAX_TOKENS", "8192"))
+
+    if vertex_project:
+        if is_gemini:
+            from langchain_google_genai import ChatGoogleGenerativeAI
+
+            model = ChatGoogleGenerativeAI(
+                model=model_name,
+                project=vertex_project,
+                location=os.environ.get("ANTHROPIC_VERTEX_REGION", "us-east5"),
+                vertexai=True,
+                max_output_tokens=max_tokens,
+            )
+        else:
+            from langchain_google_vertexai.model_garden import ChatAnthropicVertex
+
+            model = ChatAnthropicVertex(
+                model_name=model_name,
+                project=vertex_project,
+                location=os.environ.get("ANTHROPIC_VERTEX_REGION", "us-east5"),
+                max_tokens=max_tokens,
+            )
+    else:
+        if is_gemini:
+            raise RuntimeError(f"Gemini model '{model_name}' requires Vertex AI credentials")
+
+        from langchain_anthropic import ChatAnthropic
+
+        model = ChatAnthropic(
+            model=model_name,
+            api_key=api_key,
+            max_tokens=max_tokens,
+        )
+
+    # Create reviewer agent (no skills needed for review)
+    agent = create_deep_agent(
+        model=model,
+        backend=backend,
+        system_prompt=system_prompt,
+    )
+
+    # Run the reviewer
+    initial_message = {
+        "messages": [
+            {
+                "role": "user",
+                "content": "Please review the implementation in the workspace and provide your verdict.",
+            }
+        ]
+    }
+
+    result = await agent.ainvoke(initial_message)
+
+    # Extract output text from result
+    messages = result.get("messages", [])
+    if messages:
+        last_message = messages[-1]
+        content = getattr(last_message, "content", str(last_message))
+        return content if isinstance(content, str) else str(content)
+    return ""
+
+
+def load_conversation_history(workspace: Path, task_key: str) -> list[dict] | None:
+    """Load conversation history from .forge/history/{task_key}.json.
+
+    Args:
+        workspace: Path to the workspace directory.
+        task_key: Jira task key to load history for.
+
+    Returns:
+        List of message dicts or None if file doesn't exist.
+    """
+    history_file = workspace / ".forge" / "history" / f"{task_key}.json"
+    if not history_file.exists():
+        logger.warning(f"Conversation history not found: {history_file}")
+        return None
+
+    try:
+        history_data = json.loads(history_file.read_text())
+        return history_data.get("messages", [])
+    except Exception as e:
+        logger.warning(f"Failed to load conversation history: {e}")
+        return None
+
+
+async def run_worker_with_feedback(
+    workspace: Path,
+    task_key: str,
+    task_summary: str,
+    task_description: str,
+    guardrails: str,
+    feedback: str,
+    previous_task_keys: list[str] | None = None,
+) -> bool:
+    """Re-run worker agent with reviewer feedback injected.
+
+    Args:
+        workspace: Path to the workspace directory.
+        task_key: Jira task key being implemented.
+        task_summary: Short task summary.
+        task_description: Detailed task description.
+        guardrails: Repository guidelines.
+        feedback: Reviewer feedback to inject.
+        previous_task_keys: List of previously implemented task keys.
+
+    Returns:
+        True if agent completed successfully.
+    """
+    # Inject reviewer feedback section into task description
+    feedback_section = f"\n\n## Reviewer Feedback\n\n{feedback}\n"
+    enhanced_description = task_description + feedback_section
+
+    logger.info(f"Re-running worker with feedback for {task_key}")
+
+    # Load conversation history for context
+    history = load_conversation_history(workspace, task_key)
+    if history:
+        logger.info(f"Loaded {len(history)} messages from conversation history")
+
+    return await run_agent_task(
+        workspace=workspace,
+        task_key=task_key,
+        task_summary=task_summary,
+        task_description=enhanced_description,
+        guardrails=guardrails,
+        previous_task_keys=previous_task_keys,
+    )
+
+
+async def run_review_loop(
+    workspace: Path,
+    task_key: str,
+    task_summary: str,
+    task_description: str,
+    guardrails: str,
+    skill_name: str,
+    review_md_path: Path,
+    previous_task_keys: list[str] | None = None,
+) -> bool:
+    """Run the review loop after initial skill execution.
+
+    Args:
+        workspace: Path to the workspace directory.
+        task_key: Jira task key being implemented.
+        task_summary: Short task summary.
+        task_description: Detailed task description.
+        guardrails: Repository guidelines.
+        skill_name: Name of the skill being reviewed.
+        review_md_path: Path to the review.md file.
+        previous_task_keys: List of previously implemented task keys.
+
+    Returns:
+        True if review loop completed successfully (approved or max retries).
+    """
+    # Parse review configuration (SC-004, SC-005)
+    config = parse_review_config(review_md_path)
+    max_retries = config.max_retries
+    instructions = config.instructions
+
+    logger.info(f"Starting review loop for {skill_name} (max_retries={max_retries})")
+    logger.info(
+        f"Review instructions: {instructions[:200]}..."
+        if len(instructions) > 200
+        else f"Review instructions: {instructions}"
+    )
+
+    cycle = 0
+    while cycle < max_retries:
+        cycle += 1
+        cycle_start = time.perf_counter()
+        logger.info(f"Review cycle {cycle}/{max_retries}")
+
+        # Run reviewer agent (SC-001)
+        try:
+            reviewer_output = await run_reviewer_agent(
+                workspace=workspace,
+                review_instructions=instructions,
+                task_key=task_key,
+            )
+        except Exception as e:
+            logger.error(f"Reviewer agent failed: {e}")
+            # Treat reviewer failure as rejection
+            reviewer_output = f"REJECTED: Reviewer agent error: {e}"
+
+        # Parse verdict (SC-002)
+        verdict, feedback = parse_verdict(reviewer_output)
+        elapsed = time.perf_counter() - cycle_start
+        timestamp = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+        # Display terminal progress immediately (SC-011)
+        _print_review_progress(cycle, max_retries, verdict, feedback)
+
+        logger.info(f"Review verdict: {verdict}, elapsed: {elapsed:.2f}s")
+        if feedback:
+            logger.info(
+                f"Feedback: {feedback[:200]}..." if len(feedback) > 200 else f"Feedback: {feedback}"
+            )
+
+        # Write cycle file (SC-007)
+        cycle_data = ReviewCycleData(
+            cycle=cycle,
+            max_cycles=max_retries,
+            verdict=verdict,
+            feedback=feedback,
+            skill=skill_name,
+            elapsed_seconds=elapsed,
+            timestamp=timestamp,
+        )
+        write_cycle_file(workspace, skill_name, cycle_data)
+
+        # On APPROVED: Exit loop successfully (SC-002)
+        if verdict == Verdict.APPROVED:
+            logger.info(f"Review approved on cycle {cycle}")
+            return True
+
+        # On REJECTED with retries remaining (SC-003)
+        if cycle < max_retries:
+            logger.info(f"Retrying with feedback (attempt {cycle + 1}/{max_retries})")
+            worker_success = await run_worker_with_feedback(
+                workspace=workspace,
+                task_key=task_key,
+                task_summary=task_summary,
+                task_description=task_description,
+                guardrails=guardrails,
+                feedback=feedback,
+                previous_task_keys=previous_task_keys,
+            )
+            if not worker_success:
+                logger.error(f"Worker retry failed on cycle {cycle}")
+                # Continue to next cycle anyway, let reviewer decide
+
+    # Max retries exhausted - exit with success (BR-005)
+    logger.warning(f"Review loop exhausted max_retries ({max_retries}) for {skill_name}")
+    return True
+
+
 def main():
     parser = argparse.ArgumentParser(
         description="Forge container entrypoint for AI code implementation"
@@ -597,9 +949,11 @@ def main():
         previous_task_keys = task_data.get("previous_task_keys", [])
         raw_trace_context = task_data.get("trace_context", {})
         trace_context = raw_trace_context if isinstance(raw_trace_context, dict) else {}
+        skill_name = task_data.get("skill_name", "")
     elif args.task_summary and args.task_description:
         task_summary = args.task_summary
         task_description = args.task_description
+        skill_name = ""
     else:
         logger.error(
             "Task details required: use --task-file or --task-summary + --task-description"
@@ -611,6 +965,10 @@ def main():
         logger.error(f"Workspace not found: {workspace}")
         sys.exit(EXIT_CONFIG_ERROR)
 
+    # Get skill_name from env var if not in task data
+    if not skill_name:
+        skill_name = os.environ.get("FORGE_SKILL_NAME", "")
+
     # Configure git for commits
     configure_git()
 
@@ -646,16 +1004,49 @@ def main():
         logger.error("Task implementation failed")
         sys.exit(EXIT_TASK_FAILED)
 
+    # Check for review.md and run review loop if it exists (SC-001, SC-010)
+    if skill_name:
+        # Skills are mounted at /skills/skill_0/, /skills/skill_1/, etc.
+        # Check all skill paths for the review.md
+        skills_base = Path("/skills")
+        review_md_path = detect_review_md(skill_name, task_key, skills_base)
+
+        if review_md_path:
+            logger.info(f"Found review.md at {review_md_path}, starting review loop")
+            if not asyncio.run(
+                run_review_loop(
+                    workspace=workspace,
+                    task_key=task_key,
+                    task_summary=task_summary,
+                    task_description=task_description,
+                    guardrails=guardrails,
+                    skill_name=skill_name,
+                    review_md_path=review_md_path,
+                    previous_task_keys=previous_task_keys,
+                )
+            ):
+                logger.error("Review loop failed")
+                sys.exit(EXIT_TASK_FAILED)
+        else:
+            logger.info(f"No review.md found for skill {skill_name}, skipping review loop")
+    else:
+        logger.info("No skill_name provided, skipping review loop")
+
     # Ensure changes are committed (agent should have done this, but as fallback).
     # Skip if workspace is not a git repo — analysis tasks (RCA, reflection) write
     # artifacts to .forge/ without needing a commit.
-    is_git_repo = subprocess.run(
-        ["git", "rev-parse", "--is-inside-work-tree"],
-        cwd=workspace,
-        capture_output=True,
-    ).returncode == 0
+    is_git_repo = (
+        subprocess.run(
+            ["git", "rev-parse", "--is-inside-work-tree"],
+            cwd=workspace,
+            capture_output=True,
+        ).returncode
+        == 0
+    )
     if is_git_repo:
-        fallback_message = f"[{task_key}] {task_summary}\n\nAuto-committed by Forge container fallback."
+        fallback_message = (
+            f"[{task_key}] {task_summary}\n\nAuto-committed by Forge container fallback."
+        )
         if not git_commit(workspace, fallback_message):
             logger.error("Failed to commit changes")
             sys.exit(EXIT_TASK_FAILED)
diff --git a/containers/review.py b/containers/review.py
new file mode 100644
index 00000000..720b6946
--- /dev/null
+++ b/containers/review.py
@@ -0,0 +1,309 @@
+"""Review loop data structures for the container review engine.
+
+This module provides core data structures used by the review loop engine:
+
+- ReviewConfig: Configuration parsed from review.md frontmatter
+- ReviewCycleData: Data captured for each review cycle iteration
+- Verdict: Enum for review outcomes (APPROVED/REJECTED)
+- parse_review_config: Parser for review.md YAML frontmatter
+- detect_review_md: Locates review.md with project override precedence
+- parse_verdict: Extracts verdict and feedback from review output text
+- write_cycle_file: Writes review cycle data to JSON file
+"""
+
+import json
+import logging
+import os
+import re
+from dataclasses import asdict, dataclass
+from enum import StrEnum
+from pathlib import Path
+
+import yaml
+
+logger = logging.getLogger(__name__)
+
+# Default max retries for review loops
+DEFAULT_MAX_RETRIES = 3
+
+# Environment variable for max retries override
+ENV_MAX_RETRIES = "AUTO_REVIEW_MAX_RETRIES"
+
+
+class Verdict(StrEnum):
+    """Review verdict indicating approval or rejection."""
+
+    APPROVED = "approved"
+    REJECTED = "rejected"
+
+
+@dataclass
+class ReviewConfig:
+    """Configuration for review loops parsed from review.md frontmatter.
+
+    Attributes:
+        max_retries: Maximum number of retry attempts (default: 3).
+        instructions: Review instructions from the markdown body.
+    """
+
+    max_retries: int = DEFAULT_MAX_RETRIES
+    instructions: str = ""
+
+
+@dataclass
+class ReviewCycleData:
+    """Data captured for a single review cycle iteration.
+
+    Attributes:
+        cycle: Current cycle number (1-indexed).
+        max_cycles: Maximum cycles allowed.
+        verdict: Review outcome ("approved" or "rejected").
+        feedback: Reviewer feedback text.
+        skill: Name of the skill that performed the review.
+        elapsed_seconds: Time taken for this review cycle.
+        timestamp: ISO 8601 UTC timestamp of cycle completion.
+    """
+
+    cycle: int
+    max_cycles: int
+    verdict: str
+    feedback: str
+    skill: str
+    elapsed_seconds: float
+    timestamp: str
+
+
+# Regex pattern for YAML frontmatter: --- at start, optional content, --- delimiter
+# The frontmatter content between delimiters may be empty (just newlines)
+_FRONTMATTER_PATTERN = re.compile(
+    r"\A---\s*\n(.*?)---\s*\n?(.*)",
+    re.DOTALL,
+)
+
+
+def parse_review_config(review_md_path: Path) -> ReviewConfig:
+    """Parse ReviewConfig from a review.md file with YAML frontmatter.
+
+    The file format is:
+    ```
+    ---
+    max_retries: 5
+    ---
+    Review instructions here...
+    ```
+
+    Priority for max_retries:
+    1. YAML frontmatter value
+    2. AUTO_REVIEW_MAX_RETRIES environment variable
+    3. Default value (3)
+
+    If the file does not exist or YAML parsing fails, returns defaults
+    with a warning log (per BR-006).
+
+    Args:
+        review_md_path: Path to the review.md file.
+
+    Returns:
+        ReviewConfig with parsed values or defaults.
+    """
+    # Get env var fallback for max_retries
+    env_max_retries: int | None = None
+    env_value = os.environ.get(ENV_MAX_RETRIES)
+    if env_value is not None:
+        try:
+            env_max_retries = int(env_value)
+        except ValueError:
+            logger.warning(
+                "Invalid %s value %r, ignoring",
+                ENV_MAX_RETRIES,
+                env_value,
+            )
+
+    # Default config to return on errors
+    default_max_retries = env_max_retries if env_max_retries is not None else DEFAULT_MAX_RETRIES
+
+    if not review_md_path.exists():
+        logger.warning("Review config not found at %s, using defaults", review_md_path)
+        return ReviewConfig(max_retries=default_max_retries, instructions="")
+
+    try:
+        content = review_md_path.read_text(encoding="utf-8")
+    except OSError as e:
+        logger.warning("Failed to read %s: %s, using defaults", review_md_path, e)
+        return ReviewConfig(max_retries=default_max_retries, instructions="")
+
+    # Try to parse frontmatter
+    match = _FRONTMATTER_PATTERN.match(content)
+    if not match:
+        # No frontmatter found - treat entire content as instructions
+        return ReviewConfig(max_retries=default_max_retries, instructions=content.strip())
+
+    frontmatter_raw = match.group(1)
+    instructions = match.group(2).strip()
+
+    # Parse YAML frontmatter
+    try:
+        frontmatter = yaml.safe_load(frontmatter_raw)
+    except yaml.YAMLError as e:
+        logger.warning(
+            "Malformed YAML frontmatter in %s: %s, using defaults",
+            review_md_path,
+            e,
+        )
+        return ReviewConfig(max_retries=default_max_retries, instructions=instructions)
+
+    # Handle empty frontmatter
+    if frontmatter is None:
+        frontmatter = {}
+
+    # Extract max_retries with fallback chain
+    max_retries = default_max_retries
+    if isinstance(frontmatter, dict) and "max_retries" in frontmatter:
+        fm_value = frontmatter["max_retries"]
+        if isinstance(fm_value, int):
+            max_retries = fm_value
+        else:
+            logger.warning(
+                "Invalid max_retries value %r in %s, using default",
+                fm_value,
+                review_md_path,
+            )
+
+    return ReviewConfig(max_retries=max_retries, instructions=instructions)
+
+
+def detect_review_md(skill_name: str, ticket_key: str, skills_base: Path) -> Path | None:
+    """Detect the review.md file for a skill with project override precedence.
+
+    Mirrors the precedence logic from src/forge/skills/resolver.py:
+    project-specific override wins over default.
+
+    Search order:
+    1. skills/{project}/{skill_name}/review.md (project override)
+    2. skills/default/{skill_name}/review.md (default)
+
+    Args:
+        skill_name: Name of the skill (e.g., "local-code-review").
+        ticket_key: Jira ticket key (e.g., "AISOS-123") to extract project.
+        skills_base: Base path to the skills directory.
+
+    Returns:
+        Path to review.md if found, None otherwise (SC-010).
+    """
+    # Extract project key from ticket_key (e.g., "AISOS-123" -> "aisos")
+    project: str | None = None
+    if "-" in ticket_key:
+        project = ticket_key.split("-")[0].lower()
+
+    # Check project override first (if project can be extracted)
+    if project:
+        project_path = skills_base / project / skill_name / "review.md"
+        if project_path.is_file():
+            logger.debug(
+                "Found project override review.md: %s",
+                project_path,
+            )
+            return project_path
+
+    # Fall back to default
+    default_path = skills_base / "default" / skill_name / "review.md"
+    if default_path.is_file():
+        logger.debug("Found default review.md: %s", default_path)
+        return default_path
+
+    # No review.md found in either location (SC-010)
+    logger.debug(
+        "No review.md found for skill %r (ticket %s)",
+        skill_name,
+        ticket_key,
+    )
+    return None
+
+
+def parse_verdict(output_text: str) -> tuple[Verdict, str]:
+    """Extract verdict and feedback from review output text.
+
+    Performs case-insensitive search for "APPROVED" and "REJECTED" markers.
+    When both markers are present, the first occurrence wins (checks APPROVED first).
+
+    Args:
+        output_text: The raw output text from the review process.
+
+    Returns:
+        Tuple of (Verdict, feedback_string):
+        - (Verdict.APPROVED, "") when APPROVED marker is found first
+        - (Verdict.REJECTED, feedback) when REJECTED marker is found first,
+          where feedback is the text following the marker
+        - (Verdict.REJECTED, "Verdict could not be parsed") when neither marker found
+    """
+    # Normalize for case-insensitive matching
+    text_upper = output_text.upper()
+
+    # Find positions of both markers (case-insensitive)
+    approved_pos = text_upper.find("APPROVED")
+    rejected_pos = text_upper.find("REJECTED")
+
+    # Neither marker found
+    if approved_pos == -1 and rejected_pos == -1:
+        return (Verdict.REJECTED, "Verdict could not be parsed")
+
+    # Determine which marker comes first
+    # If APPROVED found and (REJECTED not found OR APPROVED comes first)
+    if approved_pos != -1 and (rejected_pos == -1 or approved_pos < rejected_pos):
+        return (Verdict.APPROVED, "")
+
+    # REJECTED marker found first (or only REJECTED found)
+    # Extract feedback from text following the REJECTED marker
+    # Skip past "REJECTED" keyword (8 characters)
+    feedback_start = rejected_pos + len("REJECTED")
+    feedback = output_text[feedback_start:].strip()
+
+    return (Verdict.REJECTED, feedback)
+
+
+def write_cycle_file(workspace: Path, step_name: str, cycle_data: ReviewCycleData) -> None:
+    """Write review cycle data to a JSON file.
+
+    Creates the directory .forge/{step_name}/ if it doesn't exist,
+    and writes review_cycle_N.json where N is the cycle number.
+
+    The JSON file contains all ReviewCycleData fields with proper formatting:
+    - cycle: Current cycle number (1-indexed)
+    - max_cycles: Maximum cycles allowed
+    - verdict: Lowercase string ("approved" or "rejected")
+    - feedback: Reviewer feedback text
+    - skill: Name of the skill that performed the review
+    - elapsed_seconds: Time taken for this review cycle
+    - timestamp: ISO 8601 UTC format timestamp
+
+    Args:
+        workspace: Path to the workspace root directory.
+        step_name: Name of the step (used as subdirectory name).
+        cycle_data: ReviewCycleData instance to serialize.
+    """
+    # Create .forge/{step_name}/ directory if it doesn't exist
+    output_dir = workspace / ".forge" / step_name
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Build output path: .forge/{step_name}/review_cycle_N.json
+    output_file = output_dir / f"review_cycle_{cycle_data.cycle}.json"
+
+    # Convert dataclass to dict
+    data = asdict(cycle_data)
+
+    # Ensure verdict is lowercase string (handle Verdict enum or string)
+    verdict_value = data["verdict"]
+    if hasattr(verdict_value, "value"):
+        # It's a Verdict enum
+        data["verdict"] = verdict_value.value
+    else:
+        # It's already a string, ensure lowercase
+        data["verdict"] = str(verdict_value).lower()
+
+    # Write JSON with proper formatting (2-space indent for readability)
+    output_file.write_text(
+        json.dumps(data, indent=2, ensure_ascii=False) + "\n",
+        encoding="utf-8",
+    )
+
+    logger.debug("Wrote review cycle file: %s", output_file)
diff --git a/docs/guide/auto-review.md b/docs/guide/auto-review.md
new file mode 100644
index 00000000..eb8b8df0
--- /dev/null
+++ b/docs/guide/auto-review.md
@@ -0,0 +1,419 @@
+# Auto-Review
+
+Auto-review is a quality gate that runs after skill execution. It spawns a reviewer agent to evaluate the skill's output and triggers retries with feedback when the result doesn't meet quality criteria.
+
+## Overview
+
+When a skill has a `review.md` file in its directory, Forge automatically invokes a reviewer agent after the skill completes. The reviewer evaluates the output according to instructions in `review.md` and issues a verdict:
+
+- **APPROVED** — The output meets quality criteria; workflow continues
+- **REJECTED** — The output has issues; skill re-runs with feedback injected
+
+```mermaid
+flowchart LR
+    A[Skill Executes] --> B{review.md exists?}
+    B -->|No| C[Continue Workflow]
+    B -->|Yes| D[Spawn Reviewer Agent]
+    D --> E{Verdict?}
+    E -->|APPROVED| C
+    E -->|REJECTED| F{Retries left?}
+    F -->|Yes| G[Re-run Skill with Feedback]
+    G --> D
+    F -->|No| C
+```
+
+### When to Use Auto-Review
+
+Auto-review is useful when:
+
+- A skill produces artifacts that need quality validation before proceeding
+- You want to catch common mistakes (missing tests, debug code, documentation gaps) without human intervention
+- You need consistent quality standards across all executions of a skill
+
+Auto-review is **not** a replacement for human code review — it catches machine-detectable issues early in the pipeline.
+
+## Quick Start
+
+Create a `review.md` file in your skill directory:
+
+```
+skills/
+└── default/
+    └── implement-task/
+        ├── SKILL.md
+        └── review.md      ← Reviewer configuration
+```
+
+Minimal `review.md`:
+
+```markdown
+---
+max_retries: 3
+---
+
+# Quality Check
+
+Verify the implementation:
+
+1. All new public functions have tests
+2. No debug print statements in committed code
+3. Error handling uses specific exception types
+
+Output APPROVED if all criteria pass, or REJECTED with specific feedback.
+```
+
+That's it. The next time `implement-task` runs, the reviewer will evaluate its output.
+
+## Configuration
+
+### `max_retries` in Frontmatter
+
+The `max_retries` field in YAML frontmatter controls how many times a skill can retry after a REJECTED verdict:
+
+```markdown
+---
+max_retries: 5
+---
+
+Review instructions here...
+```
+
+### `AUTO_REVIEW_MAX_RETRIES` Environment Variable
+
+Set a global default for all skills that don't specify `max_retries` in their `review.md`:
+
+```bash
+# In .env or environment
+AUTO_REVIEW_MAX_RETRIES=5
+```
+
+### Priority Chain
+
+When determining retry limit, Forge checks in order:
+
+| Priority | Source | Example |
+|----------|--------|---------|
+| 1 (highest) | Frontmatter `max_retries` | `max_retries: 5` in review.md |
+| 2 | Environment variable | `AUTO_REVIEW_MAX_RETRIES=5` |
+| 3 (lowest) | Built-in default | `3` |
+
+### Default Value
+
+If neither frontmatter nor environment variable is set, the default `max_retries` is **3**.
+
+## Writing Review Instructions
+
+The prose body of `review.md` (everything after the `---` closing delimiter) becomes the reviewer agent's instructions. Write clear, actionable criteria.
+
+### Effective Review Instructions
+
+**Do:**
+
+- Define explicit pass/fail criteria for each quality dimension
+- Include examples of acceptable vs. flagged patterns
+- Specify the expected output format (APPROVED/REJECTED markers)
+- Reference specific files or artifacts the reviewer should examine
+
+**Don't:**
+
+- Write vague instructions ("make sure the code is good")
+- Include implementation details unrelated to review
+- Duplicate instructions from the skill's SKILL.md
+
+### Example: Comprehensive Review
+
+```markdown
+---
+max_retries: 3
+---
+
+# Implementation Review
+
+Review code changes via `git diff origin/main...HEAD`.
+
+## Criteria
+
+### Test Coverage
+- Public functions must have tests
+- Flag: new public function without corresponding test
+
+### Error Handling
+- No bare `except:` or empty `catch {}`
+- Flag: generic error swallowing
+
+### Documentation
+- Public APIs need docstrings
+- Flag: public function without documentation
+
+## Verdict
+
+Output exactly one:
+- `APPROVED` — all criteria pass
+- `REJECTED` — followed by specific, actionable feedback
+```
+
+See [skills/default/implement-task/review.md](https://github.com/your-org/forge/blob/main/skills/default/implement-task/review.md) for a complete example.
+
+## Verdict Protocol
+
+The reviewer must output exactly one verdict marker. Forge scans the output text for these markers.
+
+### APPROVED
+
+Use when all criteria pass:
+
+```
+APPROVED
+```
+
+Or with optional notes:
+
+```
+APPROVED
+
+Notes:
+- Consider adding edge case tests (non-blocking)
+```
+
+### REJECTED
+
+Use when any criterion fails. Always include specific, actionable feedback:
+
+```
+REJECTED
+
+Issues:
+- [Test Coverage] src/handler.py:42 — `process_data()` is public but has no test
+- [Error Handling] pkg/db.go:87 — error from `Query()` discarded with `_`
+
+Required changes:
+1. Add test for `process_data()` in `test_handler.py`
+2. Handle or propagate error from `Query()` at line 87
+```
+
+### Verdict Detection
+
+Forge performs case-insensitive substring matching for `APPROVED` and `REJECTED`:
+
+- First marker found wins (if both appear, the one occurring first determines verdict)
+- Marker can appear anywhere in the output (start, middle, end)
+- Surrounding text is captured as feedback for REJECTED verdicts
+
+### No Verdict Found
+
+If neither marker appears, Forge treats the output as **REJECTED** with the feedback:
+
+```
+Verdict could not be parsed
+```
+
+This prevents silent failures from malformed reviewer output.
+
+## Per-Project Overrides
+
+Review configuration follows the same precedence rules as skill files. See [Authoring Skills](../skills/authoring.md) for the general pattern.
+
+### Resolution Order
+
+For a ticket `MYPROJECT-123` running skill `implement-task`:
+
+| Priority | Path | Description |
+|----------|------|-------------|
+| 1 (highest) | `skills/myproject/implement-task/review.md` | Project override |
+| 2 | `skills/default/implement-task/review.md` | Default configuration |
+
+### Override Behavior
+
+- If the project override exists, it **completely replaces** the default (no merging)
+- If neither exists, the skill runs **without a reviewer**
+- Project key is extracted from ticket key and lowercased (`MYPROJECT-123` → `myproject`)
+
+### Example: Project-Specific Review
+
+```
+skills/
+├── default/
+│   └── implement-task/
+│       └── review.md         ← Generic criteria for all projects
+└── myproject/
+    └── implement-task/
+        └── review.md         ← Stricter criteria for myproject
+```
+
+The `myproject` override might add additional criteria:
+
+```markdown
+---
+max_retries: 5
+---
+
+# MyProject Code Review
+
+All default criteria plus:
+
+## MyProject-Specific
+
+### Performance
+- No N+1 database queries
+- Bulk operations must use batching
+
+### Security
+- All user input must be validated
+- No raw SQL queries (use ORM)
+```
+
+## Observability
+
+### Review Cycle Files
+
+Each review cycle writes a JSON file to the workspace:
+
+```
+.forge/{step-name}/review_cycle_N.json
+```
+
+Where:
+- `{step-name}` is the workflow step (e.g., `implement_task`, `local_review`)
+- `N` is the cycle number (1-indexed)
+
+**Example path:** `.forge/implement_task/review_cycle_1.json`
+
+### File Contents
+
+```json
+{
+  "cycle": 1,
+  "max_cycles": 3,
+  "verdict": "rejected",
+  "feedback": "Missing tests for process_data() function",
+  "skill": "implement-task",
+  "elapsed_seconds": 12.5,
+  "timestamp": "2024-01-15T10:30:00Z"
+}
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `cycle` | int | Current cycle number (1-indexed) |
+| `max_cycles` | int | Maximum allowed cycles |
+| `verdict` | string | `"approved"` or `"rejected"` |
+| `feedback` | string | Reviewer feedback text (empty for approved) |
+| `skill` | string | Name of the skill being reviewed |
+| `elapsed_seconds` | float | Time spent on this review cycle |
+| `timestamp` | string | ISO 8601 UTC timestamp |
+
+### Prometheus Metrics
+
+Forge exposes these metrics at `/metrics`:
+
+| Metric | Type | Labels | Description |
+|--------|------|--------|-------------|
+| `forge_review_cycles_total` | Counter | `skill`, `step` | Total review cycles detected |
+| `forge_review_verdicts_total` | Counter | `skill`, `step`, `verdict` | Verdicts by outcome |
+| `forge_review_duration_seconds` | Histogram | `skill`, `step` | Review cycle duration |
+
+### Terminal Progress
+
+In local/terminal mode, Forge prints progress during review loops:
+
+```
+Review cycle 1/3: REJECTED - Missing tests for process_data() function...
+Review cycle 2/3: REJECTED - Tests added but error handling still missing...
+Review cycle 3/3: APPROVED
+```
+
+Feedback is truncated to 200 characters in terminal output.
+
+## Troubleshooting
+
+### Reviewer Times Out
+
+**Symptom:** Review cycle takes > 5 minutes and fails.
+
+**Causes:**
+- Reviewer instructions are too complex or ambiguous
+- Large codebase with many files to examine
+- Reviewer agent loops or gets stuck
+
+**Solutions:**
+- Simplify review criteria to focus on specific, checkable items
+- Use `git diff` instead of reading entire files
+- Add explicit instructions to limit scope
+
+### Verdict Parsing Failures
+
+**Symptom:** Every cycle shows "Verdict could not be parsed" even though reviewer output looks correct.
+
+**Causes:**
+- Marker is misspelled (`APROVED`, `REJECT`)
+- Marker appears inside a code block (not detected)
+- Non-ASCII characters in marker
+
+**Solutions:**
+- Ensure reviewer outputs exactly `APPROVED` or `REJECTED`
+- Add explicit instructions: "Output the word APPROVED or REJECTED on its own line"
+- Check reviewer output in cycle JSON files
+
+### Endless Loops (Max Retries Hit)
+
+**Symptom:** Skill always exhausts `max_retries` without passing.
+
+**Causes:**
+- Review criteria are impossible to satisfy automatically
+- Feedback is not actionable by the skill
+- Skill doesn't understand the feedback format
+
+**Solutions:**
+- Review the feedback in cycle JSON files — is it specific enough?
+- Simplify criteria to things the skill can actually fix
+- Increase `max_retries` if issues are intermittent
+- Consider whether the criteria should cause rejection at all
+
+### review.md Not Detected
+
+**Symptom:** Skill runs without review even though `review.md` exists.
+
+**Causes:**
+- File is in wrong directory or has wrong name
+- File is named `Review.md` (wrong case)
+- Project override path is incorrect
+
+**Solutions:**
+- Verify exact path: `skills/{project}/{skill-name}/review.md`
+- Check file is named `review.md` (lowercase)
+- Verify project key extraction (`PROJ-123` → `proj`)
+
+### YAML Frontmatter Errors
+
+**Symptom:** `max_retries` setting is ignored.
+
+**Causes:**
+- Frontmatter is malformed YAML
+- `max_retries` value is not an integer
+- Missing `---` delimiters
+
+**Solutions:**
+- Validate YAML syntax
+- Use integer value: `max_retries: 3` (not `"3"` or `three`)
+- Ensure file starts with `---` on first line
+- Check worker logs for "Malformed YAML" warnings
+
+### Feedback Not Injected on Retry
+
+**Symptom:** Skill re-runs but doesn't see previous feedback.
+
+**Causes:**
+- Conversation history file missing or corrupted
+- Feedback format not recognized by skill
+
+**Solutions:**
+- Check `.forge/history/{task_key}.json` exists
+- Verify "## Reviewer Feedback" section appears in retry prompt
+- Review skill's handling of feedback sections
+
+## Related Documentation
+
+- [Authoring Skills](../skills/authoring.md) — General skill authoring guide
+- [review.md Schema](../reference/review-md-schema.md) — Complete file format specification
+- [Feature Workflow](feature-workflow.md) — How auto-review fits in the feature pipeline
+- [Bug Workflow](bug-workflow.md) — How auto-review fits in the bug pipeline
diff --git a/docs/reference/config.md b/docs/reference/config.md
index 72f94b5d..34dd861a 100644
--- a/docs/reference/config.md
+++ b/docs/reference/config.md
@@ -93,6 +93,16 @@ Use these to skip the Jira project property requirement during local development
 | `CONTAINER_MEMORY_LIMIT` | Memory limit for task containers (default: `4g`) |
 | `CONTAINER_CPU_LIMIT` | CPU limit for task containers (default: `2`) |
 
+## Auto-Review
+
+Settings for the automatic review loop that runs after skill execution. See the [Auto-Review Guide](../guide/auto-review.md) for details.
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `AUTO_REVIEW_MAX_RETRIES` | `3` | Default maximum retry attempts when a skill's `review.md` doesn't specify `max_retries` |
+| `AUTO_REVIEW_POLL_INTERVAL` | `5.0` | Polling interval in seconds for detecting review cycle files during container execution |
+| `AUTO_REVIEW_RECORD_POLLED_FILES` | (none) | Recording mode for polled review cycle files: `log` logs cycle data at INFO level, `copy` copies files to recording directory |
+
 ## Observability
 
 ### Langfuse Tracing
diff --git a/docs/reference/review-md-schema.md b/docs/reference/review-md-schema.md
new file mode 100644
index 00000000..49b6afd8
--- /dev/null
+++ b/docs/reference/review-md-schema.md
@@ -0,0 +1,228 @@
+# review.md Schema
+
+This document specifies the file format and validation rules for `review.md` files used to configure skill-specific review loops.
+
+## Overview
+
+A `review.md` file configures how an AI reviewer evaluates the output of a skill before proceeding. It specifies retry limits and provides review instructions that guide the reviewer's analysis.
+
+## File Location
+
+### Naming Convention
+
+The file must be named exactly `review.md` (lowercase) and placed alongside `SKILL.md` in a skill directory:
+
+```
+skills/
+├── default/
+│   └── local-code-review/
+│       ├── SKILL.md          # Skill definition
+│       └── review.md         # Review configuration
+└── myproject/
+    └── local-code-review/
+        ├── SKILL.md          # Project-specific skill override
+        └── review.md         # Project-specific review override
+```
+
+### Override Precedence
+
+Review configuration follows the same precedence rules as skills:
+
+| Priority | Path | Description |
+|----------|------|-------------|
+| 1 (highest) | `skills/{project}/{skill-name}/review.md` | Project-specific override |
+| 2 | `skills/default/{skill-name}/review.md` | Default configuration |
+
+The project key is extracted from the Jira ticket key and lowercased. For ticket `MYPROJECT-123`, Forge checks `skills/myproject/` first.
+
+**Examples:**
+
+- Ticket `AISOS-456` with skill `local-code-review`:
+  1. Check `skills/aisos/local-code-review/review.md`
+  2. Fall back to `skills/default/local-code-review/review.md`
+
+- If neither path exists, the skill runs **without a reviewer**.
+
+## File Format
+
+The `review.md` file uses YAML frontmatter followed by Markdown prose, consistent with `SKILL.md` files:
+
+```markdown
+---
+max_retries: 5
+---
+
+# Review Instructions
+
+Verify that the implementation meets all acceptance criteria...
+```
+
+### YAML Frontmatter Schema
+
+The frontmatter section is delimited by `---` markers and contains YAML key-value pairs:
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `max_retries` | `int` | No | `3` | Maximum retry attempts after a REJECTED verdict |
+
+**Frontmatter rules:**
+
+- The opening `---` must be the first line of the file (no leading whitespace or blank lines)
+- The closing `---` must appear on its own line
+- Unknown fields are silently ignored (forward compatibility)
+- Field values must match the expected type; invalid types trigger a warning and use defaults
+
+**Priority for `max_retries`:**
+
+1. Frontmatter value (if valid integer)
+2. `AUTO_REVIEW_MAX_RETRIES` environment variable (if set and valid)
+3. Built-in default: `3`
+
+### Prose Body
+
+Everything after the closing `---` delimiter is treated as reviewer instructions. This Markdown content is passed to the reviewer agent as its system prompt.
+
+**Best practices for instructions:**
+
+- Define what constitutes approval vs. rejection
+- Specify quality criteria and acceptance thresholds
+- Reference any artifacts the reviewer should examine (e.g., `.forge/` files, test output)
+- Keep instructions focused on review criteria, not implementation details
+
+**Example:**
+
+```markdown
+---
+max_retries: 3
+---
+
+# Code Review Checklist
+
+Evaluate the implementation against these criteria:
+
+## Required (reject if missing)
+- [ ] All acceptance criteria from the task are addressed
+- [ ] No obvious security vulnerabilities introduced
+- [ ] Tests cover the new functionality
+
+## Recommended (note but don't reject)
+- [ ] Code follows existing patterns
+- [ ] Documentation updated if needed
+
+## Output Format
+
+End your review with exactly one of:
+- `APPROVED` — all required criteria pass
+- `REJECTED` — followed by specific feedback for the implementer
+```
+
+## Edge Cases
+
+### Empty File
+
+A `review.md` file with zero bytes or only whitespace:
+
+- Reviewer spawns with **no instructions** (empty prompt)
+- Uses default `max_retries: 3` (or env var if set)
+
+### Frontmatter-Only File
+
+A file with frontmatter but no prose body:
+
+```markdown
+---
+max_retries: 5
+---
+```
+
+- Reviewer spawns with **no instructions** (empty prompt)
+- Uses the specified `max_retries` value
+
+### Missing Frontmatter
+
+A file without `---` delimiters:
+
+```markdown
+Review the code for bugs and style issues.
+```
+
+- Entire file content becomes reviewer instructions
+- Uses default `max_retries: 3` (or env var if set)
+
+### Malformed YAML
+
+If the frontmatter contains invalid YAML:
+
+```markdown
+---
+max_retries: "not a number"
+---
+
+Instructions here...
+```
+
+- A warning is logged
+- Default `max_retries` is used
+- Prose body is still extracted and used as instructions
+
+### No review.md Found
+
+If no `review.md` exists in either the project or default skill directory:
+
+- **No reviewer is spawned** — the skill output is accepted without review
+- This is the expected behavior for skills that don't require review
+
+## Review Cycle Output
+
+Each review cycle writes its results to:
+
+```
+.forge/{step-name}/review_cycle_N.json
+```
+
+Where:
+- `{step-name}` is the workflow step (e.g., `implement_task`, `local_code_review`)
+- `N` is the 1-indexed cycle number
+
+**Example:** `.forge/implement_task/review_cycle_1.json`
+
+**JSON schema:**
+
+```json
+{
+  "cycle": 1,
+  "max_cycles": 3,
+  "verdict": "rejected",
+  "feedback": "Missing test coverage for edge case...",
+  "skill": "local-code-review",
+  "elapsed_seconds": 45.2,
+  "timestamp": "2024-01-15T10:30:00Z"
+}
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `cycle` | `int` | Current cycle number (1-indexed) |
+| `max_cycles` | `int` | Maximum cycles allowed (from `max_retries + 1`) |
+| `verdict` | `string` | `"approved"` or `"rejected"` |
+| `feedback` | `string` | Reviewer feedback (empty string if approved) |
+| `skill` | `string` | Name of the skill that performed the review |
+| `elapsed_seconds` | `float` | Time taken for this review cycle |
+| `timestamp` | `string` | ISO 8601 UTC timestamp of cycle completion |
+
+## Validation Summary
+
+| Condition | Behavior |
+|-----------|----------|
+| File missing | No reviewer spawned |
+| File empty | Reviewer spawns with no instructions, default retries |
+| No frontmatter | Entire content used as instructions, default retries |
+| Empty frontmatter | Prose used as instructions, default retries |
+| Malformed YAML | Warning logged, prose used, default retries |
+| Invalid `max_retries` type | Warning logged, default used |
+| Unknown frontmatter fields | Silently ignored |
+
+## See Also
+
+- [Authoring Skills](../skills/authoring.md) — how to create and customize skills
+- [Configuration](config.md) — environment variables including `AUTO_REVIEW_MAX_RETRIES`
diff --git a/docs/skills/authoring.md b/docs/skills/authoring.md
index bf08bd6c..6dcb86e7 100644
--- a/docs/skills/authoring.md
+++ b/docs/skills/authoring.md
@@ -102,6 +102,22 @@ Write `.forge/fix-plan.md` with:
 3. Proposed fix approach
 ```
 
+## Auto-Review
+
+Skills can include a `review.md` file alongside `SKILL.md` to enable automatic quality gates. When present, Forge spawns a reviewer agent after the skill completes, evaluating the output against criteria you define.
+
+```
+skills/
+└── myteam/
+    └── implement-task/
+        ├── SKILL.md
+        └── review.md      ← reviewer instructions
+```
+
+The reviewer issues APPROVED or REJECTED verdicts. On rejection, the skill re-runs with feedback injected — up to `max_retries` attempts (default: 3).
+
+See the [Auto-Review Guide](../guide/auto-review.md) for configuration options and writing effective review instructions.
+
 ## Using your skills with Forge
 
 See [Customize Forge for your project](../dev/contributing.md#customize-forge-for-your-project) for how to point a Jira project at your skills repo using `forge project-setup` and the skill installer.
diff --git a/docs/skills/defaults.md b/docs/skills/defaults.md
index b2a9f83f..edeb380b 100644
--- a/docs/skills/defaults.md
+++ b/docs/skills/defaults.md
@@ -50,6 +50,8 @@ Drives the code implementation agent running inside an ephemeral container.
 
 **Scope:** Given a task file at `.forge/task.json`, implement the code, run tests, and commit. No external network access.
 
+**Auto-review:** Includes `review.md` that checks test coverage, error handling, documentation, and debug code before PR creation. See [Auto-Review Guide](../guide/auto-review.md).
+
 ---
 
 ### `local-code-review`
@@ -58,6 +60,8 @@ Reviews the implementation diff against `main` before PR creation.
 
 **Focus:** Breaking changes, test failures, security issues, and spec mismatches. Up to 2 fix passes.
 
+**Auto-review:** Includes `review.md` that validates test suite execution and checks for breaking changes, security issues, and spec alignment. See [Auto-Review Guide](../guide/auto-review.md).
+
 ---
 
 ### `analyze-ci`
diff --git a/docs/skills/index.md b/docs/skills/index.md
index 875c8c06..b3f55b90 100644
--- a/docs/skills/index.md
+++ b/docs/skills/index.md
@@ -40,7 +40,8 @@ skills/
 │   └── review-code/
 └── {project}/            # Per-project overrides (Jira key, lowercase)
     └── {skill-name}/
-        └── SKILL.md
+        ├── SKILL.md
+        └── review.md     # Optional auto-review config
 ```
 
 ## Available Skills
@@ -59,6 +60,12 @@ skills/
 | `review-code` | AI PR review against spec |
 | `analyze-bug` | Bug RCA generation |
 
+### Optional Files
+
+| File | Purpose |
+|------|---------|
+| `review.md` | Auto-review configuration — when present, Forge spawns a reviewer agent after skill execution. See [Auto-Review Guide](../guide/auto-review.md). |
+
 ## The Primary Way to Contribute
 
 Writing a skill set for your team's stack is the fastest way to make Forge work better for your project — and to share that knowledge with others using similar tooling.
diff --git a/pyproject.toml b/pyproject.toml
index 2c0875a8..dcf7d8af 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,6 +43,7 @@ dependencies = [
     "opentelemetry-exporter-otlp>=1.27.0",
     "md-to-adf>=1.1.0",
     "pyyaml>=6.0.0",
+    "aiofiles>=24.1.0",
 ]
 
 [project.optional-dependencies]
diff --git a/skills/aisos/implement-task/review.md b/skills/aisos/implement-task/review.md
new file mode 100644
index 00000000..65d9ec9d
--- /dev/null
+++ b/skills/aisos/implement-task/review.md
@@ -0,0 +1,126 @@
+---
+max_retries: 2
+---
+
+# Implementation Review — Forge/AISOS Override
+
+This is a project-specific override for the Forge codebase. It inherits from the default review criteria in `skills/default/implement-task/review.md` and adds Forge-specific requirements.
+
+> **For skill authors**: This file demonstrates per-project override patterns. Copy this structure to create overrides for your own project by placing it in `skills/{project-key-lowercase}/implement-task/review.md`.
+
+## Base Criteria (Inherited)
+
+Apply all criteria from the default `review.md`:
+
+1. **Test Coverage** — Public functions must have tests
+2. **Error Handling** — Explicit exception handling, no bare `except:`
+3. **Documentation** — Public APIs need docstrings
+4. **No Debug Code** — No stray `print()`, TODOs, or commented-out code
+
+## Forge-Specific Criteria
+
+These project-specific criteria extend the defaults for the Forge/AISOS codebase.
+
+### 5. Testing with pytest
+
+All tests must use pytest, not unittest or generic "run tests" approaches.
+
+- Test files must be in `tests/` and match `test_*.py` naming
+- Use pytest fixtures for setup, not `setUp()` methods
+- Use `pytest.raises()` for exception testing, not try/except in tests
+- Parameterized tests use `@pytest.mark.parametrize`, not loops
+- Async tests use `pytest-asyncio` with `@pytest.mark.asyncio`
+
+**Flag if**: `unittest.TestCase` subclasses, `self.assertEqual()` patterns, or missing pytest markers for async tests.
+
+### 6. Type Hints on Public Functions
+
+All public functions and methods must have type annotations.
+
+- Return types are required (use `-> None` for procedures)
+- Use `X | None` instead of `Optional[X]` (PEP 604)
+- Use `list[T]` and `dict[K, V]` instead of `List` and `Dict` from typing
+- Complex types should use `TypeAlias` or `TypedDict` for clarity
+
+**Flag if**: Public function missing parameter types or return type annotation.
+
+**Acceptable**: Private functions (prefixed with `_`) without full annotations.
+
+### 7. Logging via logging Module
+
+Use the `logging` module for all diagnostic output. Never use `print()` for logging.
+
+- Import pattern: `logger = logging.getLogger(__name__)`
+- Use appropriate log levels: `debug`, `info`, `warning`, `error`, `exception`
+- Exception logging must use `logger.exception()` or pass `exc_info=True`
+- Log messages should be lazy-formatted: `logger.info("Processing %s", item)` not f-strings
+
+**Flag if**: `print()` calls for diagnostic output, f-string log messages, exceptions logged without stack traces.
+
+**Acceptable**: `print()` in CLI entrypoints for user-facing output, test assertions.
+
+### 8. Async/Await Patterns
+
+Code using asyncio must follow proper async patterns.
+
+- Async functions must be awaited at call sites
+- Use `asyncio.gather()` for concurrent operations, not sequential awaits in loops
+- Context managers for async resources use `async with`
+- No blocking I/O (`time.sleep()`, sync HTTP calls) inside async functions
+- Use `contextlib.suppress()` instead of try/except/pass patterns
+
+**Flag if**: 
+- `await` inside a for-loop when operations are independent (should use `gather()`)
+- Sync `sleep()` or blocking calls in async context
+- Missing `await` on coroutine calls
+- `asyncio.run()` called from within an async context
+
+**Acceptable**: Sequential awaits when operations have dependencies.
+
+## Verdict
+
+After reviewing all criteria (base + Forge-specific), output exactly one marker:
+
+```
+APPROVED
+```
+
+Use when all criteria pass or only minor issues exist that don't block merge.
+
+```
+REJECTED
+```
+
+Use when any criterion fails with blocking issues.
+
+## Feedback Format
+
+On rejection, include the criterion number and name for Forge-specific violations:
+
+```
+REJECTED
+
+Issues:
+- [Criterion: Test Coverage] src/forge/api/handler.py:42 — `process_webhook()` is public but has no test
+- [Criterion: Type Hints] src/forge/models/task.py:15 — `parse_task()` missing return type annotation
+- [Criterion: Logging] src/forge/worker.py:87 — uses `print()` for error output instead of logger
+- [Criterion: Async/Await] src/forge/queue/consumer.py:32 — sequential awaits in loop should use `asyncio.gather()`
+
+Required changes:
+1. Add test for `process_webhook()` in `tests/unit/test_handler.py`
+2. Add `-> Task` return type to `parse_task()` function signature
+3. Replace `print(f"Error: {e}")` with `logger.error("Error: %s", e)`
+4. Refactor loop to: `await asyncio.gather(*[process(item) for item in items])`
+```
+
+## Override Configuration
+
+This file demonstrates per-project customization:
+
+| Setting | Default | This Override | Rationale |
+|---------|---------|---------------|-----------|
+| `max_retries` | 3 | 2 | Forge CI is fast; 2 retries is sufficient |
+| Test framework | generic | pytest | Forge uses pytest exclusively |
+| Type hints | optional | required | Forge enforces mypy strict mode |
+| Logging | generic | logging module | Forge uses structured logging |
+| Async patterns | n/a | strict | Forge uses asyncio throughout |
diff --git a/skills/default/implement-task/review.md b/skills/default/implement-task/review.md
new file mode 100644
index 00000000..04a537e6
--- /dev/null
+++ b/skills/default/implement-task/review.md
@@ -0,0 +1,96 @@
+---
+max_retries: 3
+---
+
+# Implementation Review
+
+Review the code changes for quality, completeness, and adherence to project standards. Run `git diff origin/main...HEAD` to see all changes on this branch.
+
+## Review Criteria
+
+Evaluate the diff against each criterion. Flag violations; do not flag items that pass.
+
+### 1. Test Coverage
+
+New public functions, methods, or exported symbols must have corresponding tests.
+
+- Public function added without test? Flag it.
+- Existing test file updated to cover new behavior? Passes.
+- Internal/private helpers without direct tests are acceptable if exercised by public API tests.
+
+### 2. Error Handling
+
+Errors must be handled explicitly with specific exception types or error values.
+
+- Bare `except:` (Python), empty `catch {}` (JS/TS), or `_ =>` discarding errors (Rust)? Flag it.
+- Generic catches that log and re-raise, or catch-all with explicit handling? Acceptable.
+- Go: Errors must be checked, not discarded with `_`.
+
+### 3. Documentation
+
+Public APIs must have documentation.
+
+- Public function/method without docstring, JSDoc, GoDoc, or rustdoc? Flag it.
+- Internal/private symbols without docs? Acceptable.
+- Complex logic without inline comments explaining the "why"? Consider flagging.
+
+### 4. No Debug Code
+
+Committed code must not contain debug artifacts.
+
+- `print()`, `console.log()`, `fmt.Println()` for debugging (not intentional logging)? Flag it.
+- `TODO`, `FIXME`, `XXX`, `HACK` comments in new code? Flag it.
+- Commented-out code blocks? Flag it.
+- Intentional logging via proper logger frameworks? Acceptable.
+
+## Verdict
+
+After reviewing all criteria, output exactly one of these markers:
+
+```
+APPROVED
+```
+
+Use when all criteria pass or only minor issues exist that don't block merge.
+
+```
+REJECTED
+```
+
+Use when any criterion fails with blocking issues.
+
+## Feedback Format
+
+On rejection, provide structured feedback for each violation:
+
+```
+REJECTED
+
+Issues:
+- [Criterion: Test Coverage] file.py:42 — `process_data()` is public but has no test
+- [Criterion: Error Handling] handler.go:87 — error from `db.Query()` discarded with `_`
+- [Criterion: No Debug Code] utils.ts:15 — contains `console.log()` debug statement
+- [Criterion: Documentation] api.rs:23 — public function `calculate_score` lacks rustdoc
+
+Required changes:
+1. Add test for `process_data()` in `test_file.py`
+2. Handle or propagate error from `db.Query()` at line 87
+3. Remove `console.log()` or replace with proper logger
+4. Add documentation comment for `calculate_score`
+```
+
+On approval with optional suggestions:
+
+```
+APPROVED
+
+Notes:
+- Consider adding edge case tests for `validate_input()` (non-blocking)
+```
+
+## Important
+
+- Be specific: include file path and line number for every issue
+- Be actionable: state exactly what needs to change
+- Be fair: don't flag acceptable patterns listed above
+- Stay language-agnostic: apply criteria to any stack (Python, Go, Node.js, Rust, etc.)
diff --git a/skills/default/local-code-review/review.md b/skills/default/local-code-review/review.md
new file mode 100644
index 00000000..3e7c1bbf
--- /dev/null
+++ b/skills/default/local-code-review/review.md
@@ -0,0 +1,131 @@
+---
+max_retries: 2
+---
+
+# Local Code Review Quality Gate
+
+This review runs after the local-code-review skill completes. It validates the changes are ready for PR creation by checking for breaking changes, test failures, security issues, and spec alignment.
+
+## Before Reviewing
+
+Run the project's test suite to verify all tests pass locally.
+
+### Detecting the Test Command
+
+Check for a test command in the following order:
+
+1. **pyproject.toml** — look for `[tool.pytest]` or `[project.scripts]` with test entry
+   - Default: `uv run pytest` or `pytest`
+2. **package.json** — look for `scripts.test`
+   - Default: `npm test`
+3. **Makefile** — look for `test:` target
+   - Default: `make test`
+4. **Cargo.toml** — Rust project
+   - Default: `cargo test`
+5. **go.mod** — Go project
+   - Default: `go test ./...`
+
+Run the detected test command. If tests fail, this review must be `REJECTED`.
+
+## Review Criteria
+
+Evaluate the diff (`git diff origin/main...HEAD`) against each criterion. Flag violations; do not flag items that pass.
+
+### 1. Breaking Changes
+
+API or interface changes must not break existing consumers without a migration path.
+
+- Public function signature changed (parameters added/removed, types changed) without deprecation or migration? Flag it.
+- Public class/struct fields removed or renamed without migration? Flag it.
+- Exported constant or enum value removed? Flag it.
+- Internal/private changes that don't affect the public API? Acceptable.
+- Changes with documented migration in commit message or changelog? Acceptable.
+
+### 2. Test Failures
+
+All tests must pass locally before PR creation.
+
+- Test suite executed and any test failed? Flag it with the failing test name(s).
+- Test suite not executed (skipped or couldn't run)? Flag it — tests must run.
+- All tests pass? Acceptable.
+
+### 3. Security Issues
+
+Code must not introduce security vulnerabilities.
+
+- **Hardcoded secrets**: API keys, passwords, tokens, or credentials in source code? Flag it.
+- **SQL injection**: String concatenation or f-strings in SQL queries instead of parameterized queries? Flag it.
+- **Path traversal**: User input used directly in file paths without validation or sanitization? Flag it.
+- **Shell injection**: User input passed to shell commands without proper escaping? Flag it.
+- Environment variables, config files, or secret managers for credentials? Acceptable.
+- Parameterized queries or ORM methods? Acceptable.
+- Path validation with allowlists or canonicalization? Acceptable.
+
+### 4. Spec Alignment
+
+Changes must match the task description and acceptance criteria.
+
+- Core requirement from the task description clearly not implemented? Flag it.
+- Acceptance criterion explicitly inverted (does the opposite of what's specified)? Flag it.
+- Implementation approach differs but satisfies the requirement? Acceptable.
+- Additional functionality beyond spec (if not breaking anything)? Acceptable.
+
+## Verdict
+
+After reviewing all criteria, output exactly one of these markers:
+
+```
+APPROVED
+```
+
+Use when all criteria pass:
+- No breaking changes without migration
+- All tests pass locally
+- No security vulnerabilities detected
+- Implementation aligns with task description
+
+```
+REJECTED
+```
+
+Use when any criterion fails.
+
+## Feedback Format
+
+On rejection, provide structured feedback for each violation:
+
+```
+REJECTED
+
+Issues:
+- [Criterion: Test Failures] `test_user_auth` failed with AssertionError
+- [Criterion: Breaking Changes] api.py:42 — `get_user()` parameter `user_id` changed to `id` without deprecation
+- [Criterion: Security] db.py:87 — SQL query uses f-string: `f"SELECT * FROM users WHERE id = {user_id}"`
+- [Criterion: Spec Alignment] handler.py:15 — task requires validation of email format but no validation implemented
+
+Required changes:
+1. Fix failing test `test_user_auth` or update test to match new behavior
+2. Add `user_id` as deprecated alias for `id` parameter or document breaking change
+3. Use parameterized query: `cursor.execute("SELECT * FROM users WHERE id = ?", (user_id,))`
+4. Add email format validation using regex or email-validator library
+```
+
+On approval:
+
+```
+APPROVED
+
+All criteria pass:
+- No breaking API changes detected
+- All 47 tests pass
+- No security issues found
+- Implementation matches task requirements
+```
+
+## Important
+
+- Run tests first — this is a hard requirement, not optional
+- Be specific: include file path and line number for every issue
+- Be actionable: state exactly what needs to change
+- Security issues are always blocking — never approve code with hardcoded secrets or injection vulnerabilities
+- This is a pre-PR gate — catch issues before they reach CI or human reviewers
diff --git a/src/forge/api/routes/metrics.py b/src/forge/api/routes/metrics.py
index 42c49656..9776b2be 100644
--- a/src/forge/api/routes/metrics.py
+++ b/src/forge/api/routes/metrics.py
@@ -122,6 +122,26 @@
     ["service", "operation", "error_type"],
 )
 
+# Review cycle metrics
+REVIEW_CYCLES = Counter(
+    "forge_review_cycles_total",
+    "Total review cycles detected",
+    ["skill", "step"],
+)
+
+REVIEW_VERDICTS = Counter(
+    "forge_review_verdicts_total",
+    "Review verdicts by outcome",
+    ["skill", "step", "verdict"],  # verdict: approved, rejected
+)
+
+REVIEW_DURATION = Histogram(
+    "forge_review_duration_seconds",
+    "Review cycle duration",
+    ["skill", "step"],
+    buckets=[1, 5, 10, 30, 60, 120, 300, 600],  # Same as AGENT_DURATION
+)
+
 
 @router.get("/metrics")
 async def metrics() -> Response:
@@ -232,3 +252,35 @@ def record_external_api_error(service: str, operation: str, error_type: str) ->
         error_type: Type of error (timeout, rate_limit, auth, etc.).
     """
     EXTERNAL_API_ERRORS.labels(service=service, operation=operation, error_type=error_type).inc()
+
+
+def record_review_cycle(skill: str, step: str) -> None:
+    """Record a review cycle detected.
+
+    Args:
+        skill: Skill name (e.g., implement-task, fix-ci).
+        step: Workflow step name.
+    """
+    REVIEW_CYCLES.labels(skill=skill, step=step).inc()
+
+
+def record_review_verdict(skill: str, step: str, verdict: str) -> None:
+    """Record a review verdict.
+
+    Args:
+        skill: Skill name (e.g., implement-task, fix-ci).
+        step: Workflow step name.
+        verdict: Verdict outcome (approved, rejected).
+    """
+    REVIEW_VERDICTS.labels(skill=skill, step=step, verdict=verdict).inc()
+
+
+def observe_review_duration(skill: str, step: str, duration: float) -> None:
+    """Record review cycle duration.
+
+    Args:
+        skill: Skill name (e.g., implement-task, fix-ci).
+        step: Workflow step name.
+        duration: Duration in seconds.
+    """
+    REVIEW_DURATION.labels(skill=skill, step=step).observe(duration)
diff --git a/src/forge/config.py b/src/forge/config.py
index e1bc7db9..79c64f27 100644
--- a/src/forge/config.py
+++ b/src/forge/config.py
@@ -330,6 +330,21 @@ def ignored_ci_checks(self) -> list[str]:
         description="Container CPU limit",
     )
 
+    # Auto Review Configuration
+    auto_review_poll_interval: float = Field(
+        default=5.0,
+        description="Polling interval in seconds for detecting review cycle files during container execution",
+    )
+    auto_review_record_polled_files: Literal["log", "copy"] | None = Field(
+        default=None,
+        description=(
+            "Recording mode for polled review cycle files. "
+            "'log' logs cycle data at INFO level. "
+            "'copy' copies files to {recording_dir}/{step-name}/review_cycle_*.json. "
+            "None disables recording."
+        ),
+    )
+
     # Queue Consumer Configuration
     queue_max_concurrent_tasks: int = Field(
         default=20,
diff --git a/src/forge/observability/__init__.py b/src/forge/observability/__init__.py
index b6fedc0d..b7c1dff2 100644
--- a/src/forge/observability/__init__.py
+++ b/src/forge/observability/__init__.py
@@ -10,6 +10,10 @@
     get_correlation_id,
     set_correlation_id,
 )
+from forge.observability.review_notifier import NotifyResult, ReviewJiraNotifier
+from forge.observability.review_poller import ReviewCycleData, ReviewCyclePoller
+from forge.observability.review_recorder import ReviewCycleData as RecorderReviewCycleData
+from forge.observability.review_recorder import ReviewCycleRecorder
 
 __all__ = [
     "configure_tracing",
@@ -18,4 +22,10 @@
     "CorrelationContext",
     "get_correlation_id",
     "set_correlation_id",
+    "ReviewCycleData",
+    "ReviewCyclePoller",
+    "RecorderReviewCycleData",
+    "ReviewCycleRecorder",
+    "ReviewJiraNotifier",
+    "NotifyResult",
 ]
diff --git a/src/forge/observability/review_notifier.py b/src/forge/observability/review_notifier.py
new file mode 100644
index 00000000..f790304d
--- /dev/null
+++ b/src/forge/observability/review_notifier.py
@@ -0,0 +1,191 @@
+"""Jira notifier for review cycle events.
+
+This module provides the ReviewJiraNotifier class for posting Jira comments
+when review cycles are detected during container execution.
+
+The notifier includes rate limiting to prevent comment spam and handles
+Jira API errors gracefully by logging and continuing without failing.
+
+Usage:
+    notifier = ReviewJiraNotifier(
+        jira_client=jira_client,
+        ticket_key="PROJ-123",
+    )
+
+    # Post a comment for a review cycle
+    await notifier.notify(cycle_data)
+
+    # With custom rate limit interval
+    notifier = ReviewJiraNotifier(
+        jira_client=jira_client,
+        ticket_key="PROJ-123",
+        rate_limit_seconds=60.0,
+    )
+"""
+
+import logging
+import time
+from dataclasses import dataclass
+
+from forge.integrations.jira import JiraClient
+from forge.observability.review_poller import ReviewCycleData
+
+logger = logging.getLogger(__name__)
+
+# Default rate limit interval between comments (seconds)
+DEFAULT_RATE_LIMIT_SECONDS = 30.0
+
+# Maximum length for feedback in comments before truncation
+MAX_FEEDBACK_LENGTH = 500
+
+
+@dataclass
+class NotifyResult:
+    """Result of a notify attempt.
+
+    Attributes:
+        posted: Whether the comment was successfully posted.
+        rate_limited: Whether the attempt was skipped due to rate limiting.
+        error: Error message if posting failed, None otherwise.
+    """
+
+    posted: bool
+    rate_limited: bool
+    error: str | None = None
+
+
+class ReviewJiraNotifier:
+    """Posts Jira comments for review cycle events with rate limiting.
+
+    This class handles posting review cycle information as Jira comments,
+    including cycle number, verdict, feedback summary, skill name, and duration.
+
+    Rate limiting prevents comment spam by enforcing a configurable interval
+    between comments. Jira API errors are logged but don't cause failures.
+
+    Attributes:
+        jira_client: The JiraClient instance for API calls.
+        ticket_key: The Jira ticket key to post comments to.
+        rate_limit_seconds: Minimum seconds between comments.
+    """
+
+    def __init__(
+        self,
+        jira_client: JiraClient,
+        ticket_key: str,
+        rate_limit_seconds: float = DEFAULT_RATE_LIMIT_SECONDS,
+    ):
+        """Initialize the review notifier.
+
+        Args:
+            jira_client: JiraClient instance for posting comments.
+            ticket_key: The Jira ticket key (e.g., "PROJ-123") for comments.
+            rate_limit_seconds: Minimum seconds between comments. Default 30s.
+        """
+        self.jira_client = jira_client
+        self.ticket_key = ticket_key
+        self.rate_limit_seconds = rate_limit_seconds
+        self._last_notify_time: float | None = None
+
+    def _is_rate_limited(self) -> bool:
+        """Check if we should skip due to rate limiting.
+
+        Returns:
+            True if rate limited (should skip), False if can proceed.
+        """
+        if self._last_notify_time is None:
+            return False
+
+        elapsed = time.monotonic() - self._last_notify_time
+        return elapsed < self.rate_limit_seconds
+
+    def _format_comment(self, cycle_data: ReviewCycleData) -> str:
+        """Format review cycle data as a readable Jira comment.
+
+        Args:
+            cycle_data: The review cycle data to format.
+
+        Returns:
+            Formatted comment string.
+        """
+        # Build the verdict display
+        verdict_upper = cycle_data.verdict.upper()
+        verdict_icon = "✅" if verdict_upper == "APPROVED" else "❌"
+
+        # Format duration
+        elapsed = cycle_data.elapsed_seconds
+        duration_str = f"{elapsed / 60:.1f}m" if elapsed >= 60 else f"{elapsed:.1f}s"
+
+        # Build header
+        lines = [
+            f"*Review Cycle {cycle_data.cycle}/{cycle_data.max_cycles}* — {verdict_icon} *{verdict_upper}*",
+            "",
+        ]
+
+        # Add skill and duration
+        if cycle_data.skill:
+            lines.append(f"*Skill:* {cycle_data.skill}")
+        lines.append(f"*Duration:* {duration_str}")
+
+        # Add feedback summary (truncated if too long)
+        feedback = cycle_data.feedback.strip()
+        if feedback:
+            lines.append("")
+            lines.append("*Feedback:*")
+
+            # Truncate if too long
+            if len(feedback) > MAX_FEEDBACK_LENGTH:
+                truncated = feedback[:MAX_FEEDBACK_LENGTH].rsplit(" ", 1)[0]
+                if len(truncated) < MAX_FEEDBACK_LENGTH // 2:
+                    # No good word boundary, just cut
+                    truncated = feedback[:MAX_FEEDBACK_LENGTH]
+                lines.append(f"{truncated}...")
+            else:
+                lines.append(feedback)
+
+        return "\n".join(lines)
+
+    async def notify(self, cycle_data: ReviewCycleData) -> NotifyResult:
+        """Post a Jira comment for a review cycle.
+
+        Handles rate limiting and API errors gracefully. If rate limited,
+        returns without posting. If API call fails, logs the error and
+        returns without raising.
+
+        Args:
+            cycle_data: The review cycle data to post.
+
+        Returns:
+            NotifyResult indicating success, rate limiting, or error.
+        """
+        # Check rate limiting
+        if self._is_rate_limited():
+            remaining = self.rate_limit_seconds - (time.monotonic() - (self._last_notify_time or 0))
+            logger.debug(
+                f"Rate limited: skipping Jira comment for {self.ticket_key} (wait {remaining:.1f}s)"
+            )
+            return NotifyResult(posted=False, rate_limited=True)
+
+        # Format the comment
+        comment_body = self._format_comment(cycle_data)
+
+        # Post to Jira with error handling
+        try:
+            await self.jira_client.add_comment(self.ticket_key, comment_body)
+            self._last_notify_time = time.monotonic()
+            logger.info(
+                f"Posted review cycle {cycle_data.cycle}/{cycle_data.max_cycles} "
+                f"comment to {self.ticket_key}"
+            )
+            return NotifyResult(posted=True, rate_limited=False)
+        except Exception as e:
+            # Log and continue - don't fail the container
+            logger.warning(f"Failed to post review comment to {self.ticket_key}: {e}")
+            return NotifyResult(posted=False, rate_limited=False, error=str(e))
+
+    def reset_rate_limit(self) -> None:
+        """Reset the rate limit timer.
+
+        Call this to allow immediate posting after a long pause.
+        """
+        self._last_notify_time = None
diff --git a/src/forge/observability/review_poller.py b/src/forge/observability/review_poller.py
new file mode 100644
index 00000000..3ed01a44
--- /dev/null
+++ b/src/forge/observability/review_poller.py
@@ -0,0 +1,293 @@
+"""Review cycle poller for observability during container execution.
+
+This module provides the ReviewCyclePoller class that implements async polling
+for review cycle files written by container agents during task execution.
+
+The poller detects files at the step-specific path:
+    .forge/{step-name}/review_cycle_*.json
+
+where step-name (e.g., "implement_task", "generate_prd") is passed when
+creating the poller instance.
+"""
+
+import asyncio
+import json
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+
+import aiofiles
+
+from forge.config import Settings, get_settings
+
+logger = logging.getLogger(__name__)
+
+# Maximum retries for JSON parsing (race condition with container writes)
+MAX_JSON_PARSE_RETRIES = 3
+# Delay between JSON parse retries in seconds
+JSON_PARSE_RETRY_DELAY = 0.5
+
+
+@dataclass
+class ReviewCycleData:
+    """Data captured for a single review cycle iteration.
+
+    This mirrors the ReviewCycleData from containers.review for use in
+    the orchestrator-side polling.
+
+    Attributes:
+        cycle: Current cycle number (1-indexed).
+        max_cycles: Maximum cycles allowed.
+        verdict: Review outcome ("approved" or "rejected").
+        feedback: Reviewer feedback text.
+        skill: Name of the skill that performed the review.
+        elapsed_seconds: Time taken for this review cycle.
+        timestamp: ISO 8601 UTC timestamp of cycle completion.
+        file_path: Path to the source JSON file (not serialized).
+    """
+
+    cycle: int
+    max_cycles: int
+    verdict: str
+    feedback: str
+    skill: str
+    elapsed_seconds: float
+    timestamp: str
+    file_path: str = ""
+
+    @classmethod
+    def from_dict(cls, data: dict, file_path: str = "") -> "ReviewCycleData":
+        """Create ReviewCycleData from a dictionary.
+
+        Args:
+            data: Dictionary containing review cycle fields.
+            file_path: Path to the source file for tracking.
+
+        Returns:
+            ReviewCycleData instance.
+        """
+        return cls(
+            cycle=data["cycle"],
+            max_cycles=data["max_cycles"],
+            verdict=data["verdict"],
+            feedback=data.get("feedback", ""),
+            skill=data.get("skill", ""),
+            elapsed_seconds=data.get("elapsed_seconds", 0.0),
+            timestamp=data.get("timestamp", ""),
+            file_path=file_path,
+        )
+
+
+class ReviewCyclePoller:
+    """Async poller for review cycle files during container execution.
+
+    This class polls for review_cycle_*.json files in the step-specific
+    directory and returns newly detected ReviewCycleData objects.
+
+    Usage:
+        poller = ReviewCyclePoller(
+            workspace_path=Path("/workspace"),
+            step_name="implement_task",
+        )
+
+        # Start polling in background
+        async for new_cycles in poller.poll():
+            for cycle in new_cycles:
+                print(f"Review cycle {cycle.cycle}: {cycle.verdict}")
+
+        # Or poll once
+        new_cycles = await poller.poll_once()
+    """
+
+    def __init__(
+        self,
+        workspace_path: Path,
+        step_name: str,
+        settings: Settings | None = None,
+    ):
+        """Initialize the review cycle poller.
+
+        Args:
+            workspace_path: Path to the workspace root (where .forge/ is located).
+            step_name: Name of the step (e.g., "implement_task") for path detection.
+            settings: Application settings. Uses default if not provided.
+        """
+        self.workspace_path = Path(workspace_path)
+        self.step_name = step_name
+        self._settings = settings
+        self._processed_files: set[str] = set()
+        self._running = False
+
+    @property
+    def settings(self) -> Settings:
+        """Get settings, lazily loading if not provided."""
+        if self._settings is None:
+            self._settings = get_settings()
+        return self._settings
+
+    @property
+    def poll_interval(self) -> float:
+        """Get the polling interval from settings."""
+        return self.settings.auto_review_poll_interval
+
+    @property
+    def review_cycle_dir(self) -> Path:
+        """Get the directory path for review cycle files."""
+        return self.workspace_path / ".forge" / self.step_name
+
+    def _get_review_cycle_files(self) -> list[Path]:
+        """Get list of review_cycle_*.json files in the step directory.
+
+        Returns:
+            List of Path objects for matching files.
+        """
+        cycle_dir = self.review_cycle_dir
+        if not cycle_dir.exists():
+            return []
+
+        return sorted(cycle_dir.glob("review_cycle_*.json"))
+
+    async def _parse_json_with_retry(self, file_path: Path) -> dict | None:
+        """Parse JSON file with retry for incomplete reads.
+
+        This handles race conditions where the container may still be
+        writing the file when we try to read it.
+
+        Args:
+            file_path: Path to the JSON file.
+
+        Returns:
+            Parsed JSON dict, or None if parsing fails after retries.
+        """
+        for attempt in range(MAX_JSON_PARSE_RETRIES):
+            try:
+                async with aiofiles.open(file_path, encoding="utf-8") as f:
+                    content = await f.read()
+
+                if not content.strip():
+                    # Empty file, likely still being written
+                    if attempt < MAX_JSON_PARSE_RETRIES - 1:
+                        logger.debug(
+                            "Empty file %s, retrying (%d/%d)",
+                            file_path,
+                            attempt + 1,
+                            MAX_JSON_PARSE_RETRIES,
+                        )
+                        await asyncio.sleep(JSON_PARSE_RETRY_DELAY)
+                        continue
+                    return None
+
+                return json.loads(content)
+
+            except json.JSONDecodeError as e:
+                if attempt < MAX_JSON_PARSE_RETRIES - 1:
+                    logger.debug(
+                        "JSON parse error for %s: %s, retrying (%d/%d)",
+                        file_path,
+                        e,
+                        attempt + 1,
+                        MAX_JSON_PARSE_RETRIES,
+                    )
+                    await asyncio.sleep(JSON_PARSE_RETRY_DELAY)
+                else:
+                    logger.warning(
+                        "Failed to parse %s after %d attempts: %s",
+                        file_path,
+                        MAX_JSON_PARSE_RETRIES,
+                        e,
+                    )
+                    return None
+
+            except OSError as e:
+                logger.warning("Error reading %s: %s", file_path, e)
+                return None
+
+        return None
+
+    async def poll_once(self) -> list[ReviewCycleData]:
+        """Poll for new review cycle files once.
+
+        Returns:
+            List of newly detected ReviewCycleData objects.
+        """
+        new_cycles: list[ReviewCycleData] = []
+        files = self._get_review_cycle_files()
+
+        for file_path in files:
+            file_key = str(file_path)
+
+            if file_key in self._processed_files:
+                continue
+
+            data = await self._parse_json_with_retry(file_path)
+            if data is None:
+                continue
+
+            try:
+                cycle_data = ReviewCycleData.from_dict(data, file_path=file_key)
+                new_cycles.append(cycle_data)
+                self._processed_files.add(file_key)
+                logger.debug(
+                    "Detected review cycle %d for step %s: %s",
+                    cycle_data.cycle,
+                    self.step_name,
+                    cycle_data.verdict,
+                )
+            except (KeyError, TypeError) as e:
+                logger.warning("Invalid review cycle data in %s: %s", file_path, e)
+
+        return new_cycles
+
+    def poll(self) -> "ReviewCyclePoller":
+        """Start the polling loop as an async iterator.
+
+        Yields:
+            List of newly detected ReviewCycleData objects on each poll.
+
+        Usage:
+            async for new_cycles in poller.poll():
+                for cycle in new_cycles:
+                    process(cycle)
+        """
+        self._running = True
+        return self
+
+    def stop(self) -> None:
+        """Stop the polling loop."""
+        self._running = False
+
+    def __aiter__(self) -> "ReviewCyclePoller":
+        """Return self as async iterator."""
+        return self
+
+    async def __anext__(self) -> list[ReviewCycleData]:
+        """Get next batch of new review cycles.
+
+        Returns:
+            List of newly detected ReviewCycleData objects.
+
+        Raises:
+            StopAsyncIteration: When polling is stopped.
+        """
+        if not self._running:
+            raise StopAsyncIteration
+
+        # Wait for the poll interval before checking
+        await asyncio.sleep(self.poll_interval)
+
+        if not self._running:
+            raise StopAsyncIteration
+
+        return await self.poll_once()
+
+    def reset(self) -> None:
+        """Reset the set of processed files.
+
+        This allows re-detecting previously processed files.
+        """
+        self._processed_files.clear()
+
+    @property
+    def processed_count(self) -> int:
+        """Get the number of processed files."""
+        return len(self._processed_files)
diff --git a/src/forge/observability/review_recorder.py b/src/forge/observability/review_recorder.py
new file mode 100644
index 00000000..961b628f
--- /dev/null
+++ b/src/forge/observability/review_recorder.py
@@ -0,0 +1,206 @@
+"""Review cycle recorder for observability of review cycle data.
+
+This module provides the ReviewCycleRecorder class that records review cycle
+data either by logging or by copying files to a recording directory.
+
+Modes:
+    - mode="log": Log cycle data at INFO level via logging
+    - mode="copy": Copy files to {recording_dir}/{step-name}/review_cycle_*.json
+    - mode=None: No recording (disabled)
+
+The step name is passed to the recorder constructor and used to organize
+recorded files into step-specific subdirectories.
+"""
+
+import json
+import logging
+import shutil
+from dataclasses import asdict, dataclass
+from datetime import datetime
+from pathlib import Path
+from typing import Literal
+
+logger = logging.getLogger(__name__)
+
+# Type alias for recording modes
+RecordingMode = Literal["log", "copy"] | None
+
+
+@dataclass
+class ReviewCycleData:
+    """Data captured for a single review cycle iteration.
+
+    This dataclass stores all fields specified in the Epic spec for
+    review cycle recording and observability.
+
+    Attributes:
+        cycle: Current cycle number (1-indexed).
+        max_cycles: Maximum cycles allowed.
+        verdict: Review outcome ("approved" or "rejected").
+        feedback: Reviewer feedback text.
+        skill: Name of the skill that performed the review.
+        elapsed_seconds: Time taken for this review cycle.
+        timestamp: Datetime of cycle completion.
+    """
+
+    cycle: int
+    max_cycles: int
+    verdict: str
+    feedback: str
+    skill: str
+    elapsed_seconds: float
+    timestamp: datetime
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary with ISO 8601 formatted timestamp.
+
+        Returns:
+            Dictionary representation suitable for JSON serialization.
+        """
+        data = asdict(self)
+        # Convert datetime to ISO 8601 string
+        data["timestamp"] = self.timestamp.isoformat()
+        return data
+
+    def to_json(self) -> str:
+        """Convert to JSON string with pretty printing.
+
+        Returns:
+            JSON string representation.
+        """
+        return json.dumps(self.to_dict(), indent=2, ensure_ascii=False)
+
+
+class ReviewCycleRecorder:
+    """Records review cycle data based on configured mode.
+
+    This class handles recording of review cycle data either by logging
+    to stdout/file or by copying files to a recording directory.
+
+    Usage:
+        recorder = ReviewCycleRecorder(
+            step_name="implement_task",
+            mode="log",
+        )
+        recorder.record(cycle_data)
+
+        # Or for copy mode
+        recorder = ReviewCycleRecorder(
+            step_name="implement_task",
+            mode="copy",
+            recording_dir=Path("/recordings"),
+        )
+        recorder.record_file(Path("/workspace/.forge/step/review_cycle_1.json"))
+    """
+
+    def __init__(
+        self,
+        step_name: str,
+        mode: RecordingMode = None,
+        recording_dir: Path | None = None,
+    ):
+        """Initialize the review cycle recorder.
+
+        Args:
+            step_name: Name of the step (e.g., "implement_task") for file organization.
+            mode: Recording mode - "log", "copy", or None (disabled).
+            recording_dir: Base directory for copying files (required for copy mode).
+
+        Raises:
+            ValueError: If mode is "copy" but recording_dir is not provided.
+        """
+        self.step_name = step_name
+        self.mode = mode
+        self.recording_dir = Path(recording_dir) if recording_dir else None
+
+        if mode == "copy" and not self.recording_dir:
+            raise ValueError("recording_dir is required when mode='copy'")
+
+    @property
+    def step_dir(self) -> Path | None:
+        """Get the step-specific directory for recorded files.
+
+        Returns:
+            Path to {recording_dir}/{step-name}/ or None if no recording_dir.
+        """
+        if self.recording_dir is None:
+            return None
+        return self.recording_dir / self.step_name
+
+    def record(self, cycle_data: ReviewCycleData) -> None:
+        """Record review cycle data based on configured mode.
+
+        For log mode, logs the data at INFO level with structured format.
+        For copy mode, this method does nothing (use record_file for files).
+        For disabled mode (None), this method does nothing.
+
+        Args:
+            cycle_data: The review cycle data to record.
+        """
+        if self.mode is None:
+            return
+
+        if self.mode == "log":
+            self._log_cycle_data(cycle_data)
+        # Copy mode is handled by record_file
+
+    def record_file(self, source_file: Path) -> Path | None:
+        """Record a review cycle file by copying it to the recording directory.
+
+        Only works in copy mode. Creates the step-specific subdirectory if needed.
+
+        Args:
+            source_file: Path to the source review_cycle_*.json file.
+
+        Returns:
+            Path to the copied file, or None if mode is not "copy" or copy failed.
+        """
+        if self.mode != "copy":
+            return None
+
+        if not self.recording_dir:
+            return None
+
+        if not source_file.exists():
+            logger.warning("Source file does not exist: %s", source_file)
+            return None
+
+        # Create step-specific subdirectory
+        step_dir = self.step_dir
+        if step_dir is None:
+            return None
+
+        try:
+            step_dir.mkdir(parents=True, exist_ok=True)
+        except OSError as e:
+            logger.error("Failed to create directory %s: %s", step_dir, e)
+            return None
+
+        # Copy file preserving the filename
+        dest_file = step_dir / source_file.name
+        try:
+            shutil.copy2(source_file, dest_file)
+            logger.debug("Copied review cycle file to %s", dest_file)
+            return dest_file
+        except OSError as e:
+            logger.error("Failed to copy file to %s: %s", dest_file, e)
+            return None
+
+    def _log_cycle_data(self, cycle_data: ReviewCycleData) -> None:
+        """Log review cycle data at INFO level with structured format.
+
+        Args:
+            cycle_data: The review cycle data to log.
+        """
+        logger.info(
+            "Review cycle %d/%d for %s: verdict=%s skill=%s elapsed=%.2fs feedback=%r",
+            cycle_data.cycle,
+            cycle_data.max_cycles,
+            self.step_name,
+            cycle_data.verdict,
+            cycle_data.skill,
+            cycle_data.elapsed_seconds,
+            cycle_data.feedback[:100] + "..."
+            if len(cycle_data.feedback) > 100
+            else cycle_data.feedback,
+        )
diff --git a/src/forge/sandbox/runner.py b/src/forge/sandbox/runner.py
index a422ef91..50c06c42 100644
--- a/src/forge/sandbox/runner.py
+++ b/src/forge/sandbox/runner.py
@@ -13,20 +13,64 @@
 """
 
 import asyncio
+import contextlib
 import json
 import logging
 import os
 import shutil
 from dataclasses import dataclass, field
+from datetime import datetime
 from pathlib import Path
 from typing import Any
 
+from forge.api.routes.metrics import (
+    observe_review_duration,
+    record_review_cycle,
+    record_review_verdict,
+)
 from forge.config import Settings, get_settings
+from forge.observability import (
+    RecorderReviewCycleData,
+    ReviewCycleData,
+    ReviewCyclePoller,
+    ReviewCycleRecorder,
+)
 from forge.prompts import load_prompt
 from forge.skills.resolver import resolve_skill_paths
 
 logger = logging.getLogger(__name__)
 
+
+def _poller_to_recorder_cycle(
+    poller_cycle: ReviewCycleData, _step_name: str
+) -> RecorderReviewCycleData:
+    """Convert poller's ReviewCycleData to recorder's format.
+
+    Args:
+        poller_cycle: ReviewCycleData from the poller (has string timestamp).
+        step_name: The workflow step name for context.
+
+    Returns:
+        RecorderReviewCycleData suitable for the recorder (has datetime timestamp).
+    """
+    # Parse ISO 8601 timestamp string to datetime
+    try:
+        timestamp = datetime.fromisoformat(poller_cycle.timestamp.replace("Z", "+00:00"))
+    except (ValueError, AttributeError):
+        # Fallback to now if parsing fails
+        timestamp = datetime.now()
+
+    return RecorderReviewCycleData(
+        cycle=poller_cycle.cycle,
+        max_cycles=poller_cycle.max_cycles,
+        verdict=poller_cycle.verdict,
+        feedback=poller_cycle.feedback,
+        skill=poller_cycle.skill,
+        elapsed_seconds=poller_cycle.elapsed_seconds,
+        timestamp=timestamp,
+    )
+
+
 # Default container image (can be overridden via CONTAINER_IMAGE env var)
 # Use localhost/ prefix to avoid podman short-name resolution prompts
 DEFAULT_IMAGE = "localhost/forge-dev:latest"
@@ -48,6 +92,7 @@ class ContainerResult:
     stderr: str
     tests_passed: bool | None = None  # None if tests were skipped
     error_message: str | None = None
+    review_cycles: list[ReviewCycleData] = field(default_factory=list)
 
     @property
     def tests_failed(self) -> bool:
@@ -366,6 +411,136 @@ async def _stop_timed_out_container(
             process.kill()
             await process.wait()
 
+    def _sweep_review_cycles(
+        self,
+        workspace_path: Path,
+        step_name: str,
+        processed_files: set[str],
+        collected_cycles: list[ReviewCycleData],
+        recorder: ReviewCycleRecorder,
+    ) -> None:
+        """Synchronous post-execution sweep for missed review cycle files.
+
+        This method scans for any review_cycle_*.json files that may have been
+        missed during async polling, especially if the container exits quickly
+        after writing.
+
+        Args:
+            workspace_path: Path to the workspace root.
+            step_name: Name of the step for path organization.
+            processed_files: Set of file paths already processed by the poller.
+            collected_cycles: List to append newly found cycles to.
+            recorder: Recorder for logging/copying detected cycles.
+        """
+        cycle_dir = workspace_path / ".forge" / step_name
+        if not cycle_dir.exists():
+            return
+
+        # Find all review cycle files
+        all_files = sorted(cycle_dir.glob("review_cycle_*.json"))
+
+        missed_count = 0
+        for file_path in all_files:
+            file_key = str(file_path)
+
+            # Skip files already processed by the async poller
+            if file_key in processed_files:
+                continue
+
+            # This file was missed during polling - parse and collect it
+            try:
+                content = file_path.read_text(encoding="utf-8")
+                if not content.strip():
+                    logger.warning("Empty review cycle file during sweep: %s", file_path)
+                    continue
+
+                data = json.loads(content)
+                cycle_data = ReviewCycleData.from_dict(data, file_path=file_key)
+                collected_cycles.append(cycle_data)
+                missed_count += 1
+
+                # Record via recorder
+                recorder.record(_poller_to_recorder_cycle(cycle_data, step_name))
+                recorder.record_file(file_path)
+
+                # Emit Prometheus metrics
+                record_review_cycle(cycle_data.skill, step_name)
+                record_review_verdict(cycle_data.skill, step_name, cycle_data.verdict)
+                observe_review_duration(cycle_data.skill, step_name, cycle_data.elapsed_seconds)
+
+                logger.debug(
+                    "Sweep caught review cycle %d/%d for %s: %s",
+                    cycle_data.cycle,
+                    cycle_data.max_cycles,
+                    step_name,
+                    cycle_data.verdict,
+                )
+
+            except json.JSONDecodeError as e:
+                logger.warning("Failed to parse review cycle file %s: %s", file_path, e)
+            except (KeyError, TypeError) as e:
+                logger.warning("Invalid review cycle data in %s: %s", file_path, e)
+            except OSError as e:
+                logger.warning("Error reading review cycle file %s: %s", file_path, e)
+
+        if missed_count > 0:
+            logger.warning(
+                "Sweep caught %d review cycle file(s) missed during async polling for step %s",
+                missed_count,
+                step_name,
+            )
+
+    async def _poll_review_cycles(
+        self,
+        poller: ReviewCyclePoller,
+        recorder: ReviewCycleRecorder,
+        collected_cycles: list[ReviewCycleData],
+    ) -> None:
+        """Background task to poll for review cycle files during container execution.
+
+        This task polls the workspace for review_cycle_*.json files and:
+        - Collects detected ReviewCycleData into the provided list
+        - Records cycles via the recorder (log or copy mode)
+        - Emits Prometheus metrics for observability
+
+        Args:
+            poller: The ReviewCyclePoller instance to use for polling.
+            recorder: The ReviewCycleRecorder for recording cycles.
+            collected_cycles: List to aggregate detected cycles into.
+        """
+        try:
+            async for new_cycles in poller.poll():
+                for cycle in new_cycles:
+                    collected_cycles.append(cycle)
+
+                    # Record cycle via recorder (handles log/copy modes)
+                    recorder.record(
+                        # Convert poller's ReviewCycleData to recorder's format
+                        # (using to_dict style conversion)
+                        _poller_to_recorder_cycle(cycle, poller.step_name)
+                    )
+
+                    # Copy file if in copy mode
+                    if cycle.file_path:
+                        recorder.record_file(Path(cycle.file_path))
+
+                    # Emit Prometheus metrics
+                    record_review_cycle(cycle.skill, poller.step_name)
+                    record_review_verdict(cycle.skill, poller.step_name, cycle.verdict)
+                    observe_review_duration(cycle.skill, poller.step_name, cycle.elapsed_seconds)
+
+                    logger.debug(
+                        "Review cycle %d/%d for %s: %s",
+                        cycle.cycle,
+                        cycle.max_cycles,
+                        poller.step_name,
+                        cycle.verdict,
+                    )
+        except asyncio.CancelledError:
+            # Clean exit on cancellation
+            logger.debug("Review polling task cancelled")
+            raise
+
     async def run(
         self,
         workspace_path: Path,
@@ -377,6 +552,7 @@ async def run(
         repo_name: str | None = None,
         previous_task_keys: list[str] | None = None,
         trace_context: dict[str, Any] | None = None,
+        step_name: str | None = None,
     ) -> ContainerResult:
         """Run a task in a container sandbox.
 
@@ -390,9 +566,12 @@ async def run(
             repo_name: Repository name (e.g., "owner/repo") for container naming.
             previous_task_keys: List of previously implemented task keys for handoff context.
             trace_context: Workflow fields forwarded to Langfuse only.
+            step_name: Workflow step name (e.g., "implement_task", "local_review")
+                for organizing review cycle files under .forge/{step-name}/.
+                If not provided, review polling is disabled.
 
         Returns:
-            ContainerResult with execution status and logs.
+            ContainerResult with execution status, logs, and review_cycles.
         """
         config = config or self._default_config()
 
@@ -409,6 +588,10 @@ async def run(
         }
         task_file.write_text(json.dumps(task_data, indent=2))
 
+        # List to collect review cycles detected during execution
+        collected_cycles: list[ReviewCycleData] = []
+        polling_task: asyncio.Task | None = None
+
         try:
             # Build container name and command
             container_name = self._build_container_name(ticket_key, repo_name)
@@ -426,6 +609,27 @@ async def run(
                 stderr=asyncio.subprocess.PIPE,
             )
 
+            # Start review polling background task if step_name is provided
+            if step_name:
+                poller = ReviewCyclePoller(
+                    workspace_path=workspace_path,
+                    step_name=step_name,
+                    settings=self.settings,
+                )
+                # Note: recording_dir is not currently configurable.
+                # For copy mode, the recorder needs a recording_dir, so we use
+                # log mode only unless copy mode is explicitly needed in the future.
+                record_mode = self.settings.auto_review_record_polled_files
+                recorder = ReviewCycleRecorder(
+                    step_name=step_name,
+                    mode=record_mode if record_mode != "copy" else "log",
+                    recording_dir=None,
+                )
+                polling_task = asyncio.create_task(
+                    self._poll_review_cycles(poller, recorder, collected_cycles)
+                )
+                logger.debug(f"Started review polling for step: {step_name}")
+
             try:
                 stdout, stderr = await asyncio.wait_for(
                     process.communicate(),
@@ -440,11 +644,46 @@ async def run(
                     stdout="",
                     stderr="Container execution timed out",
                     error_message="Timeout exceeded",
+                    review_cycles=collected_cycles,
                 )
             except asyncio.CancelledError:
                 logger.warning(f"Container execution cancelled, stopping {container_name}")
                 await self._stop_timed_out_container(container_name, process)
                 raise  # Re-raise CancelledError
+            finally:
+                # Stop and clean up polling task
+                if polling_task:
+                    # Stop the poller
+                    poller.stop()
+                    # Cancel the polling task
+                    polling_task.cancel()
+                    with contextlib.suppress(asyncio.CancelledError):
+                        await polling_task
+                    logger.debug("Review polling task stopped")
+
+                    # Do one final async poll to catch any remaining files
+                    final_cycles = await poller.poll_once()
+                    for cycle in final_cycles:
+                        collected_cycles.append(cycle)
+                        recorder.record(_poller_to_recorder_cycle(cycle, poller.step_name))
+                        if cycle.file_path:
+                            recorder.record_file(Path(cycle.file_path))
+                        record_review_cycle(cycle.skill, poller.step_name)
+                        record_review_verdict(cycle.skill, poller.step_name, cycle.verdict)
+                        observe_review_duration(
+                            cycle.skill, poller.step_name, cycle.elapsed_seconds
+                        )
+
+                    # Synchronous sweep for any files missed during async polling
+                    # This catches files written just before container exit that may
+                    # not have been detected by the async poller
+                    self._sweep_review_cycles(
+                        workspace_path=workspace_path,
+                        step_name=step_name,
+                        processed_files=poller._processed_files,
+                        collected_cycles=collected_cycles,
+                        recorder=recorder,
+                    )
 
             exit_code = process.returncode or 0
             stdout_str = stdout.decode("utf-8", errors="replace")
@@ -480,6 +719,7 @@ async def run(
                     stdout=stdout_str,
                     stderr=stderr_str,
                     tests_passed=True,
+                    review_cycles=collected_cycles,
                 )
             elif exit_code == EXIT_TESTS_FAILED:
                 return ContainerResult(
@@ -489,6 +729,7 @@ async def run(
                     stderr=stderr_str,
                     tests_passed=False,
                     error_message="Tests failed after max retries",
+                    review_cycles=collected_cycles,
                 )
             else:
                 return ContainerResult(
@@ -497,6 +738,7 @@ async def run(
                     stdout=stdout_str,
                     stderr=stderr_str,
                     error_message=f"Task failed with exit code {exit_code}",
+                    review_cycles=collected_cycles,
                 )
 
         finally:
diff --git a/src/forge/workflow/nodes/ci_evaluator.py b/src/forge/workflow/nodes/ci_evaluator.py
index d94d3161..a8a379e8 100644
--- a/src/forge/workflow/nodes/ci_evaluator.py
+++ b/src/forge/workflow/nodes/ci_evaluator.py
@@ -324,6 +324,7 @@ async def attempt_ci_fix(state: WorkflowState) -> WorkflowState:
             ticket_key=ticket_key,
             task_key=f"{ticket_key}-ci-analyze",
             repo_name=state.get("current_repo", ""),
+            step_name="analyze_ci",
         )
 
         if not fix_plan_file.exists():
@@ -352,6 +353,7 @@ async def attempt_ci_fix(state: WorkflowState) -> WorkflowState:
             ticket_key=ticket_key,
             task_key=f"{ticket_key}-ci-fix",
             repo_name=state.get("current_repo", ""),
+            step_name="fix_ci",
         )
 
         workspace = Workspace(
diff --git a/src/forge/workflow/nodes/code_review.py b/src/forge/workflow/nodes/code_review.py
index 95fb7692..ac73aee0 100644
--- a/src/forge/workflow/nodes/code_review.py
+++ b/src/forge/workflow/nodes/code_review.py
@@ -66,6 +66,7 @@ async def run_post_change_review(
             ticket_key=ticket_key,
             task_key=f"{ticket_key}-review-{label}",
             repo_name=current_repo,
+            step_name="code_review",
         )
 
         git = GitOperations(
diff --git a/src/forge/workflow/nodes/docs_updater.py b/src/forge/workflow/nodes/docs_updater.py
index b454c3f2..7c32fa36 100644
--- a/src/forge/workflow/nodes/docs_updater.py
+++ b/src/forge/workflow/nodes/docs_updater.py
@@ -59,6 +59,7 @@ async def update_documentation(state: WorkflowState) -> WorkflowState:
             ticket_key=ticket_key,
             task_key=f"{ticket_key}-docs",
             repo_name=current_repo,
+            step_name="update_docs",
         )
 
         git = GitOperations(
diff --git a/src/forge/workflow/nodes/implement_review.py b/src/forge/workflow/nodes/implement_review.py
index fcf66e90..27b519f0 100644
--- a/src/forge/workflow/nodes/implement_review.py
+++ b/src/forge/workflow/nodes/implement_review.py
@@ -178,6 +178,7 @@ async def implement_review(state: WorkflowState) -> WorkflowState:
             ticket_key=ticket_key,
             task_key=f"{ticket_key}-review-analyze",
             repo_name=current_repo,
+            step_name="implement_review_analyze",
         )
 
         # ── Check for objections ──────────────────────────────────────────────
@@ -220,6 +221,7 @@ async def implement_review(state: WorkflowState) -> WorkflowState:
                 ticket_key=ticket_key,
                 task_key=f"{ticket_key}-review-fix",
                 repo_name=current_repo,
+                step_name="implement_review_fix",
             )
 
             # Commit any uncommitted changes the container left
diff --git a/src/forge/workflow/nodes/implementation.py b/src/forge/workflow/nodes/implementation.py
index 596e06ab..a57b1ce6 100644
--- a/src/forge/workflow/nodes/implementation.py
+++ b/src/forge/workflow/nodes/implementation.py
@@ -145,6 +145,7 @@ async def implement_task(state: WorkflowState) -> WorkflowState:
             ticket_key=ticket_key,
             task_key=current_task,
             repo_name=current_repo,
+            step_name="implement_task",
             previous_task_keys=implemented_tasks,
             trace_context=_build_implementation_trace_context(
                 state,
diff --git a/src/forge/workflow/nodes/local_reviewer.py b/src/forge/workflow/nodes/local_reviewer.py
index ffeef0cb..31dca022 100644
--- a/src/forge/workflow/nodes/local_reviewer.py
+++ b/src/forge/workflow/nodes/local_reviewer.py
@@ -148,6 +148,7 @@ async def _run_bug_review(state: WorkflowState) -> WorkflowState:
         result = await runner.run(
             workspace_path=Path(workspace_path),
             task_summary="Qualitative bug review — root cause and test coverage",
+            step_name="local_review",
             task_description=task_description,
             ticket_key=ticket_key,
             task_key=f"{ticket_key}-qualreview",
@@ -312,6 +313,7 @@ async def _run_feature_review(state: WorkflowState) -> WorkflowState:
             ticket_key=ticket_key,
             task_key=f"{ticket_key}-review",
             repo_name=current_repo,
+            step_name="local_review",
         )
 
         git = GitOperations(
diff --git a/src/forge/workflow/nodes/plan_bug_fix.py b/src/forge/workflow/nodes/plan_bug_fix.py
index e59ad448..fff9c5e5 100644
--- a/src/forge/workflow/nodes/plan_bug_fix.py
+++ b/src/forge/workflow/nodes/plan_bug_fix.py
@@ -138,6 +138,7 @@ async def _run_plan_container(
                 task_description=task_description,
                 ticket_key=ticket_key,
                 task_key=f"{ticket_key}-plan",
+                step_name="plan_bug_fix",
             )
 
             if not result.success:
diff --git a/src/forge/workflow/nodes/rca_analysis.py b/src/forge/workflow/nodes/rca_analysis.py
index a95b6f96..b59efa32 100644
--- a/src/forge/workflow/nodes/rca_analysis.py
+++ b/src/forge/workflow/nodes/rca_analysis.py
@@ -96,6 +96,7 @@ async def analyze_bug(state: BugState) -> BugState:
                 task_description=task_description,
                 ticket_key=ticket_key,
                 task_key=f"{ticket_key}-analysis",
+                step_name="analyze_bug",
             )
 
             if not result.success:
@@ -242,6 +243,7 @@ async def reflect_rca(state: BugState) -> BugState:
                 task_description=task_description,
                 ticket_key=ticket_key,
                 task_key=task_key,
+                step_name="reflect_rca",
             )
 
             if not result.success:
diff --git a/src/forge/workflow/nodes/rebase.py b/src/forge/workflow/nodes/rebase.py
index a4b0071a..d7010204 100644
--- a/src/forge/workflow/nodes/rebase.py
+++ b/src/forge/workflow/nodes/rebase.py
@@ -155,6 +155,7 @@ async def rebase_pr(state: WorkflowState) -> WorkflowState:
             ticket_key=ticket_key,
             task_key=f"{ticket_key}-rebase",
             repo_name=current_repo,
+            step_name="rebase",
         )
 
         if result.exit_code != 0:
diff --git a/tests/unit/api/routes/test_metrics.py b/tests/unit/api/routes/test_metrics.py
index 21a962f1..8aa8814f 100644
--- a/tests/unit/api/routes/test_metrics.py
+++ b/tests/unit/api/routes/test_metrics.py
@@ -88,3 +88,138 @@ def test_increment_webhook_counter(self):
 
         # Should not raise
         WEBHOOKS_RECEIVED.labels(source="github", event_type="check_run").inc()
+
+
+class TestReviewCycleMetrics:
+    """Tests for review cycle metrics registration and recording."""
+
+    def test_review_cycles_counter_exists(self):
+        """Review cycles counter is registered."""
+        from forge.api.routes.metrics import REVIEW_CYCLES
+
+        assert REVIEW_CYCLES is not None
+
+    def test_review_cycles_counter_has_labels(self):
+        """Review cycles counter has skill and step labels."""
+        from forge.api.routes.metrics import REVIEW_CYCLES
+
+        labeled = REVIEW_CYCLES.labels(skill="implement-task", step="implementation")
+        assert labeled is not None
+
+    def test_review_verdicts_counter_exists(self):
+        """Review verdicts counter is registered."""
+        from forge.api.routes.metrics import REVIEW_VERDICTS
+
+        assert REVIEW_VERDICTS is not None
+
+    def test_review_verdicts_counter_has_labels(self):
+        """Review verdicts counter has skill, step, and verdict labels."""
+        from forge.api.routes.metrics import REVIEW_VERDICTS
+
+        labeled = REVIEW_VERDICTS.labels(
+            skill="implement-task", step="implementation", verdict="approved"
+        )
+        assert labeled is not None
+
+    def test_review_duration_histogram_exists(self):
+        """Review duration histogram is registered."""
+        from forge.api.routes.metrics import REVIEW_DURATION
+
+        assert REVIEW_DURATION is not None
+
+    def test_review_duration_histogram_has_labels(self):
+        """Review duration histogram has skill and step labels."""
+        from forge.api.routes.metrics import REVIEW_DURATION
+
+        labeled = REVIEW_DURATION.labels(skill="fix-ci", step="ci_fix")
+        assert labeled is not None
+
+    def test_record_review_cycle_helper(self):
+        """record_review_cycle helper increments counter."""
+        from forge.api.routes.metrics import REVIEW_CYCLES, record_review_cycle
+
+        # Get initial value
+        initial = REVIEW_CYCLES.labels(skill="test-skill", step="test-step")._value.get()
+
+        record_review_cycle(skill="test-skill", step="test-step")
+
+        # Counter should be incremented
+        final = REVIEW_CYCLES.labels(skill="test-skill", step="test-step")._value.get()
+        assert final == initial + 1
+
+    def test_record_review_verdict_helper(self):
+        """record_review_verdict helper increments counter."""
+        from forge.api.routes.metrics import REVIEW_VERDICTS, record_review_verdict
+
+        # Get initial value
+        initial = REVIEW_VERDICTS.labels(
+            skill="test-skill", step="test-step", verdict="approved"
+        )._value.get()
+
+        record_review_verdict(skill="test-skill", step="test-step", verdict="approved")
+
+        # Counter should be incremented
+        final = REVIEW_VERDICTS.labels(
+            skill="test-skill", step="test-step", verdict="approved"
+        )._value.get()
+        assert final == initial + 1
+
+    def test_record_review_verdict_rejected(self):
+        """record_review_verdict helper handles rejected verdict."""
+        from forge.api.routes.metrics import REVIEW_VERDICTS, record_review_verdict
+
+        # Get initial value
+        initial = REVIEW_VERDICTS.labels(
+            skill="task-skill", step="task-step", verdict="rejected"
+        )._value.get()
+
+        record_review_verdict(skill="task-skill", step="task-step", verdict="rejected")
+
+        # Counter should be incremented
+        final = REVIEW_VERDICTS.labels(
+            skill="task-skill", step="task-step", verdict="rejected"
+        )._value.get()
+        assert final == initial + 1
+
+    def test_observe_review_duration_helper(self):
+        """observe_review_duration helper observes histogram."""
+        from forge.api.routes.metrics import REVIEW_DURATION, observe_review_duration
+
+        # Get initial sum
+        initial_sum = REVIEW_DURATION.labels(
+            skill="duration-skill", step="duration-step"
+        )._sum.get()
+
+        observe_review_duration(skill="duration-skill", step="duration-step", duration=45.5)
+
+        # Sum should increase by observed value
+        final_sum = REVIEW_DURATION.labels(
+            skill="duration-skill", step="duration-step"
+        )._sum.get()
+        # Just check that something was observed (sum increased)
+        assert final_sum >= initial_sum + 45.5
+
+    @pytest.mark.asyncio
+    async def test_review_metrics_in_endpoint_output(self):
+        """Review metrics appear in /metrics endpoint output."""
+        from forge.api.routes.metrics import (
+            observe_review_duration,
+            record_review_cycle,
+            record_review_verdict,
+        )
+
+        # Record some metrics first
+        record_review_cycle(skill="endpoint-skill", step="endpoint-step")
+        record_review_verdict(skill="endpoint-skill", step="endpoint-step", verdict="approved")
+        observe_review_duration(skill="endpoint-skill", step="endpoint-step", duration=30.0)
+
+        async with AsyncClient(
+            transport=ASGITransport(app=app),
+            base_url="http://test"
+        ) as client:
+            response = await client.get("/metrics")
+
+        body = response.text
+        assert "forge_review_cycles_total" in body
+        assert "forge_review_verdicts_total" in body
+        assert "forge_review_duration_seconds" in body
diff --git a/tests/unit/containers/__init__.py b/tests/unit/containers/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/unit/containers/conftest.py b/tests/unit/containers/conftest.py
new file mode 100644
index 00000000..74931205
--- /dev/null
+++ b/tests/unit/containers/conftest.py
@@ -0,0 +1,8 @@
+"""Container tests conftest - adds containers/ to sys.path."""
+
+import sys
+from pathlib import Path
+
+_containers_dir = str(Path(__file__).parents[3] / "containers")
+if _containers_dir not in sys.path:
+    sys.path.insert(0, _containers_dir)
diff --git a/tests/unit/containers/test_entrypoint_review.py b/tests/unit/containers/test_entrypoint_review.py
new file mode 100644
index 00000000..67018acf
--- /dev/null
+++ b/tests/unit/containers/test_entrypoint_review.py
@@ -0,0 +1,789 @@
+"""Unit tests for review loop integration in entrypoint.py."""
+
+import json
+import sys
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+# Add containers to path
+sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+
+# ---------------------------------------------------------------------------
+# Test load_conversation_history
+# ---------------------------------------------------------------------------
+
+
+class TestLoadConversationHistory:
+    def test_loads_existing_history(self, tmp_path: Path):
+        """Test loading conversation history from existing file (SC-003)."""
+        from entrypoint import load_conversation_history
+
+        # Create history file
+        history_dir = tmp_path / ".forge" / "history"
+        history_dir.mkdir(parents=True)
+        history_file = history_dir / "TEST-123.json"
+        history_data = {
+            "task_key": "TEST-123",
+            "messages": [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi there"},
+            ],
+        }
+        history_file.write_text(json.dumps(history_data))
+
+        result = load_conversation_history(tmp_path, "TEST-123")
+
+        assert result is not None
+        assert len(result) == 2
+        assert result[0]["role"] == "user"
+        assert result[1]["role"] == "assistant"
+
+    def test_returns_none_when_file_missing(self, tmp_path: Path):
+        """Test returns None when history file doesn't exist."""
+        from entrypoint import load_conversation_history
+
+        result = load_conversation_history(tmp_path, "MISSING-123")
+
+        assert result is None
+
+    def test_returns_none_on_json_error(self, tmp_path: Path):
+        """Test returns None when JSON parsing fails."""
+        from entrypoint import load_conversation_history
+
+        # Create malformed history file
+        history_dir = tmp_path / ".forge" / "history"
+        history_dir.mkdir(parents=True)
+        history_file = history_dir / "BAD-123.json"
+        history_file.write_text("not valid json")
+
+        result = load_conversation_history(tmp_path, "BAD-123")
+
+        assert result is None
+
+
+# ---------------------------------------------------------------------------
+# Test run_worker_with_feedback
+# ---------------------------------------------------------------------------
+
+
+class TestRunWorkerWithFeedback:
+    @pytest.mark.asyncio
+    async def test_injects_feedback_section(self, tmp_path: Path):
+        """Test that feedback is injected into task description (SC-003)."""
+        with patch("entrypoint.run_agent_task") as mock_run_agent:
+            mock_run_agent.return_value = True
+
+            from entrypoint import run_worker_with_feedback
+
+            result = await run_worker_with_feedback(
+                workspace=tmp_path,
+                task_key="TEST-123",
+                task_summary="Test task",
+                task_description="Original description",
+                guardrails="Some guardrails",
+                feedback="Please fix the error handling",
+                previous_task_keys=["TEST-122"],
+            )
+
+            assert result is True
+            mock_run_agent.assert_called_once()
+
+            # Check that feedback was injected
+            call_kwargs = mock_run_agent.call_args
+            task_description = call_kwargs[1]["task_description"]
+            assert "## Reviewer Feedback" in task_description
+            assert "Please fix the error handling" in task_description
+            assert "Original description" in task_description
+
+    @pytest.mark.asyncio
+    async def test_loads_conversation_history(self, tmp_path: Path):
+        """Test that conversation history is loaded (SC-003)."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        # Create history file
+        history_dir = tmp_path / ".forge" / "history"
+        history_dir.mkdir(parents=True)
+        history_file = history_dir / "TEST-123.json"
+        history_data = {"task_key": "TEST-123", "messages": [{"role": "user", "content": "Test"}]}
+        history_file.write_text(json.dumps(history_data))
+
+        with patch("entrypoint.run_agent_task") as mock_run_agent:
+            mock_run_agent.return_value = True
+
+            from entrypoint import run_worker_with_feedback
+
+            result = await run_worker_with_feedback(
+                workspace=tmp_path,
+                task_key="TEST-123",
+                task_summary="Test task",
+                task_description="Original description",
+                guardrails="",
+                feedback="Feedback text",
+            )
+
+            assert result is True
+
+
+# ---------------------------------------------------------------------------
+# Test run_review_loop
+# ---------------------------------------------------------------------------
+
+
+class TestRunReviewLoop:
+    @pytest.mark.asyncio
+    async def test_approved_verdict_exits_successfully(self, tmp_path: Path):
+        """Test that APPROVED verdict terminates loop successfully (SC-002)."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        # Create review.md
+        review_md = tmp_path / "review.md"
+        review_md.write_text("---\nmax_retries: 3\n---\nReview instructions here")
+
+        with patch("entrypoint.run_reviewer_agent") as mock_reviewer:
+            mock_reviewer.return_value = "The code looks great. APPROVED"
+
+            from entrypoint import run_review_loop
+
+            result = await run_review_loop(
+                workspace=tmp_path,
+                task_key="TEST-123",
+                task_summary="Test task",
+                task_description="Description",
+                guardrails="",
+                skill_name="test-skill",
+                review_md_path=review_md,
+            )
+
+            assert result is True
+            mock_reviewer.assert_called_once()
+
+            # Check cycle file was written
+            cycle_file = tmp_path / ".forge" / "test-skill" / "review_cycle_1.json"
+            assert cycle_file.exists()
+            cycle_data = json.loads(cycle_file.read_text())
+            assert cycle_data["verdict"] == "approved"
+            assert cycle_data["cycle"] == 1
+
+    @pytest.mark.asyncio
+    async def test_rejected_triggers_retry(self, tmp_path: Path):
+        """Test that REJECTED verdict triggers retry with feedback (SC-003)."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        # Create review.md with 2 retries
+        review_md = tmp_path / "review.md"
+        review_md.write_text("---\nmax_retries: 2\n---\nReview instructions")
+
+        call_count = 0
+
+        async def mock_reviewer_side_effect(*_args, **_kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                return "REJECTED: Missing error handling"
+            return "APPROVED"
+
+        with (
+            patch("entrypoint.run_reviewer_agent") as mock_reviewer,
+            patch("entrypoint.run_worker_with_feedback") as mock_worker,
+        ):
+            mock_reviewer.side_effect = mock_reviewer_side_effect
+            mock_worker.return_value = True
+
+            from entrypoint import run_review_loop
+
+            result = await run_review_loop(
+                workspace=tmp_path,
+                task_key="TEST-123",
+                task_summary="Test task",
+                task_description="Description",
+                guardrails="",
+                skill_name="test-skill",
+                review_md_path=review_md,
+            )
+
+            assert result is True
+            assert mock_reviewer.call_count == 2
+            mock_worker.assert_called_once()
+
+            # Check feedback was passed to worker
+            worker_call = mock_worker.call_args
+            assert "Missing error handling" in worker_call[1]["feedback"]
+
+    @pytest.mark.asyncio
+    async def test_max_retries_exhausted_exits_success(self, tmp_path: Path):
+        """Test that max retries exhausted exits with success (BR-005)."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        # Create review.md with 2 retries
+        review_md = tmp_path / "review.md"
+        review_md.write_text("---\nmax_retries: 2\n---\nReview instructions")
+
+        with (
+            patch("entrypoint.run_reviewer_agent") as mock_reviewer,
+            patch("entrypoint.run_worker_with_feedback") as mock_worker,
+        ):
+            # Always reject
+            mock_reviewer.return_value = "REJECTED: Still not good"
+            mock_worker.return_value = True
+
+            from entrypoint import run_review_loop
+
+            result = await run_review_loop(
+                workspace=tmp_path,
+                task_key="TEST-123",
+                task_summary="Test task",
+                task_description="Description",
+                guardrails="",
+                skill_name="test-skill",
+                review_md_path=review_md,
+            )
+
+            # Should exit with success even after max retries
+            assert result is True
+            assert mock_reviewer.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_uses_frontmatter_max_retries(self, tmp_path: Path):
+        """Test that max_retries from frontmatter is enforced (SC-004)."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        # Create review.md with 5 retries
+        review_md = tmp_path / "review.md"
+        review_md.write_text("---\nmax_retries: 5\n---\nReview instructions")
+
+        with (
+            patch("entrypoint.run_reviewer_agent") as mock_reviewer,
+            patch("entrypoint.run_worker_with_feedback") as mock_worker,
+        ):
+            mock_reviewer.return_value = "REJECTED"
+            mock_worker.return_value = True
+
+            from entrypoint import run_review_loop
+
+            result = await run_review_loop(
+                workspace=tmp_path,
+                task_key="TEST-123",
+                task_summary="Test task",
+                task_description="Description",
+                guardrails="",
+                skill_name="test-skill",
+                review_md_path=review_md,
+            )
+
+            # Should have run 5 review cycles
+            assert mock_reviewer.call_count == 5
+            assert result is True
+
+    @pytest.mark.asyncio
+    async def test_uses_env_var_fallback(self, tmp_path: Path, monkeypatch):
+        """Test that AUTO_REVIEW_MAX_RETRIES env var is used as fallback (SC-005)."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        monkeypatch.setenv("AUTO_REVIEW_MAX_RETRIES", "4")
+
+        # Create review.md without frontmatter
+        review_md = tmp_path / "review.md"
+        review_md.write_text("Review instructions only, no frontmatter")
+
+        with (
+            patch("entrypoint.run_reviewer_agent") as mock_reviewer,
+            patch("entrypoint.run_worker_with_feedback") as mock_worker,
+        ):
+            mock_reviewer.return_value = "REJECTED"
+            mock_worker.return_value = True
+
+            from entrypoint import run_review_loop
+
+            result = await run_review_loop(
+                workspace=tmp_path,
+                task_key="TEST-123",
+                task_summary="Test task",
+                task_description="Description",
+                guardrails="",
+                skill_name="test-skill",
+                review_md_path=review_md,
+            )
+
+            # Should have run 4 review cycles (from env var)
+            assert mock_reviewer.call_count == 4
+            assert result is True
+
+    @pytest.mark.asyncio
+    async def test_cycle_file_written_to_correct_path(self, tmp_path: Path):
+        """Test that cycle file is written to .forge/{step_name}/review_cycle_N.json (SC-007)."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        review_md = tmp_path / "review.md"
+        review_md.write_text("---\nmax_retries: 1\n---\nReview")
+
+        with patch("entrypoint.run_reviewer_agent") as mock_reviewer:
+            mock_reviewer.return_value = "APPROVED"
+
+            from entrypoint import run_review_loop
+
+            await run_review_loop(
+                workspace=tmp_path,
+                task_key="TEST-123",
+                task_summary="Test",
+                task_description="Desc",
+                guardrails="",
+                skill_name="my-custom-skill",
+                review_md_path=review_md,
+            )
+
+            # Check file path matches spec
+            cycle_file = tmp_path / ".forge" / "my-custom-skill" / "review_cycle_1.json"
+            assert cycle_file.exists()
+
+    @pytest.mark.asyncio
+    async def test_reviewer_agent_receives_instructions(self, tmp_path: Path):
+        """Test that reviewer agent receives review.md instructions (SC-001)."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        review_md = tmp_path / "review.md"
+        review_md.write_text("---\nmax_retries: 1\n---\nCheck for security issues and code quality")
+
+        with patch("entrypoint.run_reviewer_agent") as mock_reviewer:
+            mock_reviewer.return_value = "APPROVED"
+
+            from entrypoint import run_review_loop
+
+            await run_review_loop(
+                workspace=tmp_path,
+                task_key="TEST-123",
+                task_summary="Test",
+                task_description="Desc",
+                guardrails="",
+                skill_name="test-skill",
+                review_md_path=review_md,
+            )
+
+            # Check reviewer was called with instructions
+            call_kwargs = mock_reviewer.call_args[1]
+            assert "Check for security issues" in call_kwargs["review_instructions"]
+
+    @pytest.mark.asyncio
+    async def test_cycle_timing_tracked(self, tmp_path: Path):
+        """Test that cycle timing is tracked with time.perf_counter()."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        review_md = tmp_path / "review.md"
+        review_md.write_text("---\nmax_retries: 1\n---\nReview")
+
+        with patch("entrypoint.run_reviewer_agent") as mock_reviewer:
+            mock_reviewer.return_value = "APPROVED"
+
+            from entrypoint import run_review_loop
+
+            await run_review_loop(
+                workspace=tmp_path,
+                task_key="TEST-123",
+                task_summary="Test",
+                task_description="Desc",
+                guardrails="",
+                skill_name="test-skill",
+                review_md_path=review_md,
+            )
+
+            cycle_file = tmp_path / ".forge" / "test-skill" / "review_cycle_1.json"
+            cycle_data = json.loads(cycle_file.read_text())
+
+            # elapsed_seconds should be a positive float
+            assert isinstance(cycle_data["elapsed_seconds"], float)
+            assert cycle_data["elapsed_seconds"] >= 0
+
+    @pytest.mark.asyncio
+    async def test_reviewer_error_treated_as_rejection(self, tmp_path: Path):
+        """Test that reviewer agent errors are treated as rejections."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        review_md = tmp_path / "review.md"
+        review_md.write_text("---\nmax_retries: 1\n---\nReview")
+
+        with patch("entrypoint.run_reviewer_agent") as mock_reviewer:
+            mock_reviewer.side_effect = RuntimeError("API Error")
+
+            from entrypoint import run_review_loop
+
+            result = await run_review_loop(
+                workspace=tmp_path,
+                task_key="TEST-123",
+                task_summary="Test",
+                task_description="Desc",
+                guardrails="",
+                skill_name="test-skill",
+                review_md_path=review_md,
+            )
+
+            # Should still exit successfully after max retries
+            assert result is True
+
+            # Check that rejection was recorded
+            cycle_file = tmp_path / ".forge" / "test-skill" / "review_cycle_1.json"
+            cycle_data = json.loads(cycle_file.read_text())
+            assert cycle_data["verdict"] == "rejected"
+
+
+# ---------------------------------------------------------------------------
+# Test main function review loop integration
+# ---------------------------------------------------------------------------
+
+
+class TestMainReviewLoopIntegration:
+    def test_skips_review_when_no_skill_name(self, tmp_path: Path, monkeypatch):
+        """Test that review loop is skipped when no skill_name provided (SC-010)."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        # Create task file without skill_name
+        task_file = tmp_path / "task.json"
+        task_file.write_text(
+            json.dumps(
+                {
+                    "task_key": "TEST-123",
+                    "summary": "Test task",
+                    "description": "Test description",
+                }
+            )
+        )
+
+        # Create workspace
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        monkeypatch.setenv("FORGE_SYSTEM_PROMPT_TEMPLATE", "Test prompt {task_key}")
+        monkeypatch.chdir(workspace)
+
+        with (
+            patch("entrypoint.asyncio.run") as mock_asyncio_run,
+            patch("entrypoint.configure_git"),
+            patch("entrypoint.subprocess.run") as mock_subprocess,
+        ):
+            mock_asyncio_run.return_value = True
+            mock_subprocess.return_value = MagicMock(returncode=0)
+
+            from entrypoint import main
+
+            with pytest.raises(SystemExit) as exc_info:
+                sys.argv = [
+                    "entrypoint.py",
+                    "--task-file",
+                    str(task_file),
+                    "--workspace",
+                    str(workspace),
+                ]
+                main()
+
+            # Should exit successfully
+            assert exc_info.value.code == 0
+
+    def test_skips_review_when_no_review_md(self, tmp_path: Path, monkeypatch):
+        """Test that review loop is skipped when review.md doesn't exist (SC-010)."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        # Create task file with skill_name
+        task_file = tmp_path / "task.json"
+        task_file.write_text(
+            json.dumps(
+                {
+                    "task_key": "TEST-123",
+                    "summary": "Test task",
+                    "description": "Test description",
+                    "skill_name": "nonexistent-skill",
+                }
+            )
+        )
+
+        # Create workspace
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        monkeypatch.setenv("FORGE_SYSTEM_PROMPT_TEMPLATE", "Test prompt {task_key}")
+        monkeypatch.chdir(workspace)
+
+        with (
+            patch("entrypoint.asyncio.run") as mock_asyncio_run,
+            patch("entrypoint.configure_git"),
+            patch("entrypoint.subprocess.run") as mock_subprocess,
+            patch("entrypoint.detect_review_md") as mock_detect,
+        ):
+            mock_asyncio_run.return_value = True
+            mock_subprocess.return_value = MagicMock(returncode=0)
+            mock_detect.return_value = None  # No review.md found
+
+            from entrypoint import main
+
+            with pytest.raises(SystemExit) as exc_info:
+                sys.argv = [
+                    "entrypoint.py",
+                    "--task-file",
+                    str(task_file),
+                    "--workspace",
+                    str(workspace),
+                ]
+                main()
+
+            # Should exit successfully without running review loop
+            assert exc_info.value.code == 0
+            # run_review_loop should not have been called
+            # (only run_agent_task via asyncio.run)
+            assert mock_asyncio_run.call_count == 1
+
+    def test_runs_review_when_review_md_exists(self, tmp_path: Path, monkeypatch):
+        """Test that review loop runs when review.md exists (SC-001)."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        # Create task file with skill_name
+        task_file = tmp_path / "task.json"
+        task_file.write_text(
+            json.dumps(
+                {
+                    "task_key": "TEST-123",
+                    "summary": "Test task",
+                    "description": "Test description",
+                    "skill_name": "test-skill",
+                }
+            )
+        )
+
+        # Create workspace
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        # Create review.md
+        review_md = tmp_path / "review.md"
+        review_md.write_text("---\nmax_retries: 1\n---\nReview instructions")
+
+        monkeypatch.setenv("FORGE_SYSTEM_PROMPT_TEMPLATE", "Test prompt {task_key}")
+        monkeypatch.chdir(workspace)
+
+        asyncio_call_count = 0
+
+        def mock_asyncio_side_effect(_coro):
+            nonlocal asyncio_call_count
+            asyncio_call_count += 1
+            return True
+
+        with (
+            patch("entrypoint.asyncio.run") as mock_asyncio_run,
+            patch("entrypoint.configure_git"),
+            patch("entrypoint.subprocess.run") as mock_subprocess,
+            patch("entrypoint.detect_review_md") as mock_detect,
+        ):
+            mock_asyncio_run.side_effect = mock_asyncio_side_effect
+            mock_subprocess.return_value = MagicMock(returncode=0)
+            mock_detect.return_value = review_md
+
+            from entrypoint import main
+
+            with pytest.raises(SystemExit) as exc_info:
+                sys.argv = [
+                    "entrypoint.py",
+                    "--task-file",
+                    str(task_file),
+                    "--workspace",
+                    str(workspace),
+                ]
+                main()
+
+            assert exc_info.value.code == 0
+            # Should have called asyncio.run twice: once for run_agent_task, once for run_review_loop
+            assert asyncio_call_count == 2
+
+
+# ---------------------------------------------------------------------------
+# Test _print_review_progress (SC-011)
+# ---------------------------------------------------------------------------
+
+
+class TestPrintReviewProgress:
+    """Tests for terminal progress display in local mode (SC-011)."""
+
+    def test_prints_progress_when_tty(self, capsys):
+        """Test progress is printed when stdout is a TTY."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        with patch("entrypoint._IS_TTY", True):
+            # Need to reload the function to pick up the mocked _IS_TTY
+            import importlib
+
+            import entrypoint
+
+            importlib.reload(entrypoint)
+
+            # Re-patch after reload
+            with patch.object(entrypoint, "_IS_TTY", True):
+                entrypoint._print_review_progress(1, 3, "rejected", "Missing tests")
+
+                captured = capsys.readouterr()
+                assert "Review cycle 1/3: REJECTED - Missing tests" in captured.out
+
+    def test_no_output_when_not_tty(self, capsys):
+        """Test no output when stdout is not a TTY."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        import importlib
+
+        import entrypoint
+
+        importlib.reload(entrypoint)
+
+        with patch.object(entrypoint, "_IS_TTY", False):
+            entrypoint._print_review_progress(1, 3, "rejected", "Missing tests")
+
+            captured = capsys.readouterr()
+            assert captured.out == ""
+
+    def test_feedback_truncated_at_200_chars(self, capsys):
+        """Test that feedback is truncated to 200 characters with ellipsis."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        import importlib
+
+        import entrypoint
+
+        importlib.reload(entrypoint)
+
+        long_feedback = "x" * 300  # 300 characters
+
+        with patch.object(entrypoint, "_IS_TTY", True):
+            entrypoint._print_review_progress(2, 5, "rejected", long_feedback)
+
+            captured = capsys.readouterr()
+            # Should have 200 chars + "..."
+            assert "x" * 200 + "..." in captured.out
+            # Should NOT have 201 x's
+            assert "x" * 201 not in captured.out
+
+    def test_feedback_not_truncated_when_under_200(self, capsys):
+        """Test that feedback under 200 chars is not truncated."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        import importlib
+
+        import entrypoint
+
+        importlib.reload(entrypoint)
+
+        short_feedback = "Fix the error handling in function xyz"
+
+        with patch.object(entrypoint, "_IS_TTY", True):
+            entrypoint._print_review_progress(1, 3, "rejected", short_feedback)
+
+            captured = capsys.readouterr()
+            assert short_feedback in captured.out
+            assert "..." not in captured.out
+
+    def test_verdict_uppercased(self, capsys):
+        """Test that verdict is uppercased in output."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        import importlib
+
+        import entrypoint
+
+        importlib.reload(entrypoint)
+
+        with patch.object(entrypoint, "_IS_TTY", True):
+            entrypoint._print_review_progress(1, 3, "approved", "")
+
+            captured = capsys.readouterr()
+            assert "APPROVED" in captured.out
+            assert "approved" not in captured.out
+
+    def test_no_feedback_shows_just_verdict(self, capsys):
+        """Test that empty feedback shows just the verdict without dash."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        import importlib
+
+        import entrypoint
+
+        importlib.reload(entrypoint)
+
+        with patch.object(entrypoint, "_IS_TTY", True):
+            entrypoint._print_review_progress(3, 3, "approved", "")
+
+            captured = capsys.readouterr()
+            assert "Review cycle 3/3: APPROVED" in captured.out
+            assert " - " not in captured.out
+
+    def test_format_matches_spec(self, capsys):
+        """Test output format matches spec: 'Review cycle N/M: VERDICT - feedback'."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        import importlib
+
+        import entrypoint
+
+        importlib.reload(entrypoint)
+
+        with patch.object(entrypoint, "_IS_TTY", True):
+            entrypoint._print_review_progress(1, 3, "rejected", "Please add error handling")
+
+            captured = capsys.readouterr()
+            expected = "Review cycle 1/3: REJECTED - Please add error handling\n"
+            assert captured.out == expected
+
+    def test_feedback_exactly_200_chars_not_truncated(self, capsys):
+        """Test that feedback exactly 200 chars is not truncated."""
+        import sys
+
+        sys.path.insert(0, str(Path(__file__).parents[3] / "containers"))
+
+        import importlib
+
+        import entrypoint
+
+        importlib.reload(entrypoint)
+
+        exact_feedback = "y" * 200
+
+        with patch.object(entrypoint, "_IS_TTY", True):
+            entrypoint._print_review_progress(1, 2, "rejected", exact_feedback)
+
+            captured = capsys.readouterr()
+            assert exact_feedback in captured.out
+            assert "..." not in captured.out
diff --git a/tests/unit/containers/test_review.py b/tests/unit/containers/test_review.py
new file mode 100644
index 00000000..5107b1c9
--- /dev/null
+++ b/tests/unit/containers/test_review.py
@@ -0,0 +1,1133 @@
+"""Unit tests for containers.review module."""
+
+import json
+from pathlib import Path
+
+import pytest
+from review import (
+    DEFAULT_MAX_RETRIES,
+    ENV_MAX_RETRIES,
+    ReviewConfig,
+    ReviewCycleData,
+    Verdict,
+    detect_review_md,
+    parse_review_config,
+    parse_verdict,
+    write_cycle_file,
+)
+
+# ---------------------------------------------------------------------------
+# Verdict enum tests
+# ---------------------------------------------------------------------------
+
+
+class TestVerdict:
+    def test_approved_value(self):
+        assert Verdict.APPROVED == "approved"
+        assert Verdict.APPROVED.value == "approved"
+
+    def test_rejected_value(self):
+        assert Verdict.REJECTED == "rejected"
+        assert Verdict.REJECTED.value == "rejected"
+
+    def test_is_str_enum(self):
+        # StrEnum values can be used directly as strings
+        assert f"Verdict: {Verdict.APPROVED}" == "Verdict: approved"
+
+
+# ---------------------------------------------------------------------------
+# ReviewConfig dataclass tests
+# ---------------------------------------------------------------------------
+
+
+class TestReviewConfig:
+    def test_default_values(self):
+        config = ReviewConfig()
+        assert config.max_retries == DEFAULT_MAX_RETRIES
+        assert config.instructions == ""
+
+    def test_custom_values(self):
+        config = ReviewConfig(max_retries=5, instructions="Check for bugs")
+        assert config.max_retries == 5
+        assert config.instructions == "Check for bugs"
+
+    def test_default_max_retries_is_3(self):
+        assert DEFAULT_MAX_RETRIES == 3
+
+
+# ---------------------------------------------------------------------------
+# ReviewCycleData dataclass tests
+# ---------------------------------------------------------------------------
+
+
+class TestReviewCycleData:
+    def test_all_fields_required(self):
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="Looks good!",
+            skill="code-review",
+            elapsed_seconds=12.5,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        assert data.cycle == 1
+        assert data.max_cycles == 3
+        assert data.verdict == "approved"
+        assert data.feedback == "Looks good!"
+        assert data.skill == "code-review"
+        assert data.elapsed_seconds == 12.5
+        assert data.timestamp == "2024-01-15T10:30:00Z"
+
+    def test_verdict_can_be_approved_or_rejected(self):
+        approved = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict=Verdict.APPROVED,
+            feedback="",
+            skill="test",
+            elapsed_seconds=1.0,
+            timestamp="2024-01-01T00:00:00Z",
+        )
+        assert approved.verdict == "approved"
+
+        rejected = ReviewCycleData(
+            cycle=2,
+            max_cycles=3,
+            verdict=Verdict.REJECTED,
+            feedback="Needs work",
+            skill="test",
+            elapsed_seconds=2.0,
+            timestamp="2024-01-01T00:00:00Z",
+        )
+        assert rejected.verdict == "rejected"
+
+
+# ---------------------------------------------------------------------------
+# parse_review_config tests
+# ---------------------------------------------------------------------------
+
+
+class TestParseReviewConfig:
+    """Tests for parse_review_config function."""
+
+    def test_parse_valid_frontmatter(self, tmp_path: Path):
+        review_md = tmp_path / "review.md"
+        review_md.write_text(
+            """\
+---
+max_retries: 5
+---
+Review the code for security issues.
+"""
+        )
+        config = parse_review_config(review_md)
+        assert config.max_retries == 5
+        assert config.instructions == "Review the code for security issues."
+
+    def test_parse_only_instructions_no_frontmatter(self, tmp_path: Path):
+        review_md = tmp_path / "review.md"
+        review_md.write_text("Just some instructions, no frontmatter.")
+
+        config = parse_review_config(review_md)
+        assert config.max_retries == DEFAULT_MAX_RETRIES
+        assert config.instructions == "Just some instructions, no frontmatter."
+
+    def test_parse_empty_frontmatter(self, tmp_path: Path):
+        review_md = tmp_path / "review.md"
+        review_md.write_text(
+            """\
+---
+---
+Instructions after empty frontmatter.
+"""
+        )
+        config = parse_review_config(review_md)
+        assert config.max_retries == DEFAULT_MAX_RETRIES
+        assert config.instructions == "Instructions after empty frontmatter."
+
+    def test_file_not_found_returns_defaults(self, tmp_path: Path):
+        review_md = tmp_path / "nonexistent.md"
+        config = parse_review_config(review_md)
+        assert config.max_retries == DEFAULT_MAX_RETRIES
+        assert config.instructions == ""
+
+    def test_malformed_yaml_logs_warning_and_returns_defaults(
+        self, tmp_path: Path, caplog: pytest.LogCaptureFixture
+    ):
+        review_md = tmp_path / "review.md"
+        review_md.write_text(
+            """\
+---
+max_retries: [invalid yaml
+---
+Some instructions.
+"""
+        )
+        config = parse_review_config(review_md)
+        assert config.max_retries == DEFAULT_MAX_RETRIES
+        assert config.instructions == "Some instructions."
+        assert "Malformed YAML" in caplog.text
+
+    def test_env_var_fallback_when_no_frontmatter_value(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ):
+        monkeypatch.setenv(ENV_MAX_RETRIES, "7")
+
+        review_md = tmp_path / "review.md"
+        review_md.write_text("No frontmatter, just instructions.")
+
+        config = parse_review_config(review_md)
+        assert config.max_retries == 7
+        assert config.instructions == "No frontmatter, just instructions."
+
+    def test_frontmatter_overrides_env_var(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
+        monkeypatch.setenv(ENV_MAX_RETRIES, "7")
+
+        review_md = tmp_path / "review.md"
+        review_md.write_text(
+            """\
+---
+max_retries: 2
+---
+Instructions here.
+"""
+        )
+        config = parse_review_config(review_md)
+        assert config.max_retries == 2
+
+    def test_invalid_env_var_is_ignored(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture
+    ):
+        monkeypatch.setenv(ENV_MAX_RETRIES, "not-a-number")
+
+        review_md = tmp_path / "review.md"
+        review_md.write_text("Instructions only.")
+
+        config = parse_review_config(review_md)
+        assert config.max_retries == DEFAULT_MAX_RETRIES
+        assert "Invalid AUTO_REVIEW_MAX_RETRIES" in caplog.text
+
+    def test_non_integer_max_retries_in_frontmatter_uses_default(
+        self, tmp_path: Path, caplog: pytest.LogCaptureFixture
+    ):
+        review_md = tmp_path / "review.md"
+        review_md.write_text(
+            """\
+---
+max_retries: "five"
+---
+Instructions.
+"""
+        )
+        config = parse_review_config(review_md)
+        assert config.max_retries == DEFAULT_MAX_RETRIES
+        assert "Invalid max_retries" in caplog.text
+
+    def test_multiline_instructions(self, tmp_path: Path):
+        review_md = tmp_path / "review.md"
+        review_md.write_text(
+            """\
+---
+max_retries: 4
+---
+Line one.
+Line two.
+Line three.
+"""
+        )
+        config = parse_review_config(review_md)
+        assert config.max_retries == 4
+        assert "Line one." in config.instructions
+        assert "Line two." in config.instructions
+        assert "Line three." in config.instructions
+
+    def test_frontmatter_with_extra_fields_ignored(self, tmp_path: Path):
+        review_md = tmp_path / "review.md"
+        review_md.write_text(
+            """\
+---
+max_retries: 6
+author: test
+some_other_field: value
+---
+Instructions.
+"""
+        )
+        config = parse_review_config(review_md)
+        assert config.max_retries == 6
+        assert config.instructions == "Instructions."
+
+    def test_env_var_fallback_for_file_not_found(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ):
+        monkeypatch.setenv(ENV_MAX_RETRIES, "10")
+
+        review_md = tmp_path / "nonexistent.md"
+        config = parse_review_config(review_md)
+        assert config.max_retries == 10
+        assert config.instructions == ""
+
+    def test_env_var_fallback_for_malformed_yaml(
+        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+    ):
+        monkeypatch.setenv(ENV_MAX_RETRIES, "8")
+
+        review_md = tmp_path / "review.md"
+        review_md.write_text(
+            """\
+---
+: invalid: yaml:
+---
+Some instructions.
+"""
+        )
+        config = parse_review_config(review_md)
+        assert config.max_retries == 8
+        assert config.instructions == "Some instructions."
+
+    def test_no_trailing_newline_after_frontmatter(self, tmp_path: Path):
+        review_md = tmp_path / "review.md"
+        review_md.write_text("---\nmax_retries: 3\n---\nInstructions.")
+        config = parse_review_config(review_md)
+        assert config.max_retries == 3
+        assert config.instructions == "Instructions."
+
+    def test_whitespace_handling_in_instructions(self, tmp_path: Path):
+        review_md = tmp_path / "review.md"
+        review_md.write_text(
+            """\
+---
+max_retries: 1
+---
+
+   Indented instructions with leading/trailing whitespace.
+
+"""
+        )
+        config = parse_review_config(review_md)
+        # Instructions should be stripped
+        assert config.instructions == "Indented instructions with leading/trailing whitespace."
+
+
+# ---------------------------------------------------------------------------
+# detect_review_md tests
+# ---------------------------------------------------------------------------
+
+
+class TestDetectReviewMd:
+    """Tests for detect_review_md function with project override precedence."""
+
+    def test_returns_project_override_when_both_exist(self, tmp_path: Path):
+        """SC-006: Project override wins when both default and project exist."""
+        skills_base = tmp_path / "skills"
+
+        # Create default skill review.md
+        default_dir = skills_base / "default" / "local-code-review"
+        default_dir.mkdir(parents=True)
+        default_review = default_dir / "review.md"
+        default_review.write_text("Default instructions")
+
+        # Create project override review.md
+        project_dir = skills_base / "aisos" / "local-code-review"
+        project_dir.mkdir(parents=True)
+        project_review = project_dir / "review.md"
+        project_review.write_text("Project override instructions")
+
+        result = detect_review_md("local-code-review", "AISOS-123", skills_base)
+
+        assert result == project_review
+        assert result.read_text() == "Project override instructions"
+
+    def test_returns_default_when_only_default_exists(self, tmp_path: Path):
+        """Returns default path when no project override exists."""
+        skills_base = tmp_path / "skills"
+
+        # Create only default skill review.md
+        default_dir = skills_base / "default" / "code-review"
+        default_dir.mkdir(parents=True)
+        default_review = default_dir / "review.md"
+        default_review.write_text("Default instructions")
+
+        result = detect_review_md("code-review", "PROJ-456", skills_base)
+
+        assert result == default_review
+        assert result.read_text() == "Default instructions"
+
+    def test_returns_none_when_no_review_md_exists(self, tmp_path: Path):
+        """SC-010: Returns None when review.md doesn't exist in either location."""
+        skills_base = tmp_path / "skills"
+
+        # Create skill directories without review.md
+        default_dir = skills_base / "default" / "test-skill"
+        default_dir.mkdir(parents=True)
+
+        project_dir = skills_base / "myproj" / "test-skill"
+        project_dir.mkdir(parents=True)
+
+        result = detect_review_md("test-skill", "MYPROJ-789", skills_base)
+
+        assert result is None
+
+    def test_returns_none_when_skills_base_empty(self, tmp_path: Path):
+        """Returns None when skills_base is empty."""
+        skills_base = tmp_path / "skills"
+        skills_base.mkdir(parents=True)
+
+        result = detect_review_md("any-skill", "PROJ-1", skills_base)
+
+        assert result is None
+
+    def test_project_key_extracted_and_lowercased(self, tmp_path: Path):
+        """Project key is correctly extracted and lowercased from ticket_key."""
+        skills_base = tmp_path / "skills"
+
+        # Create project skill with lowercase directory
+        project_dir = skills_base / "myproject" / "review-skill"
+        project_dir.mkdir(parents=True)
+        project_review = project_dir / "review.md"
+        project_review.write_text("Project review")
+
+        # Ticket key has uppercase project
+        result = detect_review_md("review-skill", "MYPROJECT-999", skills_base)
+
+        assert result == project_review
+
+    def test_handles_ticket_key_without_hyphen(self, tmp_path: Path):
+        """Falls back to default when ticket_key has no hyphen (no project)."""
+        skills_base = tmp_path / "skills"
+
+        # Create default skill review.md
+        default_dir = skills_base / "default" / "test-skill"
+        default_dir.mkdir(parents=True)
+        default_review = default_dir / "review.md"
+        default_review.write_text("Default instructions")
+
+        result = detect_review_md("test-skill", "INVALID", skills_base)
+
+        assert result == default_review
+
+    def test_returns_none_for_ticket_without_hyphen_and_no_default(self, tmp_path: Path):
+        """Returns None when ticket has no hyphen and no default exists."""
+        skills_base = tmp_path / "skills"
+        skills_base.mkdir(parents=True)
+
+        result = detect_review_md("test-skill", "NOHYPHEN", skills_base)
+
+        assert result is None
+
+    def test_ignores_project_directory_without_review_md(self, tmp_path: Path):
+        """Falls back to default when project dir exists but has no review.md."""
+        skills_base = tmp_path / "skills"
+
+        # Create project directory without review.md
+        project_dir = skills_base / "proj" / "skill-name"
+        project_dir.mkdir(parents=True)
+        (project_dir / "other-file.txt").write_text("not review.md")
+
+        # Create default with review.md
+        default_dir = skills_base / "default" / "skill-name"
+        default_dir.mkdir(parents=True)
+        default_review = default_dir / "review.md"
+        default_review.write_text("Default review")
+
+        result = detect_review_md("skill-name", "PROJ-1", skills_base)
+
+        assert result == default_review
+
+    def test_returns_default_when_project_dir_does_not_exist(self, tmp_path: Path):
+        """Returns default when project directory doesn't exist at all."""
+        skills_base = tmp_path / "skills"
+
+        # Only create default
+        default_dir = skills_base / "default" / "my-skill"
+        default_dir.mkdir(parents=True)
+        default_review = default_dir / "review.md"
+        default_review.write_text("Default instructions")
+
+        # No project directory created
+        result = detect_review_md("my-skill", "PROJ-100", skills_base)
+
+        assert result == default_review
+
+    def test_does_not_match_directory_named_review_md(self, tmp_path: Path):
+        """Ensures we check for file, not directory named review.md."""
+        skills_base = tmp_path / "skills"
+
+        # Create a directory named review.md (edge case)
+        default_dir = skills_base / "default" / "edge-skill"
+        default_dir.mkdir(parents=True)
+        (default_dir / "review.md").mkdir()  # This is a directory, not a file
+
+        result = detect_review_md("edge-skill", "TEST-1", skills_base)
+
+        assert result is None
+
+    def test_multi_hyphen_ticket_key(self, tmp_path: Path):
+        """Correctly extracts project from ticket keys with multiple hyphens."""
+        skills_base = tmp_path / "skills"
+
+        # Create project skill
+        project_dir = skills_base / "proj" / "multi-hyphen-skill"
+        project_dir.mkdir(parents=True)
+        project_review = project_dir / "review.md"
+        project_review.write_text("Project review")
+
+        # Skill name also has hyphens, ticket key is "PROJ-123"
+        result = detect_review_md("multi-hyphen-skill", "PROJ-123", skills_base)
+
+        assert result == project_review
+
+
+# ---------------------------------------------------------------------------
+# parse_verdict tests
+# ---------------------------------------------------------------------------
+
+
+class TestParseVerdict:
+    """Tests for parse_verdict function (SC-002)."""
+
+    # ----- APPROVED marker tests -----
+
+    def test_approved_uppercase(self):
+        """APPROVED marker in uppercase returns (APPROVED, '')."""
+        result = parse_verdict("The code looks good. APPROVED")
+        assert result == (Verdict.APPROVED, "")
+
+    def test_approved_lowercase(self):
+        """Case-insensitive: 'approved' returns (APPROVED, '')."""
+        result = parse_verdict("Code review complete. approved")
+        assert result == (Verdict.APPROVED, "")
+
+    def test_approved_mixed_case(self):
+        """Case-insensitive: 'Approved' returns (APPROVED, '')."""
+        result = parse_verdict("All tests pass. Approved")
+        assert result == (Verdict.APPROVED, "")
+
+    def test_approved_with_text_before_and_after(self):
+        """APPROVED marker in middle of text still returns (APPROVED, '')."""
+        result = parse_verdict("Summary: Code is clean. APPROVED. No further changes needed.")
+        assert result == (Verdict.APPROVED, "")
+
+    def test_approved_at_start(self):
+        """APPROVED at start of text."""
+        result = parse_verdict("APPROVED - code meets all requirements")
+        assert result == (Verdict.APPROVED, "")
+
+    # ----- REJECTED marker tests -----
+
+    def test_rejected_uppercase(self):
+        """REJECTED marker returns (REJECTED, feedback)."""
+        result = parse_verdict("REJECTED: Code has security issues.")
+        assert result[0] == Verdict.REJECTED
+        assert result[1] == ": Code has security issues."
+
+    def test_rejected_lowercase(self):
+        """Case-insensitive: 'rejected' returns (REJECTED, feedback)."""
+        result = parse_verdict("Code review result: rejected due to missing tests.")
+        assert result[0] == Verdict.REJECTED
+        assert result[1] == "due to missing tests."
+
+    def test_rejected_mixed_case(self):
+        """Case-insensitive: 'Rejected' returns (REJECTED, feedback)."""
+        result = parse_verdict("Rejected - needs refactoring.")
+        assert result[0] == Verdict.REJECTED
+        assert result[1] == "- needs refactoring."
+
+    def test_rejected_extracts_feedback_after_marker(self):
+        """Feedback is all text after REJECTED marker."""
+        result = parse_verdict(
+            "Review: REJECTED\n\nPlease fix the following:\n1. Bug in line 42\n2. Missing docstring"
+        )
+        assert result[0] == Verdict.REJECTED
+        assert "Please fix the following:" in result[1]
+        assert "Bug in line 42" in result[1]
+        assert "Missing docstring" in result[1]
+
+    def test_rejected_feedback_is_stripped(self):
+        """Feedback text is stripped of leading/trailing whitespace."""
+        result = parse_verdict("REJECTED   \n\n  Needs work.  \n\n")
+        assert result[0] == Verdict.REJECTED
+        assert result[1] == "Needs work."
+
+    def test_rejected_with_empty_feedback(self):
+        """SC-003: Empty feedback after REJECTED marker is handled correctly."""
+        result = parse_verdict("REJECTED")
+        assert result[0] == Verdict.REJECTED
+        assert result[1] == ""
+
+    def test_rejected_with_only_whitespace_feedback(self):
+        """Whitespace-only feedback after REJECTED is stripped to empty string."""
+        result = parse_verdict("REJECTED   \n\n  \t  \n")
+        assert result[0] == Verdict.REJECTED
+        assert result[1] == ""
+
+    # ----- Neither marker present -----
+
+    def test_neither_marker_returns_rejected_with_error_message(self):
+        """Neither marker present returns (REJECTED, 'Verdict could not be parsed')."""
+        result = parse_verdict("This review output has no clear verdict.")
+        assert result == (Verdict.REJECTED, "Verdict could not be parsed")
+
+    def test_empty_string_returns_rejected_with_error_message(self):
+        """Empty string returns (REJECTED, 'Verdict could not be parsed')."""
+        result = parse_verdict("")
+        assert result == (Verdict.REJECTED, "Verdict could not be parsed")
+
+    def test_whitespace_only_returns_rejected_with_error_message(self):
+        """Whitespace-only string returns (REJECTED, 'Verdict could not be parsed')."""
+        result = parse_verdict("   \n\t\n   ")
+        assert result == (Verdict.REJECTED, "Verdict could not be parsed")
+
+    # ----- Both markers present -----
+
+    def test_both_markers_approved_first_wins(self):
+        """When both markers present, APPROVED first wins."""
+        result = parse_verdict("Code is APPROVED, not REJECTED because tests pass.")
+        assert result == (Verdict.APPROVED, "")
+
+    def test_both_markers_rejected_first_wins(self):
+        """When both markers present, REJECTED first wins if it comes first."""
+        result = parse_verdict(
+            "This code is REJECTED. It would have been APPROVED if tests passed."
+        )
+        assert result[0] == Verdict.REJECTED
+        assert "It would have been APPROVED if tests passed." in result[1]
+
+    def test_both_markers_same_position_edge_case(self):
+        """Edge case: if somehow both start at same position, check behavior.
+
+        In practice this can't happen since they're different strings,
+        but we verify APPROVED is checked first per spec.
+        """
+        # This tests the logic: APPROVED found, REJECTED found, but APPROVED position is smaller
+        text = "APPROVED followed by REJECTED"
+        result = parse_verdict(text)
+        assert result == (Verdict.APPROVED, "")
+
+    # ----- Partial matches should not be detected -----
+
+    def test_approved_as_word_in_middle(self):
+        """APPROVED as substring in longer word is still detected (per current impl)."""
+        # NOTE: The spec doesn't mention word boundaries, so "APPROVED" in "UNAPPROVED"
+        # would still match. This documents current behavior.
+        result = parse_verdict("This is PREAPPROVED for the next phase")
+        assert result == (Verdict.APPROVED, "")
+
+    def test_rejected_as_word_in_middle(self):
+        """REJECTED as substring is still detected (per current impl)."""
+        result = parse_verdict("Previously rejected items were fixed")
+        # "rejected" is found in the middle
+        assert result[0] == Verdict.REJECTED
+
+    # ----- Complex real-world scenarios -----
+
+    def test_real_world_approved_review(self):
+        """Real-world style APPROVED review."""
+        review = """
+## Code Review Summary
+
+The implementation looks good and follows the coding standards.
+All tests pass and documentation is adequate.
+
+**Verdict: APPROVED**
+
+No further changes required.
+"""
+        result = parse_verdict(review)
+        assert result == (Verdict.APPROVED, "")
+
+    def test_real_world_rejected_review(self):
+        """Real-world style REJECTED review with detailed feedback."""
+        review = """
+## Code Review Summary
+
+The implementation has several issues that need to be addressed.
+
+**Verdict: REJECTED**
+
+### Issues Found:
+1. Missing error handling in `parse_data()` function
+2. No unit tests for edge cases
+3. Documentation needs to be updated
+
+### Recommendations:
+- Add try/except blocks for file operations
+- Add tests for empty input and malformed data
+"""
+        result = parse_verdict(review)
+        assert result[0] == Verdict.REJECTED
+        assert "Missing error handling" in result[1]
+        assert "No unit tests for edge cases" in result[1]
+        assert "Recommendations:" in result[1]
+
+    def test_multiline_approved(self):
+        """APPROVED marker on its own line."""
+        review = """
+Review complete.
+
+APPROVED
+
+Ship it!
+"""
+        result = parse_verdict(review)
+        assert result == (Verdict.APPROVED, "")
+
+
+# ---------------------------------------------------------------------------
+# write_cycle_file tests
+# ---------------------------------------------------------------------------
+
+
+class TestWriteCycleFile:
+    """Tests for write_cycle_file function (SC-007)."""
+
+    def test_creates_step_directory(self, tmp_path: Path):
+        """Creates .forge/{step_name}/ directory if it doesn't exist."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="local-code-review",
+            elapsed_seconds=15.5,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "implement", cycle_data)
+
+        step_dir = workspace / ".forge" / "implement"
+        assert step_dir.exists()
+        assert step_dir.is_dir()
+
+    def test_creates_nested_forge_and_step_directories(self, tmp_path: Path):
+        """Creates both .forge/ and step subdirectory when neither exists."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="code-review",
+            elapsed_seconds=10.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "my-step", cycle_data)
+
+        assert (workspace / ".forge").exists()
+        assert (workspace / ".forge" / "my-step").exists()
+
+    def test_writes_review_cycle_n_json(self, tmp_path: Path):
+        """File written to .forge/{step_name}/review_cycle_N.json."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=2,
+            max_cycles=5,
+            verdict="rejected",
+            feedback="Needs work",
+            skill="test-skill",
+            elapsed_seconds=20.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "step-name", cycle_data)
+
+        output_file = workspace / ".forge" / "step-name" / "review_cycle_2.json"
+        assert output_file.exists()
+        assert output_file.is_file()
+
+    def test_json_contains_all_required_fields(self, tmp_path: Path):
+        """SC-007: JSON contains all ReviewCycleData fields."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="Looks good!",
+            skill="local-code-review",
+            elapsed_seconds=12.5,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "implement", cycle_data)
+
+        output_file = workspace / ".forge" / "implement" / "review_cycle_1.json"
+        data = json.loads(output_file.read_text(encoding="utf-8"))
+
+        assert data["cycle"] == 1
+        assert data["max_cycles"] == 3
+        assert data["verdict"] == "approved"
+        assert data["feedback"] == "Looks good!"
+        assert data["skill"] == "local-code-review"
+        assert data["elapsed_seconds"] == 12.5
+        assert data["timestamp"] == "2024-01-15T10:30:00Z"
+
+    def test_timestamp_iso_8601_utc_format(self, tmp_path: Path):
+        """Timestamp is ISO 8601 UTC format."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        # ISO 8601 UTC timestamp
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="test",
+            elapsed_seconds=5.0,
+            timestamp="2024-06-20T14:30:45Z",
+        )
+
+        write_cycle_file(workspace, "step", cycle_data)
+
+        output_file = workspace / ".forge" / "step" / "review_cycle_1.json"
+        data = json.loads(output_file.read_text(encoding="utf-8"))
+
+        # Verify timestamp format preserved
+        assert data["timestamp"] == "2024-06-20T14:30:45Z"
+
+    def test_verdict_lowercase_approved(self, tmp_path: Path):
+        """Verdict is lowercase string 'approved'."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="test",
+            elapsed_seconds=5.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "step", cycle_data)
+
+        output_file = workspace / ".forge" / "step" / "review_cycle_1.json"
+        data = json.loads(output_file.read_text(encoding="utf-8"))
+
+        assert data["verdict"] == "approved"
+        assert data["verdict"].islower()
+
+    def test_verdict_lowercase_rejected(self, tmp_path: Path):
+        """Verdict is lowercase string 'rejected'."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback="Fix the bugs",
+            skill="test",
+            elapsed_seconds=8.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "step", cycle_data)
+
+        output_file = workspace / ".forge" / "step" / "review_cycle_1.json"
+        data = json.loads(output_file.read_text(encoding="utf-8"))
+
+        assert data["verdict"] == "rejected"
+        assert data["verdict"].islower()
+
+    def test_verdict_enum_converted_to_lowercase(self, tmp_path: Path):
+        """Verdict.APPROVED enum is converted to lowercase string."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict=Verdict.APPROVED,
+            feedback="",
+            skill="test",
+            elapsed_seconds=5.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "step", cycle_data)
+
+        output_file = workspace / ".forge" / "step" / "review_cycle_1.json"
+        data = json.loads(output_file.read_text(encoding="utf-8"))
+
+        assert data["verdict"] == "approved"
+
+    def test_verdict_enum_rejected_converted_to_lowercase(self, tmp_path: Path):
+        """Verdict.REJECTED enum is converted to lowercase string."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict=Verdict.REJECTED,
+            feedback="Needs work",
+            skill="test",
+            elapsed_seconds=5.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "step", cycle_data)
+
+        output_file = workspace / ".forge" / "step" / "review_cycle_1.json"
+        data = json.loads(output_file.read_text(encoding="utf-8"))
+
+        assert data["verdict"] == "rejected"
+
+    def test_multiple_cycles_written_separately(self, tmp_path: Path):
+        """Multiple cycles are written to separate files."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle1 = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback="First attempt needs work",
+            skill="code-review",
+            elapsed_seconds=10.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        cycle2 = ReviewCycleData(
+            cycle=2,
+            max_cycles=3,
+            verdict="rejected",
+            feedback="Still has issues",
+            skill="code-review",
+            elapsed_seconds=12.0,
+            timestamp="2024-01-15T10:35:00Z",
+        )
+        cycle3 = ReviewCycleData(
+            cycle=3,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="code-review",
+            elapsed_seconds=8.0,
+            timestamp="2024-01-15T10:40:00Z",
+        )
+
+        write_cycle_file(workspace, "impl", cycle1)
+        write_cycle_file(workspace, "impl", cycle2)
+        write_cycle_file(workspace, "impl", cycle3)
+
+        # All three files should exist
+        assert (workspace / ".forge" / "impl" / "review_cycle_1.json").exists()
+        assert (workspace / ".forge" / "impl" / "review_cycle_2.json").exists()
+        assert (workspace / ".forge" / "impl" / "review_cycle_3.json").exists()
+
+        # Verify content of each
+        data1 = json.loads((workspace / ".forge" / "impl" / "review_cycle_1.json").read_text())
+        data2 = json.loads((workspace / ".forge" / "impl" / "review_cycle_2.json").read_text())
+        data3 = json.loads((workspace / ".forge" / "impl" / "review_cycle_3.json").read_text())
+
+        assert data1["verdict"] == "rejected"
+        assert data2["verdict"] == "rejected"
+        assert data3["verdict"] == "approved"
+
+    def test_json_format_is_pretty_printed(self, tmp_path: Path):
+        """JSON is formatted with indentation for readability."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="test",
+            elapsed_seconds=5.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "step", cycle_data)
+
+        output_file = workspace / ".forge" / "step" / "review_cycle_1.json"
+        content = output_file.read_text(encoding="utf-8")
+
+        # Should have newlines (pretty printed)
+        assert "\n" in content
+        # Should have indentation
+        assert "  " in content
+
+    def test_overwrites_existing_file(self, tmp_path: Path):
+        """Overwrites existing cycle file if run again with same cycle number."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        # Write first version
+        cycle_data1 = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback="First feedback",
+            skill="test",
+            elapsed_seconds=5.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        write_cycle_file(workspace, "step", cycle_data1)
+
+        # Overwrite with second version
+        cycle_data2 = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="Updated feedback",
+            skill="test",
+            elapsed_seconds=8.0,
+            timestamp="2024-01-15T10:35:00Z",
+        )
+        write_cycle_file(workspace, "step", cycle_data2)
+
+        output_file = workspace / ".forge" / "step" / "review_cycle_1.json"
+        data = json.loads(output_file.read_text(encoding="utf-8"))
+
+        # Should have the updated values
+        assert data["verdict"] == "approved"
+        assert data["feedback"] == "Updated feedback"
+        assert data["elapsed_seconds"] == 8.0
+
+    def test_empty_feedback(self, tmp_path: Path):
+        """Empty feedback string is preserved."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="test",
+            elapsed_seconds=5.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "step", cycle_data)
+
+        output_file = workspace / ".forge" / "step" / "review_cycle_1.json"
+        data = json.loads(output_file.read_text(encoding="utf-8"))
+
+        assert data["feedback"] == ""
+
+    def test_multiline_feedback(self, tmp_path: Path):
+        """Multiline feedback is preserved correctly."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        multiline_feedback = """Fix the following issues:
+1. Missing error handling
+2. No unit tests
+3. Documentation incomplete"""
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback=multiline_feedback,
+            skill="code-review",
+            elapsed_seconds=15.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "step", cycle_data)
+
+        output_file = workspace / ".forge" / "step" / "review_cycle_1.json"
+        data = json.loads(output_file.read_text(encoding="utf-8"))
+
+        assert data["feedback"] == multiline_feedback
+        assert "1. Missing error handling" in data["feedback"]
+        assert "2. No unit tests" in data["feedback"]
+
+    def test_special_characters_in_feedback(self, tmp_path: Path):
+        """Special characters in feedback are handled correctly."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback='Fix "quotes", <tags>, and unicode: émoji 🎉',
+            skill="test",
+            elapsed_seconds=5.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "step", cycle_data)
+
+        output_file = workspace / ".forge" / "step" / "review_cycle_1.json"
+        data = json.loads(output_file.read_text(encoding="utf-8"))
+
+        assert data["feedback"] == 'Fix "quotes", <tags>, and unicode: émoji 🎉'
+
+    def test_elapsed_seconds_float_precision(self, tmp_path: Path):
+        """Float precision for elapsed_seconds is preserved."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="test",
+            elapsed_seconds=123.456789,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "step", cycle_data)
+
+        output_file = workspace / ".forge" / "step" / "review_cycle_1.json"
+        data = json.loads(output_file.read_text(encoding="utf-8"))
+
+        assert data["elapsed_seconds"] == 123.456789
+
+    def test_step_name_with_hyphens(self, tmp_path: Path):
+        """Step name with hyphens is handled correctly."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="test",
+            elapsed_seconds=5.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "local-code-review", cycle_data)
+
+        assert (workspace / ".forge" / "local-code-review" / "review_cycle_1.json").exists()
+
+    def test_step_name_with_underscores(self, tmp_path: Path):
+        """Step name with underscores is handled correctly."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+
+        cycle_data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="test",
+            elapsed_seconds=5.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        write_cycle_file(workspace, "my_step_name", cycle_data)
+
+        assert (workspace / ".forge" / "my_step_name" / "review_cycle_1.json").exists()
diff --git a/tests/unit/observability/__init__.py b/tests/unit/observability/__init__.py
new file mode 100644
index 00000000..da6a2955
--- /dev/null
+++ b/tests/unit/observability/__init__.py
@@ -0,0 +1 @@
+"""Unit tests for the observability module."""
diff --git a/tests/unit/observability/test_review_notifier.py b/tests/unit/observability/test_review_notifier.py
new file mode 100644
index 00000000..75cc75e6
--- /dev/null
+++ b/tests/unit/observability/test_review_notifier.py
@@ -0,0 +1,565 @@
+"""Unit tests for the ReviewJiraNotifier class."""
+
+import logging
+import time
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from forge.observability.review_notifier import (
+    DEFAULT_RATE_LIMIT_SECONDS,
+    MAX_FEEDBACK_LENGTH,
+    NotifyResult,
+    ReviewJiraNotifier,
+)
+from forge.observability.review_poller import ReviewCycleData
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def mock_jira_client():
+    """Create a mock JiraClient."""
+    client = MagicMock()
+    client.add_comment = AsyncMock()
+    return client
+
+
+@pytest.fixture
+def review_cycle_data():
+    """Create sample ReviewCycleData."""
+    return ReviewCycleData(
+        cycle=1,
+        max_cycles=3,
+        verdict="approved",
+        feedback="The code looks good. All tests pass.",
+        skill="local-code-review",
+        elapsed_seconds=45.5,
+        timestamp="2024-01-15T10:30:00Z",
+    )
+
+
+@pytest.fixture
+def notifier(mock_jira_client):
+    """Create a ReviewJiraNotifier with default settings."""
+    return ReviewJiraNotifier(
+        jira_client=mock_jira_client,
+        ticket_key="TEST-123",
+    )
+
+
+# ---------------------------------------------------------------------------
+# NotifyResult tests
+# ---------------------------------------------------------------------------
+
+
+class TestNotifyResult:
+    """Tests for NotifyResult dataclass."""
+
+    def test_posted_success(self):
+        """Test successful post result."""
+        result = NotifyResult(posted=True, rate_limited=False)
+        assert result.posted is True
+        assert result.rate_limited is False
+        assert result.error is None
+
+    def test_rate_limited(self):
+        """Test rate limited result."""
+        result = NotifyResult(posted=False, rate_limited=True)
+        assert result.posted is False
+        assert result.rate_limited is True
+        assert result.error is None
+
+    def test_error_result(self):
+        """Test error result."""
+        result = NotifyResult(posted=False, rate_limited=False, error="API Error")
+        assert result.posted is False
+        assert result.rate_limited is False
+        assert result.error == "API Error"
+
+
+# ---------------------------------------------------------------------------
+# ReviewJiraNotifier initialization tests
+# ---------------------------------------------------------------------------
+
+
+class TestReviewJiraNotifierInit:
+    """Tests for ReviewJiraNotifier initialization."""
+
+    def test_init_basic(self, mock_jira_client):
+        """Test basic initialization with defaults."""
+        notifier = ReviewJiraNotifier(
+            jira_client=mock_jira_client,
+            ticket_key="PROJ-456",
+        )
+        assert notifier.jira_client is mock_jira_client
+        assert notifier.ticket_key == "PROJ-456"
+        assert notifier.rate_limit_seconds == DEFAULT_RATE_LIMIT_SECONDS
+        assert notifier._last_notify_time is None
+
+    def test_init_custom_rate_limit(self, mock_jira_client):
+        """Test initialization with custom rate limit."""
+        notifier = ReviewJiraNotifier(
+            jira_client=mock_jira_client,
+            ticket_key="TEST-123",
+            rate_limit_seconds=60.0,
+        )
+        assert notifier.rate_limit_seconds == 60.0
+
+    def test_default_rate_limit_is_30_seconds(self):
+        """Test that the default rate limit is 30 seconds."""
+        assert DEFAULT_RATE_LIMIT_SECONDS == 30.0
+
+
+# ---------------------------------------------------------------------------
+# Rate limiting tests
+# ---------------------------------------------------------------------------
+
+
+class TestRateLimiting:
+    """Tests for rate limiting functionality."""
+
+    def test_not_rate_limited_initially(self, notifier):
+        """Test that first call is not rate limited."""
+        assert notifier._is_rate_limited() is False
+
+    def test_rate_limited_after_notify(self, notifier):
+        """Test that calls are rate limited after a notify."""
+        # Simulate a recent notify by setting the timestamp
+        notifier._last_notify_time = time.monotonic()
+        assert notifier._is_rate_limited() is True
+
+    def test_not_rate_limited_after_interval(self, mock_jira_client):
+        """Test that calls are not rate limited after interval passes."""
+        notifier = ReviewJiraNotifier(
+            jira_client=mock_jira_client,
+            ticket_key="TEST-123",
+            rate_limit_seconds=0.1,  # Very short for testing
+        )
+
+        # Set a timestamp in the past
+        notifier._last_notify_time = time.monotonic() - 0.2
+        assert notifier._is_rate_limited() is False
+
+    def test_reset_rate_limit(self, notifier):
+        """Test that reset_rate_limit clears the timer."""
+        notifier._last_notify_time = time.monotonic()
+        assert notifier._is_rate_limited() is True
+
+        notifier.reset_rate_limit()
+        assert notifier._is_rate_limited() is False
+
+    @pytest.mark.asyncio
+    async def test_notify_returns_rate_limited_result(self, mock_jira_client, review_cycle_data):
+        """Test that notify returns rate_limited=True when rate limited."""
+        notifier = ReviewJiraNotifier(
+            jira_client=mock_jira_client,
+            ticket_key="TEST-123",
+            rate_limit_seconds=60.0,
+        )
+
+        # First call should succeed
+        result1 = await notifier.notify(review_cycle_data)
+        assert result1.posted is True
+        assert result1.rate_limited is False
+
+        # Second immediate call should be rate limited
+        result2 = await notifier.notify(review_cycle_data)
+        assert result2.posted is False
+        assert result2.rate_limited is True
+
+        # Jira should only have been called once
+        assert mock_jira_client.add_comment.call_count == 1
+
+
+# ---------------------------------------------------------------------------
+# Comment formatting tests
+# ---------------------------------------------------------------------------
+
+
+class TestCommentFormatting:
+    """Tests for comment formatting."""
+
+    def test_format_approved_verdict(self, notifier):
+        """Test formatting for approved verdict."""
+        data = ReviewCycleData(
+            cycle=2,
+            max_cycles=5,
+            verdict="approved",
+            feedback="All good!",
+            skill="review-skill",
+            elapsed_seconds=30.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        comment = notifier._format_comment(data)
+
+        assert "Review Cycle 2/5" in comment
+        assert "✅" in comment
+        assert "APPROVED" in comment
+        assert "review-skill" in comment
+        assert "30.0s" in comment
+        assert "All good!" in comment
+
+    def test_format_rejected_verdict(self, notifier):
+        """Test formatting for rejected verdict."""
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback="Needs fixes",
+            skill="code-review",
+            elapsed_seconds=25.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        comment = notifier._format_comment(data)
+
+        assert "Review Cycle 1/3" in comment
+        assert "❌" in comment
+        assert "REJECTED" in comment
+        assert "Needs fixes" in comment
+
+    def test_format_duration_minutes(self, notifier):
+        """Test that duration >= 60s is formatted in minutes."""
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="test",
+            elapsed_seconds=125.0,  # 2.08 minutes
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        comment = notifier._format_comment(data)
+
+        assert "2.1m" in comment
+
+    def test_format_duration_seconds(self, notifier):
+        """Test that duration < 60s is formatted in seconds."""
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="test",
+            elapsed_seconds=45.3,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        comment = notifier._format_comment(data)
+
+        assert "45.3s" in comment
+
+    def test_format_empty_feedback(self, notifier):
+        """Test formatting with empty feedback."""
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="test",
+            elapsed_seconds=10.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        comment = notifier._format_comment(data)
+
+        # Should not include "Feedback:" section
+        assert "Feedback:" not in comment
+
+    def test_format_whitespace_feedback(self, notifier):
+        """Test formatting with whitespace-only feedback."""
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="   \n\t  ",
+            skill="test",
+            elapsed_seconds=10.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        comment = notifier._format_comment(data)
+
+        # Should not include "Feedback:" section
+        assert "Feedback:" not in comment
+
+    def test_format_empty_skill(self, notifier):
+        """Test formatting with empty skill name."""
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="Good",
+            skill="",
+            elapsed_seconds=10.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        comment = notifier._format_comment(data)
+
+        # Should not include "Skill:" section
+        assert "Skill:" not in comment
+
+    def test_format_truncates_long_feedback(self, notifier):
+        """Test that long feedback is truncated."""
+        long_feedback = "A" * (MAX_FEEDBACK_LENGTH + 100)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback=long_feedback,
+            skill="test",
+            elapsed_seconds=10.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        comment = notifier._format_comment(data)
+
+        # Should be truncated with ellipsis
+        assert "..." in comment
+        # Should not contain the full feedback
+        assert long_feedback not in comment
+
+    def test_format_truncates_at_word_boundary(self, notifier):
+        """Test that truncation prefers word boundaries."""
+        # Create feedback with words
+        words = ["word"] * 150  # More than MAX_FEEDBACK_LENGTH
+        long_feedback = " ".join(words)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback=long_feedback,
+            skill="test",
+            elapsed_seconds=10.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        comment = notifier._format_comment(data)
+
+        # Should end with "..." after a complete word
+        assert "..." in comment
+        # The truncated content should not cut a word in half
+        feedback_part = comment.split("*Feedback:*")[1].strip()
+        assert feedback_part.endswith("word...")
+
+    def test_format_max_feedback_length_constant(self):
+        """Test that MAX_FEEDBACK_LENGTH is 500."""
+        assert MAX_FEEDBACK_LENGTH == 500
+
+    def test_format_case_insensitive_verdict(self, notifier):
+        """Test that verdict is uppercased in output."""
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="Approved",  # Mixed case
+            feedback="",
+            skill="test",
+            elapsed_seconds=10.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        comment = notifier._format_comment(data)
+
+        assert "APPROVED" in comment
+
+
+# ---------------------------------------------------------------------------
+# Notify tests
+# ---------------------------------------------------------------------------
+
+
+class TestNotify:
+    """Tests for notify method."""
+
+    @pytest.mark.asyncio
+    async def test_notify_posts_comment(self, mock_jira_client, notifier, review_cycle_data):
+        """Test that notify posts a comment to Jira."""
+        result = await notifier.notify(review_cycle_data)
+
+        assert result.posted is True
+        assert result.rate_limited is False
+        assert result.error is None
+        mock_jira_client.add_comment.assert_called_once()
+
+        # Check the call args
+        call_args = mock_jira_client.add_comment.call_args
+        assert call_args[0][0] == "TEST-123"  # ticket_key
+        assert "Review Cycle 1/3" in call_args[0][1]  # comment body
+
+    @pytest.mark.asyncio
+    async def test_notify_updates_last_notify_time(self, notifier, review_cycle_data):
+        """Test that notify updates the last notify time."""
+        assert notifier._last_notify_time is None
+
+        await notifier.notify(review_cycle_data)
+
+        assert notifier._last_notify_time is not None
+
+    @pytest.mark.asyncio
+    async def test_notify_handles_api_error(
+        self, mock_jira_client, notifier, review_cycle_data, caplog
+    ):
+        """Test that notify handles Jira API errors gracefully."""
+        mock_jira_client.add_comment.side_effect = Exception("API Error")
+
+        with caplog.at_level(logging.WARNING):
+            result = await notifier.notify(review_cycle_data)
+
+        assert result.posted is False
+        assert result.rate_limited is False
+        assert result.error == "API Error"
+
+        # Check warning was logged
+        assert "Failed to post review comment" in caplog.text
+        assert "TEST-123" in caplog.text
+
+    @pytest.mark.asyncio
+    async def test_notify_logs_success(self, notifier, review_cycle_data, caplog):
+        """Test that notify logs success at INFO level."""
+        with caplog.at_level(logging.INFO):
+            await notifier.notify(review_cycle_data)
+
+        assert "Posted review cycle 1/3 comment to TEST-123" in caplog.text
+
+    @pytest.mark.asyncio
+    async def test_notify_does_not_update_time_on_error(
+        self, mock_jira_client, notifier, review_cycle_data
+    ):
+        """Test that last_notify_time is not updated on error."""
+        mock_jira_client.add_comment.side_effect = Exception("API Error")
+
+        await notifier.notify(review_cycle_data)
+
+        # Time should not be updated on failure
+        assert notifier._last_notify_time is None
+
+    @pytest.mark.asyncio
+    async def test_notify_rate_limited_logs_debug(
+        self, mock_jira_client, review_cycle_data, caplog
+    ):
+        """Test that rate limited skips log at DEBUG level."""
+        notifier = ReviewJiraNotifier(
+            jira_client=mock_jira_client,
+            ticket_key="TEST-123",
+            rate_limit_seconds=60.0,
+        )
+
+        # First call
+        await notifier.notify(review_cycle_data)
+
+        # Second call should be rate limited
+        with caplog.at_level(logging.DEBUG):
+            result = await notifier.notify(review_cycle_data)
+
+        assert result.rate_limited is True
+        assert "Rate limited" in caplog.text
+
+
+# ---------------------------------------------------------------------------
+# Integration-style tests
+# ---------------------------------------------------------------------------
+
+
+class TestIntegration:
+    """Integration-style tests combining multiple features."""
+
+    @pytest.mark.asyncio
+    async def test_multiple_cycles_with_rate_limiting(self, mock_jira_client):
+        """Test posting multiple cycles respects rate limiting."""
+        notifier = ReviewJiraNotifier(
+            jira_client=mock_jira_client,
+            ticket_key="TEST-123",
+            rate_limit_seconds=0.1,  # Short for testing
+        )
+
+        cycle1 = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback="First review",
+            skill="review",
+            elapsed_seconds=10.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        cycle2 = ReviewCycleData(
+            cycle=2,
+            max_cycles=3,
+            verdict="approved",
+            feedback="Second review",
+            skill="review",
+            elapsed_seconds=15.0,
+            timestamp="2024-01-15T10:31:00Z",
+        )
+
+        # First cycle posts
+        result1 = await notifier.notify(cycle1)
+        assert result1.posted is True
+
+        # Immediate second cycle is rate limited
+        result2 = await notifier.notify(cycle2)
+        assert result2.rate_limited is True
+
+        # Wait for rate limit to expire
+        import asyncio
+
+        await asyncio.sleep(0.15)
+
+        # Third attempt should succeed
+        result3 = await notifier.notify(cycle2)
+        assert result3.posted is True
+
+        # Verify two comments were posted
+        assert mock_jira_client.add_comment.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_error_does_not_block_subsequent_posts(self, mock_jira_client):
+        """Test that an error on one call doesn't block future calls."""
+        notifier = ReviewJiraNotifier(
+            jira_client=mock_jira_client,
+            ticket_key="TEST-123",
+            rate_limit_seconds=0.0,  # No rate limiting
+        )
+
+        cycle = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="Test",
+            skill="review",
+            elapsed_seconds=10.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        # First call fails
+        mock_jira_client.add_comment.side_effect = Exception("API Error")
+        result1 = await notifier.notify(cycle)
+        assert result1.posted is False
+        assert result1.error == "API Error"
+
+        # Second call succeeds
+        mock_jira_client.add_comment.side_effect = None
+        result2 = await notifier.notify(cycle)
+        assert result2.posted is True
+        assert result2.error is None
+
+    @pytest.mark.asyncio
+    async def test_comment_contains_all_cycle_info(self, mock_jira_client, notifier):
+        """Test that posted comment contains all relevant info."""
+        data = ReviewCycleData(
+            cycle=2,
+            max_cycles=5,
+            verdict="rejected",
+            feedback="Please fix the typo on line 42",
+            skill="code-review",
+            elapsed_seconds=90.5,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        await notifier.notify(data)
+
+        call_args = mock_jira_client.add_comment.call_args
+        comment_body = call_args[0][1]
+
+        # Verify all key information is present
+        assert "2/5" in comment_body  # cycle/max_cycles
+        assert "REJECTED" in comment_body  # verdict
+        assert "❌" in comment_body  # verdict icon
+        assert "code-review" in comment_body  # skill
+        assert "1.5m" in comment_body  # duration (90.5s = 1.5m)
+        assert "typo on line 42" in comment_body  # feedback
diff --git a/tests/unit/observability/test_review_poller.py b/tests/unit/observability/test_review_poller.py
new file mode 100644
index 00000000..7d222875
--- /dev/null
+++ b/tests/unit/observability/test_review_poller.py
@@ -0,0 +1,726 @@
+"""Unit tests for the ReviewCyclePoller class."""
+
+import json
+from pathlib import Path
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from forge.config import Settings
+from forge.observability.review_poller import (
+    MAX_JSON_PARSE_RETRIES,
+    ReviewCycleData,
+    ReviewCyclePoller,
+)
+
+# ---------------------------------------------------------------------------
+# ReviewCycleData tests
+# ---------------------------------------------------------------------------
+
+
+class TestReviewCycleData:
+    """Tests for ReviewCycleData dataclass."""
+
+    def test_all_fields(self):
+        """Test that all fields are correctly stored."""
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="Looks good!",
+            skill="code-review",
+            elapsed_seconds=12.5,
+            timestamp="2024-01-15T10:30:00Z",
+            file_path="/path/to/file.json",
+        )
+        assert data.cycle == 1
+        assert data.max_cycles == 3
+        assert data.verdict == "approved"
+        assert data.feedback == "Looks good!"
+        assert data.skill == "code-review"
+        assert data.elapsed_seconds == 12.5
+        assert data.timestamp == "2024-01-15T10:30:00Z"
+        assert data.file_path == "/path/to/file.json"
+
+    def test_default_file_path(self):
+        """Test that file_path defaults to empty string."""
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback="Needs work",
+            skill="review",
+            elapsed_seconds=5.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        assert data.file_path == ""
+
+    def test_from_dict_all_fields(self):
+        """Test creating from dict with all fields."""
+        input_dict = {
+            "cycle": 2,
+            "max_cycles": 5,
+            "verdict": "rejected",
+            "feedback": "Fix bugs",
+            "skill": "local-code-review",
+            "elapsed_seconds": 8.3,
+            "timestamp": "2024-01-16T14:20:00Z",
+        }
+        data = ReviewCycleData.from_dict(input_dict, file_path="/test/path.json")
+
+        assert data.cycle == 2
+        assert data.max_cycles == 5
+        assert data.verdict == "rejected"
+        assert data.feedback == "Fix bugs"
+        assert data.skill == "local-code-review"
+        assert data.elapsed_seconds == 8.3
+        assert data.timestamp == "2024-01-16T14:20:00Z"
+        assert data.file_path == "/test/path.json"
+
+    def test_from_dict_minimal_fields(self):
+        """Test creating from dict with only required fields."""
+        input_dict = {
+            "cycle": 1,
+            "max_cycles": 3,
+            "verdict": "approved",
+        }
+        data = ReviewCycleData.from_dict(input_dict)
+
+        assert data.cycle == 1
+        assert data.max_cycles == 3
+        assert data.verdict == "approved"
+        assert data.feedback == ""
+        assert data.skill == ""
+        assert data.elapsed_seconds == 0.0
+        assert data.timestamp == ""
+        assert data.file_path == ""
+
+    def test_from_dict_missing_required_raises(self):
+        """Test that missing required fields raise KeyError."""
+        with pytest.raises(KeyError):
+            ReviewCycleData.from_dict({"cycle": 1, "max_cycles": 3})  # missing verdict
+
+        with pytest.raises(KeyError):
+            ReviewCycleData.from_dict({"cycle": 1, "verdict": "approved"})  # missing max_cycles
+
+
+# ---------------------------------------------------------------------------
+# ReviewCyclePoller initialization tests
+# ---------------------------------------------------------------------------
+
+
+class TestReviewCyclePollerInit:
+    """Tests for ReviewCyclePoller initialization."""
+
+    def test_init_basic(self):
+        """Test basic initialization."""
+        poller = ReviewCyclePoller(
+            workspace_path=Path("/workspace"),
+            step_name="implement_task",
+        )
+        assert poller.workspace_path == Path("/workspace")
+        assert poller.step_name == "implement_task"
+        assert poller._settings is None
+        assert poller._processed_files == set()
+        assert poller._running is False
+
+    def test_init_with_settings(self, mock_settings):
+        """Test initialization with explicit settings."""
+        poller = ReviewCyclePoller(
+            workspace_path=Path("/workspace"),
+            step_name="generate_prd",
+            settings=mock_settings,
+        )
+        assert poller._settings is mock_settings
+
+    def test_review_cycle_dir(self):
+        """Test review_cycle_dir property."""
+        poller = ReviewCyclePoller(
+            workspace_path=Path("/workspace"),
+            step_name="implement_task",
+        )
+        assert poller.review_cycle_dir == Path("/workspace/.forge/implement_task")
+
+    def test_poll_interval_from_settings(self, mock_settings):
+        """Test poll_interval reads from settings."""
+        mock_settings.auto_review_poll_interval = 10.0
+        poller = ReviewCyclePoller(
+            workspace_path=Path("/workspace"),
+            step_name="test_step",
+            settings=mock_settings,
+        )
+        assert poller.poll_interval == 10.0
+
+
+# ---------------------------------------------------------------------------
+# ReviewCyclePoller._get_review_cycle_files tests
+# ---------------------------------------------------------------------------
+
+
+class TestGetReviewCycleFiles:
+    """Tests for _get_review_cycle_files method."""
+
+    def test_no_directory_returns_empty(self, tmp_path):
+        """Test that non-existent directory returns empty list."""
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="nonexistent_step",
+        )
+        assert poller._get_review_cycle_files() == []
+
+    def test_empty_directory_returns_empty(self, tmp_path):
+        """Test that empty directory returns empty list."""
+        step_dir = tmp_path / ".forge" / "test_step"
+        step_dir.mkdir(parents=True)
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test_step",
+        )
+        assert poller._get_review_cycle_files() == []
+
+    def test_finds_review_cycle_files(self, tmp_path):
+        """Test finding review_cycle_*.json files."""
+        step_dir = tmp_path / ".forge" / "test_step"
+        step_dir.mkdir(parents=True)
+
+        # Create matching files
+        (step_dir / "review_cycle_1.json").write_text("{}")
+        (step_dir / "review_cycle_2.json").write_text("{}")
+        (step_dir / "review_cycle_10.json").write_text("{}")
+
+        # Create non-matching files
+        (step_dir / "other_file.json").write_text("{}")
+        (step_dir / "review_cycle.json").write_text("{}")  # Missing underscore
+        (step_dir / "review_cycle_1.txt").write_text("{}")  # Wrong extension
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test_step",
+        )
+        files = poller._get_review_cycle_files()
+
+        assert len(files) == 3
+        assert all(f.name.startswith("review_cycle_") for f in files)
+        assert all(f.suffix == ".json" for f in files)
+
+    def test_files_are_sorted(self, tmp_path):
+        """Test that files are returned in sorted order."""
+        step_dir = tmp_path / ".forge" / "test_step"
+        step_dir.mkdir(parents=True)
+
+        # Create files in non-sorted order
+        (step_dir / "review_cycle_3.json").write_text("{}")
+        (step_dir / "review_cycle_1.json").write_text("{}")
+        (step_dir / "review_cycle_2.json").write_text("{}")
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test_step",
+        )
+        files = poller._get_review_cycle_files()
+
+        assert [f.name for f in files] == [
+            "review_cycle_1.json",
+            "review_cycle_2.json",
+            "review_cycle_3.json",
+        ]
+
+
+# ---------------------------------------------------------------------------
+# ReviewCyclePoller._parse_json_with_retry tests
+# ---------------------------------------------------------------------------
+
+
+class TestParseJsonWithRetry:
+    """Tests for _parse_json_with_retry method."""
+
+    @pytest.mark.asyncio
+    async def test_parse_valid_json(self, tmp_path):
+        """Test parsing a valid JSON file."""
+        json_file = tmp_path / "test.json"
+        json_file.write_text('{"cycle": 1, "max_cycles": 3, "verdict": "approved"}')
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test",
+        )
+        result = await poller._parse_json_with_retry(json_file)
+
+        assert result == {"cycle": 1, "max_cycles": 3, "verdict": "approved"}
+
+    @pytest.mark.asyncio
+    async def test_parse_empty_file_returns_none(self, tmp_path):
+        """Test that empty file returns None after retries."""
+        json_file = tmp_path / "empty.json"
+        json_file.write_text("")
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test",
+        )
+
+        with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
+            result = await poller._parse_json_with_retry(json_file)
+
+        assert result is None
+        # Should have retried MAX_JSON_PARSE_RETRIES - 1 times
+        assert mock_sleep.await_count == MAX_JSON_PARSE_RETRIES - 1
+
+    @pytest.mark.asyncio
+    async def test_parse_invalid_json_retries(self, tmp_path):
+        """Test that invalid JSON triggers retries."""
+        json_file = tmp_path / "invalid.json"
+        json_file.write_text('{"incomplete": ')
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test",
+        )
+
+        with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
+            result = await poller._parse_json_with_retry(json_file)
+
+        assert result is None
+        assert mock_sleep.await_count == MAX_JSON_PARSE_RETRIES - 1
+
+    @pytest.mark.asyncio
+    async def test_parse_file_not_found_returns_none(self, tmp_path):
+        """Test that missing file returns None without retries."""
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test",
+        )
+        result = await poller._parse_json_with_retry(tmp_path / "nonexistent.json")
+
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_parse_succeeds_on_retry(self, tmp_path):
+        """Test that parsing can succeed after initial failures."""
+        json_file = tmp_path / "eventually_valid.json"
+        json_file.write_text('{"incomplete": ')
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test",
+        )
+
+        call_count = 0
+
+        async def mock_read():
+            nonlocal call_count
+            call_count += 1
+            if call_count < 3:
+                return '{"incomplete": '
+            return '{"cycle": 1, "max_cycles": 3, "verdict": "approved"}'
+
+        with patch("aiofiles.open") as mock_open:
+            mock_file = AsyncMock()
+            mock_file.read = mock_read
+            mock_file.__aenter__ = AsyncMock(return_value=mock_file)
+            mock_file.__aexit__ = AsyncMock(return_value=None)
+            mock_open.return_value = mock_file
+
+            with patch("asyncio.sleep", new_callable=AsyncMock):
+                result = await poller._parse_json_with_retry(json_file)
+
+        assert result == {"cycle": 1, "max_cycles": 3, "verdict": "approved"}
+
+
+# ---------------------------------------------------------------------------
+# ReviewCyclePoller.poll_once tests
+# ---------------------------------------------------------------------------
+
+
+class TestPollOnce:
+    """Tests for poll_once method."""
+
+    @pytest.mark.asyncio
+    async def test_poll_once_no_files(self, tmp_path):
+        """Test poll_once with no files."""
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test_step",
+        )
+        result = await poller.poll_once()
+        assert result == []
+
+    @pytest.mark.asyncio
+    async def test_poll_once_new_files(self, tmp_path):
+        """Test poll_once detects new files."""
+        step_dir = tmp_path / ".forge" / "test_step"
+        step_dir.mkdir(parents=True)
+
+        cycle_data = {
+            "cycle": 1,
+            "max_cycles": 3,
+            "verdict": "approved",
+            "feedback": "LGTM",
+            "skill": "code-review",
+            "elapsed_seconds": 5.5,
+            "timestamp": "2024-01-15T10:00:00Z",
+        }
+        (step_dir / "review_cycle_1.json").write_text(json.dumps(cycle_data))
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test_step",
+        )
+        result = await poller.poll_once()
+
+        assert len(result) == 1
+        assert result[0].cycle == 1
+        assert result[0].verdict == "approved"
+        assert result[0].feedback == "LGTM"
+        assert result[0].skill == "code-review"
+
+    @pytest.mark.asyncio
+    async def test_poll_once_skips_already_processed(self, tmp_path):
+        """Test that poll_once skips already processed files."""
+        step_dir = tmp_path / ".forge" / "test_step"
+        step_dir.mkdir(parents=True)
+
+        cycle_data = {
+            "cycle": 1,
+            "max_cycles": 3,
+            "verdict": "approved",
+            "feedback": "",
+            "skill": "",
+            "elapsed_seconds": 0,
+            "timestamp": "",
+        }
+        file_path = step_dir / "review_cycle_1.json"
+        file_path.write_text(json.dumps(cycle_data))
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test_step",
+        )
+
+        # First poll should detect the file
+        result1 = await poller.poll_once()
+        assert len(result1) == 1
+
+        # Second poll should skip it
+        result2 = await poller.poll_once()
+        assert len(result2) == 0
+
+    @pytest.mark.asyncio
+    async def test_poll_once_detects_new_after_first(self, tmp_path):
+        """Test that poll_once detects new files on subsequent polls."""
+        step_dir = tmp_path / ".forge" / "test_step"
+        step_dir.mkdir(parents=True)
+
+        cycle1 = {"cycle": 1, "max_cycles": 3, "verdict": "rejected", "feedback": "Fix it"}
+        (step_dir / "review_cycle_1.json").write_text(json.dumps(cycle1))
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test_step",
+        )
+
+        result1 = await poller.poll_once()
+        assert len(result1) == 1
+        assert result1[0].verdict == "rejected"
+
+        # Add new file
+        cycle2 = {"cycle": 2, "max_cycles": 3, "verdict": "approved", "feedback": ""}
+        (step_dir / "review_cycle_2.json").write_text(json.dumps(cycle2))
+
+        result2 = await poller.poll_once()
+        assert len(result2) == 1
+        assert result2[0].cycle == 2
+        assert result2[0].verdict == "approved"
+
+    @pytest.mark.asyncio
+    async def test_poll_once_skips_invalid_json(self, tmp_path):
+        """Test that invalid JSON files are skipped."""
+        step_dir = tmp_path / ".forge" / "test_step"
+        step_dir.mkdir(parents=True)
+
+        # Valid file
+        valid_data = {"cycle": 1, "max_cycles": 3, "verdict": "approved"}
+        (step_dir / "review_cycle_1.json").write_text(json.dumps(valid_data))
+
+        # Invalid JSON file
+        (step_dir / "review_cycle_2.json").write_text("not valid json")
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test_step",
+        )
+
+        with patch("asyncio.sleep", new_callable=AsyncMock):
+            result = await poller.poll_once()
+
+        # Only valid file should be returned
+        assert len(result) == 1
+        assert result[0].cycle == 1
+
+    @pytest.mark.asyncio
+    async def test_poll_once_skips_missing_required_fields(self, tmp_path):
+        """Test that files with missing required fields are skipped."""
+        step_dir = tmp_path / ".forge" / "test_step"
+        step_dir.mkdir(parents=True)
+
+        # Missing 'verdict'
+        invalid_data = {"cycle": 1, "max_cycles": 3}
+        (step_dir / "review_cycle_1.json").write_text(json.dumps(invalid_data))
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test_step",
+        )
+        result = await poller.poll_once()
+
+        assert len(result) == 0
+
+
+# ---------------------------------------------------------------------------
+# ReviewCyclePoller async iteration tests
+# ---------------------------------------------------------------------------
+
+
+class TestAsyncIteration:
+    """Tests for async iteration interface."""
+
+    @pytest.mark.asyncio
+    async def test_poll_starts_iteration(self, tmp_path, mock_settings):
+        """Test that poll() returns an iterator."""
+        mock_settings.auto_review_poll_interval = 0.01
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test",
+            settings=mock_settings,
+        )
+
+        iterator = poller.poll()
+        assert iterator is poller
+        assert poller._running is True
+
+    @pytest.mark.asyncio
+    async def test_stop_ends_iteration(self, tmp_path, mock_settings):
+        """Test that stop() ends iteration."""
+        mock_settings.auto_review_poll_interval = 0.01
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test",
+            settings=mock_settings,
+        )
+
+        poller.poll()
+        poller.stop()
+        assert poller._running is False
+
+        with pytest.raises(StopAsyncIteration):
+            await poller.__anext__()
+
+    @pytest.mark.asyncio
+    async def test_iteration_polls_at_interval(self, tmp_path, mock_settings):
+        """Test that iteration waits for poll interval."""
+        mock_settings.auto_review_poll_interval = 0.5
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test",
+            settings=mock_settings,
+        )
+
+        poller.poll()
+
+        with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
+            result = await poller.__anext__()
+
+        mock_sleep.assert_awaited_once_with(0.5)
+        assert result == []
+
+        poller.stop()
+
+    @pytest.mark.asyncio
+    async def test_async_for_loop(self, tmp_path, mock_settings):
+        """Test using poller in async for loop."""
+        mock_settings.auto_review_poll_interval = 0.01
+        step_dir = tmp_path / ".forge" / "test_step"
+        step_dir.mkdir(parents=True)
+
+        cycle_data = {"cycle": 1, "max_cycles": 3, "verdict": "approved"}
+        (step_dir / "review_cycle_1.json").write_text(json.dumps(cycle_data))
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test_step",
+            settings=mock_settings,
+        )
+
+        results = []
+        iteration_count = 0
+
+        async for new_cycles in poller.poll():
+            iteration_count += 1
+            results.extend(new_cycles)
+            if iteration_count >= 2:  # Stop after 2 iterations
+                poller.stop()
+
+        assert len(results) == 1
+        assert results[0].verdict == "approved"
+
+
+# ---------------------------------------------------------------------------
+# ReviewCyclePoller reset and processed_count tests
+# ---------------------------------------------------------------------------
+
+
+class TestResetAndProcessedCount:
+    """Tests for reset and processed_count methods."""
+
+    @pytest.mark.asyncio
+    async def test_processed_count(self, tmp_path):
+        """Test processed_count tracks processed files."""
+        step_dir = tmp_path / ".forge" / "test_step"
+        step_dir.mkdir(parents=True)
+
+        for i in range(3):
+            data = {"cycle": i + 1, "max_cycles": 5, "verdict": "approved"}
+            (step_dir / f"review_cycle_{i + 1}.json").write_text(json.dumps(data))
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test_step",
+        )
+
+        assert poller.processed_count == 0
+
+        await poller.poll_once()
+        assert poller.processed_count == 3
+
+    @pytest.mark.asyncio
+    async def test_reset_clears_processed(self, tmp_path):
+        """Test that reset clears processed files."""
+        step_dir = tmp_path / ".forge" / "test_step"
+        step_dir.mkdir(parents=True)
+
+        data = {"cycle": 1, "max_cycles": 3, "verdict": "approved"}
+        (step_dir / "review_cycle_1.json").write_text(json.dumps(data))
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="test_step",
+        )
+
+        # First poll
+        result1 = await poller.poll_once()
+        assert len(result1) == 1
+        assert poller.processed_count == 1
+
+        # Reset and poll again
+        poller.reset()
+        assert poller.processed_count == 0
+
+        result2 = await poller.poll_once()
+        assert len(result2) == 1
+
+
+# ---------------------------------------------------------------------------
+# Integration-style tests
+# ---------------------------------------------------------------------------
+
+
+class TestPollerIntegration:
+    """Integration-style tests for the poller."""
+
+    @pytest.mark.asyncio
+    async def test_detects_files_within_time_limit(self, tmp_path, mock_settings):
+        """Test that new files are detected within acceptable time."""
+        mock_settings.auto_review_poll_interval = 0.1  # Fast polling for test
+        step_dir = tmp_path / ".forge" / "implement_task"
+        step_dir.mkdir(parents=True)
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="implement_task",
+            settings=mock_settings,
+        )
+
+        # Start polling
+        poller.poll()
+
+        # Create file after starting poll
+        cycle_data = {
+            "cycle": 1,
+            "max_cycles": 3,
+            "verdict": "rejected",
+            "feedback": "Tests failing",
+            "skill": "local-code-review",
+            "elapsed_seconds": 15.2,
+            "timestamp": "2024-01-15T12:00:00Z",
+        }
+        (step_dir / "review_cycle_1.json").write_text(json.dumps(cycle_data))
+
+        # Poll and check detection
+        detected = []
+        for _ in range(5):  # Max 5 iterations
+            result = await poller.__anext__()
+            detected.extend(result)
+            if detected:
+                break
+
+        poller.stop()
+
+        assert len(detected) == 1
+        assert detected[0].cycle == 1
+        assert detected[0].verdict == "rejected"
+        assert detected[0].feedback == "Tests failing"
+
+    @pytest.mark.asyncio
+    async def test_multiple_cycle_files_in_order(self, tmp_path):
+        """Test processing multiple review cycles in order."""
+        step_dir = tmp_path / ".forge" / "implement_task"
+        step_dir.mkdir(parents=True)
+
+        # Create multiple cycle files
+        for i in range(1, 4):
+            verdict = "approved" if i == 3 else "rejected"
+            data = {
+                "cycle": i,
+                "max_cycles": 3,
+                "verdict": verdict,
+                "feedback": f"Feedback for cycle {i}" if verdict == "rejected" else "",
+                "skill": "local-code-review",
+                "elapsed_seconds": i * 5.0,
+                "timestamp": f"2024-01-15T10:{i:02d}:00Z",
+            }
+            (step_dir / f"review_cycle_{i}.json").write_text(json.dumps(data))
+
+        poller = ReviewCyclePoller(
+            workspace_path=tmp_path,
+            step_name="implement_task",
+        )
+
+        result = await poller.poll_once()
+
+        assert len(result) == 3
+        assert result[0].cycle == 1
+        assert result[0].verdict == "rejected"
+        assert result[1].cycle == 2
+        assert result[1].verdict == "rejected"
+        assert result[2].cycle == 3
+        assert result[2].verdict == "approved"
+
+
+# ---------------------------------------------------------------------------
+# Fixture for mock_settings
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def mock_settings():
+    """Create mock settings for tests."""
+    return Settings(
+        redis_url="redis://localhost:6379/0",
+        jira_base_url="https://test.atlassian.net",
+        jira_api_token="test-token",
+        jira_user_email="test@example.com",
+        github_token="test-github-token",
+        anthropic_api_key="test-anthropic-key",
+        auto_review_poll_interval=5.0,
+    )
diff --git a/tests/unit/observability/test_review_recorder.py b/tests/unit/observability/test_review_recorder.py
new file mode 100644
index 00000000..f536c0b7
--- /dev/null
+++ b/tests/unit/observability/test_review_recorder.py
@@ -0,0 +1,628 @@
+"""Unit tests for the ReviewCycleRecorder class."""
+
+import json
+import logging
+from datetime import datetime, timezone
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from forge.observability.review_recorder import (
+    ReviewCycleData,
+    ReviewCycleRecorder,
+)
+
+
+# ---------------------------------------------------------------------------
+# ReviewCycleData tests
+# ---------------------------------------------------------------------------
+
+
+class TestReviewCycleData:
+    """Tests for ReviewCycleData dataclass."""
+
+    def test_all_fields(self):
+        """Test that all fields are correctly stored."""
+        ts = datetime(2024, 1, 15, 10, 30, 0, tzinfo=timezone.utc)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="Looks good!",
+            skill="code-review",
+            elapsed_seconds=12.5,
+            timestamp=ts,
+        )
+        assert data.cycle == 1
+        assert data.max_cycles == 3
+        assert data.verdict == "approved"
+        assert data.feedback == "Looks good!"
+        assert data.skill == "code-review"
+        assert data.elapsed_seconds == 12.5
+        assert data.timestamp == ts
+
+    def test_timestamp_is_datetime(self):
+        """Test that timestamp field is datetime type."""
+        ts = datetime.now(timezone.utc)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="review",
+            elapsed_seconds=1.0,
+            timestamp=ts,
+        )
+        assert isinstance(data.timestamp, datetime)
+
+    def test_to_dict_returns_dict(self):
+        """Test to_dict returns a dictionary."""
+        ts = datetime(2024, 1, 15, 10, 30, 0, tzinfo=timezone.utc)
+        data = ReviewCycleData(
+            cycle=2,
+            max_cycles=5,
+            verdict="rejected",
+            feedback="Needs fixes",
+            skill="local-code-review",
+            elapsed_seconds=8.3,
+            timestamp=ts,
+        )
+        result = data.to_dict()
+
+        assert isinstance(result, dict)
+        assert result["cycle"] == 2
+        assert result["max_cycles"] == 5
+        assert result["verdict"] == "rejected"
+        assert result["feedback"] == "Needs fixes"
+        assert result["skill"] == "local-code-review"
+        assert result["elapsed_seconds"] == 8.3
+
+    def test_to_dict_timestamp_is_iso_string(self):
+        """Test that to_dict converts timestamp to ISO 8601 string."""
+        ts = datetime(2024, 1, 15, 10, 30, 0, tzinfo=timezone.utc)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="",
+            elapsed_seconds=0.0,
+            timestamp=ts,
+        )
+        result = data.to_dict()
+
+        assert isinstance(result["timestamp"], str)
+        assert "2024-01-15" in result["timestamp"]
+        assert "10:30:00" in result["timestamp"]
+
+    def test_to_json_returns_string(self):
+        """Test that to_json returns a JSON string."""
+        ts = datetime(2024, 1, 15, 10, 30, 0, tzinfo=timezone.utc)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="Good work",
+            skill="review",
+            elapsed_seconds=5.0,
+            timestamp=ts,
+        )
+        result = data.to_json()
+
+        assert isinstance(result, str)
+        # Should be valid JSON
+        parsed = json.loads(result)
+        assert parsed["cycle"] == 1
+        assert parsed["verdict"] == "approved"
+
+    def test_to_json_pretty_printed(self):
+        """Test that to_json output is pretty-printed with indentation."""
+        ts = datetime(2024, 1, 15, 10, 30, 0, tzinfo=timezone.utc)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="",
+            elapsed_seconds=0.0,
+            timestamp=ts,
+        )
+        result = data.to_json()
+
+        # Should contain newlines (pretty printed)
+        assert "\n" in result
+        # Should contain indentation
+        assert "  " in result
+
+
+# ---------------------------------------------------------------------------
+# ReviewCycleRecorder initialization tests
+# ---------------------------------------------------------------------------
+
+
+class TestReviewCycleRecorderInit:
+    """Tests for ReviewCycleRecorder initialization."""
+
+    def test_init_with_step_name_only(self):
+        """Test initialization with just step_name."""
+        recorder = ReviewCycleRecorder(step_name="implement_task")
+
+        assert recorder.step_name == "implement_task"
+        assert recorder.mode is None
+        assert recorder.recording_dir is None
+
+    def test_init_with_log_mode(self):
+        """Test initialization with log mode."""
+        recorder = ReviewCycleRecorder(
+            step_name="generate_prd",
+            mode="log",
+        )
+
+        assert recorder.step_name == "generate_prd"
+        assert recorder.mode == "log"
+
+    def test_init_with_copy_mode_and_recording_dir(self):
+        """Test initialization with copy mode and recording directory."""
+        recorder = ReviewCycleRecorder(
+            step_name="implement_task",
+            mode="copy",
+            recording_dir=Path("/recordings"),
+        )
+
+        assert recorder.step_name == "implement_task"
+        assert recorder.mode == "copy"
+        assert recorder.recording_dir == Path("/recordings")
+
+    def test_init_copy_mode_requires_recording_dir(self):
+        """Test that copy mode raises ValueError without recording_dir."""
+        with pytest.raises(ValueError, match="recording_dir is required"):
+            ReviewCycleRecorder(
+                step_name="test_step",
+                mode="copy",
+            )
+
+    def test_init_log_mode_does_not_require_recording_dir(self):
+        """Test that log mode works without recording_dir."""
+        recorder = ReviewCycleRecorder(
+            step_name="test_step",
+            mode="log",
+        )
+        assert recorder.recording_dir is None
+
+    def test_step_dir_property(self):
+        """Test step_dir property returns correct path."""
+        recorder = ReviewCycleRecorder(
+            step_name="implement_task",
+            mode="copy",
+            recording_dir=Path("/recordings"),
+        )
+
+        assert recorder.step_dir == Path("/recordings/implement_task")
+
+    def test_step_dir_none_when_no_recording_dir(self):
+        """Test step_dir returns None when no recording_dir."""
+        recorder = ReviewCycleRecorder(
+            step_name="test_step",
+            mode="log",
+        )
+        assert recorder.step_dir is None
+
+
+# ---------------------------------------------------------------------------
+# ReviewCycleRecorder.record tests (log mode)
+# ---------------------------------------------------------------------------
+
+
+class TestReviewCycleRecorderRecord:
+    """Tests for ReviewCycleRecorder.record method."""
+
+    def test_record_disabled_mode_does_nothing(self, caplog):
+        """Test that record does nothing when mode is None."""
+        recorder = ReviewCycleRecorder(step_name="test_step", mode=None)
+        ts = datetime(2024, 1, 15, 10, 30, 0, tzinfo=timezone.utc)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="Good",
+            skill="review",
+            elapsed_seconds=1.0,
+            timestamp=ts,
+        )
+
+        with caplog.at_level(logging.INFO):
+            recorder.record(data)
+
+        # No log messages should be recorded
+        assert len(caplog.records) == 0
+
+    def test_record_log_mode_logs_at_info_level(self, caplog):
+        """Test that log mode records at INFO level."""
+        recorder = ReviewCycleRecorder(step_name="implement_task", mode="log")
+        ts = datetime(2024, 1, 15, 10, 30, 0, tzinfo=timezone.utc)
+        data = ReviewCycleData(
+            cycle=2,
+            max_cycles=5,
+            verdict="rejected",
+            feedback="Fix the tests",
+            skill="code-review",
+            elapsed_seconds=12.5,
+            timestamp=ts,
+        )
+
+        with caplog.at_level(logging.INFO):
+            recorder.record(data)
+
+        assert len(caplog.records) == 1
+        assert caplog.records[0].levelno == logging.INFO
+
+    def test_record_log_mode_includes_cycle_info(self, caplog):
+        """Test that log output includes cycle number and max cycles."""
+        recorder = ReviewCycleRecorder(step_name="test_step", mode="log")
+        ts = datetime.now(timezone.utc)
+        data = ReviewCycleData(
+            cycle=3,
+            max_cycles=5,
+            verdict="approved",
+            feedback="",
+            skill="review",
+            elapsed_seconds=1.0,
+            timestamp=ts,
+        )
+
+        with caplog.at_level(logging.INFO):
+            recorder.record(data)
+
+        log_message = caplog.records[0].message
+        assert "3/5" in log_message or ("3" in log_message and "5" in log_message)
+
+    def test_record_log_mode_includes_step_name(self, caplog):
+        """Test that log output includes step name."""
+        recorder = ReviewCycleRecorder(step_name="implement_task", mode="log")
+        ts = datetime.now(timezone.utc)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="review",
+            elapsed_seconds=1.0,
+            timestamp=ts,
+        )
+
+        with caplog.at_level(logging.INFO):
+            recorder.record(data)
+
+        log_message = caplog.records[0].message
+        assert "implement_task" in log_message
+
+    def test_record_log_mode_includes_verdict(self, caplog):
+        """Test that log output includes verdict."""
+        recorder = ReviewCycleRecorder(step_name="test_step", mode="log")
+        ts = datetime.now(timezone.utc)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback="",
+            skill="review",
+            elapsed_seconds=1.0,
+            timestamp=ts,
+        )
+
+        with caplog.at_level(logging.INFO):
+            recorder.record(data)
+
+        log_message = caplog.records[0].message
+        assert "rejected" in log_message
+
+    def test_record_log_mode_includes_skill(self, caplog):
+        """Test that log output includes skill name."""
+        recorder = ReviewCycleRecorder(step_name="test_step", mode="log")
+        ts = datetime.now(timezone.utc)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="local-code-review",
+            elapsed_seconds=1.0,
+            timestamp=ts,
+        )
+
+        with caplog.at_level(logging.INFO):
+            recorder.record(data)
+
+        log_message = caplog.records[0].message
+        assert "local-code-review" in log_message
+
+    def test_record_log_mode_includes_elapsed_seconds(self, caplog):
+        """Test that log output includes elapsed seconds."""
+        recorder = ReviewCycleRecorder(step_name="test_step", mode="log")
+        ts = datetime.now(timezone.utc)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="review",
+            elapsed_seconds=15.75,
+            timestamp=ts,
+        )
+
+        with caplog.at_level(logging.INFO):
+            recorder.record(data)
+
+        log_message = caplog.records[0].message
+        assert "15.75" in log_message
+
+    def test_record_log_mode_includes_feedback(self, caplog):
+        """Test that log output includes feedback."""
+        recorder = ReviewCycleRecorder(step_name="test_step", mode="log")
+        ts = datetime.now(timezone.utc)
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback="Fix the bug in line 42",
+            skill="review",
+            elapsed_seconds=1.0,
+            timestamp=ts,
+        )
+
+        with caplog.at_level(logging.INFO):
+            recorder.record(data)
+
+        log_message = caplog.records[0].message
+        assert "Fix the bug in line 42" in log_message
+
+    def test_record_log_mode_truncates_long_feedback(self, caplog):
+        """Test that log output truncates feedback longer than 100 chars."""
+        recorder = ReviewCycleRecorder(step_name="test_step", mode="log")
+        ts = datetime.now(timezone.utc)
+        long_feedback = "x" * 150
+        data = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback=long_feedback,
+            skill="review",
+            elapsed_seconds=1.0,
+            timestamp=ts,
+        )
+
+        with caplog.at_level(logging.INFO):
+            recorder.record(data)
+
+        log_message = caplog.records[0].message
+        # Should truncate and add ellipsis
+        assert "..." in log_message
+        # Should not contain the full feedback
+        assert long_feedback not in log_message
+
+
+# ---------------------------------------------------------------------------
+# ReviewCycleRecorder.record_file tests (copy mode)
+# ---------------------------------------------------------------------------
+
+
+class TestReviewCycleRecorderRecordFile:
+    """Tests for ReviewCycleRecorder.record_file method."""
+
+    def test_record_file_disabled_mode_returns_none(self, tmp_path):
+        """Test that record_file returns None when mode is None."""
+        recorder = ReviewCycleRecorder(step_name="test_step", mode=None)
+
+        # Create a source file
+        source_file = tmp_path / "review_cycle_1.json"
+        source_file.write_text('{"cycle": 1}')
+
+        result = recorder.record_file(source_file)
+        assert result is None
+
+    def test_record_file_log_mode_returns_none(self, tmp_path):
+        """Test that record_file returns None when mode is log."""
+        recorder = ReviewCycleRecorder(step_name="test_step", mode="log")
+
+        # Create a source file
+        source_file = tmp_path / "review_cycle_1.json"
+        source_file.write_text('{"cycle": 1}')
+
+        result = recorder.record_file(source_file)
+        assert result is None
+
+    def test_record_file_copy_mode_creates_step_directory(self, tmp_path):
+        """Test that copy mode creates the step-specific subdirectory."""
+        recording_dir = tmp_path / "recordings"
+        recorder = ReviewCycleRecorder(
+            step_name="implement_task",
+            mode="copy",
+            recording_dir=recording_dir,
+        )
+
+        # Create a source file
+        source_dir = tmp_path / "source"
+        source_dir.mkdir()
+        source_file = source_dir / "review_cycle_1.json"
+        source_file.write_text('{"cycle": 1}')
+
+        recorder.record_file(source_file)
+
+        step_dir = recording_dir / "implement_task"
+        assert step_dir.exists()
+        assert step_dir.is_dir()
+
+    def test_record_file_copy_mode_copies_file(self, tmp_path):
+        """Test that copy mode copies the file to recording directory."""
+        recording_dir = tmp_path / "recordings"
+        recorder = ReviewCycleRecorder(
+            step_name="implement_task",
+            mode="copy",
+            recording_dir=recording_dir,
+        )
+
+        # Create a source file with content
+        source_dir = tmp_path / "source"
+        source_dir.mkdir()
+        source_file = source_dir / "review_cycle_1.json"
+        source_file.write_text('{"cycle": 1, "verdict": "approved"}')
+
+        result = recorder.record_file(source_file)
+
+        expected_dest = recording_dir / "implement_task" / "review_cycle_1.json"
+        assert result == expected_dest
+        assert expected_dest.exists()
+        assert expected_dest.read_text() == '{"cycle": 1, "verdict": "approved"}'
+
+    def test_record_file_copy_mode_preserves_filename(self, tmp_path):
+        """Test that copy mode preserves the original filename."""
+        recording_dir = tmp_path / "recordings"
+        recorder = ReviewCycleRecorder(
+            step_name="test_step",
+            mode="copy",
+            recording_dir=recording_dir,
+        )
+
+        source_dir = tmp_path / "source"
+        source_dir.mkdir()
+        source_file = source_dir / "review_cycle_5.json"
+        source_file.write_text('{"cycle": 5}')
+
+        result = recorder.record_file(source_file)
+
+        assert result is not None
+        assert result.name == "review_cycle_5.json"
+
+    def test_record_file_copy_mode_handles_multiple_files(self, tmp_path):
+        """Test that multiple files can be copied."""
+        recording_dir = tmp_path / "recordings"
+        recorder = ReviewCycleRecorder(
+            step_name="test_step",
+            mode="copy",
+            recording_dir=recording_dir,
+        )
+
+        source_dir = tmp_path / "source"
+        source_dir.mkdir()
+
+        # Copy multiple files
+        for i in range(1, 4):
+            source_file = source_dir / f"review_cycle_{i}.json"
+            source_file.write_text(f'{{"cycle": {i}}}')
+            recorder.record_file(source_file)
+
+        step_dir = recording_dir / "test_step"
+        assert (step_dir / "review_cycle_1.json").exists()
+        assert (step_dir / "review_cycle_2.json").exists()
+        assert (step_dir / "review_cycle_3.json").exists()
+
+    def test_record_file_nonexistent_source_returns_none(self, tmp_path, caplog):
+        """Test that nonexistent source file returns None and logs warning."""
+        recording_dir = tmp_path / "recordings"
+        recorder = ReviewCycleRecorder(
+            step_name="test_step",
+            mode="copy",
+            recording_dir=recording_dir,
+        )
+
+        nonexistent_file = tmp_path / "missing.json"
+
+        with caplog.at_level(logging.WARNING):
+            result = recorder.record_file(nonexistent_file)
+
+        assert result is None
+        assert any("does not exist" in record.message for record in caplog.records)
+
+    def test_record_file_returns_path_on_success(self, tmp_path):
+        """Test that successful copy returns the destination path."""
+        recording_dir = tmp_path / "recordings"
+        recorder = ReviewCycleRecorder(
+            step_name="my_step",
+            mode="copy",
+            recording_dir=recording_dir,
+        )
+
+        source_dir = tmp_path / "source"
+        source_dir.mkdir()
+        source_file = source_dir / "review_cycle_1.json"
+        source_file.write_text('{"cycle": 1}')
+
+        result = recorder.record_file(source_file)
+
+        assert result is not None
+        assert result == recording_dir / "my_step" / "review_cycle_1.json"
+
+    def test_record_file_directory_creation_error_returns_none(self, tmp_path, caplog):
+        """Test that directory creation error returns None."""
+        # Create a file where the directory should be
+        recording_dir = tmp_path / "recordings"
+        recording_dir.mkdir()
+        blocking_file = recording_dir / "test_step"
+        blocking_file.write_text("blocking file")
+
+        recorder = ReviewCycleRecorder(
+            step_name="test_step",
+            mode="copy",
+            recording_dir=recording_dir,
+        )
+
+        source_dir = tmp_path / "source"
+        source_dir.mkdir()
+        source_file = source_dir / "review_cycle_1.json"
+        source_file.write_text('{"cycle": 1}')
+
+        with caplog.at_level(logging.ERROR):
+            result = recorder.record_file(source_file)
+
+        assert result is None
+        assert any("Failed to create directory" in record.message for record in caplog.records)
+
+
+# ---------------------------------------------------------------------------
+# Settings configuration tests
+# ---------------------------------------------------------------------------
+
+
+class TestSettingsConfiguration:
+    """Tests for AUTO_REVIEW_RECORD_POLLED_FILES configuration."""
+
+    def test_setting_default_is_none(self):
+        """Test that the default value is None."""
+        from forge.config import Settings
+
+        with patch.dict("os.environ", {}, clear=True):
+            # Create settings with minimal required fields
+            settings = Settings(
+                jira_base_url="https://example.atlassian.net",
+                jira_api_token="token",
+                jira_user_email="user@example.com",
+                github_token="ghtoken",
+            )
+            assert settings.auto_review_record_polled_files is None
+
+    def test_setting_accepts_log_value(self):
+        """Test that 'log' value is accepted."""
+        from forge.config import Settings
+
+        with patch.dict("os.environ", {"AUTO_REVIEW_RECORD_POLLED_FILES": "log"}, clear=False):
+            settings = Settings(
+                jira_base_url="https://example.atlassian.net",
+                jira_api_token="token",
+                jira_user_email="user@example.com",
+                github_token="ghtoken",
+            )
+            assert settings.auto_review_record_polled_files == "log"
+
+    def test_setting_accepts_copy_value(self):
+        """Test that 'copy' value is accepted."""
+        from forge.config import Settings
+
+        with patch.dict("os.environ", {"AUTO_REVIEW_RECORD_POLLED_FILES": "copy"}, clear=False):
+            settings = Settings(
+                jira_base_url="https://example.atlassian.net",
+                jira_api_token="token",
+                jira_user_email="user@example.com",
+                github_token="ghtoken",
+            )
+            assert settings.auto_review_record_polled_files == "copy"
diff --git a/tests/unit/sandbox/test_runner_review_polling.py b/tests/unit/sandbox/test_runner_review_polling.py
new file mode 100644
index 00000000..ed5f4ec3
--- /dev/null
+++ b/tests/unit/sandbox/test_runner_review_polling.py
@@ -0,0 +1,1203 @@
+"""Unit tests for review polling integration in ContainerRunner."""
+
+import asyncio
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from forge.observability import ReviewCycleData
+from forge.sandbox.runner import (
+    ContainerResult,
+    ContainerRunner,
+    _poller_to_recorder_cycle,
+)
+
+# ---------------------------------------------------------------------------
+# Helper to create a runner instance without __init__ side effects
+# ---------------------------------------------------------------------------
+
+
+def _runner_without_init() -> ContainerRunner:
+    """Create a ContainerRunner instance without running __init__."""
+    return object.__new__(ContainerRunner)
+
+
+# ---------------------------------------------------------------------------
+# ContainerResult tests
+# ---------------------------------------------------------------------------
+
+
+class TestContainerResultReviewCycles:
+    """Tests for ContainerResult review_cycles field."""
+
+    def test_default_review_cycles_empty(self):
+        """Test that review_cycles defaults to empty list."""
+        result = ContainerResult(
+            success=True,
+            exit_code=0,
+            stdout="",
+            stderr="",
+        )
+        assert result.review_cycles == []
+
+    def test_review_cycles_with_data(self):
+        """Test that review_cycles can store ReviewCycleData."""
+        cycles = [
+            ReviewCycleData(
+                cycle=1,
+                max_cycles=3,
+                verdict="rejected",
+                feedback="Fix the bug",
+                skill="local-code-review",
+                elapsed_seconds=5.5,
+                timestamp="2024-01-15T10:30:00Z",
+            ),
+            ReviewCycleData(
+                cycle=2,
+                max_cycles=3,
+                verdict="approved",
+                feedback="",
+                skill="local-code-review",
+                elapsed_seconds=3.2,
+                timestamp="2024-01-15T10:35:00Z",
+            ),
+        ]
+        result = ContainerResult(
+            success=True,
+            exit_code=0,
+            stdout="output",
+            stderr="",
+            review_cycles=cycles,
+        )
+        assert len(result.review_cycles) == 2
+        assert result.review_cycles[0].verdict == "rejected"
+        assert result.review_cycles[1].verdict == "approved"
+
+
+# ---------------------------------------------------------------------------
+# _poller_to_recorder_cycle conversion tests
+# ---------------------------------------------------------------------------
+
+
+class TestPollerToRecorderCycle:
+    """Tests for the _poller_to_recorder_cycle helper function."""
+
+    def test_converts_all_fields(self):
+        """Test that all fields are converted correctly."""
+        poller_cycle = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="Looks good",
+            skill="implement-task",
+            elapsed_seconds=12.5,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+        recorder_cycle = _poller_to_recorder_cycle(poller_cycle, "implement_task")
+
+        assert recorder_cycle.cycle == 1
+        assert recorder_cycle.max_cycles == 3
+        assert recorder_cycle.verdict == "approved"
+        assert recorder_cycle.feedback == "Looks good"
+        assert recorder_cycle.skill == "implement-task"
+        assert recorder_cycle.elapsed_seconds == 12.5
+        # Timestamp is converted to datetime
+        assert recorder_cycle.timestamp.year == 2024
+        assert recorder_cycle.timestamp.month == 1
+        assert recorder_cycle.timestamp.day == 15
+
+    def test_handles_invalid_timestamp(self):
+        """Test that invalid timestamp falls back to now."""
+        poller_cycle = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback="",
+            skill="skill",
+            elapsed_seconds=1.0,
+            timestamp="not-a-valid-timestamp",
+        )
+        recorder_cycle = _poller_to_recorder_cycle(poller_cycle, "step")
+
+        # Should have a valid datetime (fallback to now)
+        assert recorder_cycle.timestamp is not None
+
+    def test_handles_empty_timestamp(self):
+        """Test that empty timestamp falls back to now."""
+        poller_cycle = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="skill",
+            elapsed_seconds=1.0,
+            timestamp="",
+        )
+        recorder_cycle = _poller_to_recorder_cycle(poller_cycle, "step")
+
+        # Should have a valid datetime (fallback to now)
+        assert recorder_cycle.timestamp is not None
+
+
+# ---------------------------------------------------------------------------
+# ContainerRunner.run() with step_name tests
+# ---------------------------------------------------------------------------
+
+
+class TestRunWithStepName:
+    """Tests for ContainerRunner.run() with step_name parameter."""
+
+    @pytest.mark.asyncio
+    async def test_run_accepts_step_name_parameter(self, tmp_path: Path):
+        """Test that run() accepts the step_name parameter."""
+        runner = _runner_without_init()
+        runner.settings = MagicMock()
+        runner.settings.container_image = "test:latest"
+        runner.settings.container_timeout = 60
+        runner.settings.container_memory = "1g"
+        runner.settings.container_cpus = "1"
+        runner.settings.container_keep = False
+        runner.settings.auto_review_poll_interval = 1.0
+        runner.settings.auto_review_record_polled_files = None
+
+        # Create a mock process that completes immediately
+        mock_process = MagicMock()
+        mock_process.communicate = AsyncMock(return_value=(b"output", b""))
+        mock_process.returncode = 0
+
+        with (
+            patch.object(runner, "_build_container_name", return_value="test-container"),
+            patch.object(runner, "_build_podman_command", return_value=["podman", "run"]),
+            patch(
+                "forge.sandbox.runner.asyncio.create_subprocess_exec",
+                return_value=mock_process,
+            ),
+        ):
+            result = await runner.run(
+                workspace_path=tmp_path,
+                task_summary="Test task",
+                task_description="Test description",
+                step_name="implement_task",
+            )
+
+        assert result.success is True
+        assert result.review_cycles == []
+
+    @pytest.mark.asyncio
+    async def test_run_without_step_name_disables_polling(self, tmp_path: Path):
+        """Test that run() without step_name disables polling."""
+        runner = _runner_without_init()
+        runner.settings = MagicMock()
+        runner.settings.container_image = "test:latest"
+        runner.settings.container_timeout = 60
+        runner.settings.container_memory = "1g"
+        runner.settings.container_cpus = "1"
+        runner.settings.container_keep = False
+
+        mock_process = MagicMock()
+        mock_process.communicate = AsyncMock(return_value=(b"output", b""))
+        mock_process.returncode = 0
+
+        with (
+            patch.object(runner, "_build_container_name", return_value="test-container"),
+            patch.object(runner, "_build_podman_command", return_value=["podman", "run"]),
+            patch(
+                "forge.sandbox.runner.asyncio.create_subprocess_exec",
+                return_value=mock_process,
+            ),
+            patch("forge.sandbox.runner.ReviewCyclePoller") as mock_poller_class,
+        ):
+            result = await runner.run(
+                workspace_path=tmp_path,
+                task_summary="Test task",
+                task_description="Test description",
+                # No step_name provided
+            )
+
+        # Poller should not be created when step_name is None
+        mock_poller_class.assert_not_called()
+        assert result.review_cycles == []
+
+
+# ---------------------------------------------------------------------------
+# Background polling task tests
+# ---------------------------------------------------------------------------
+
+
+class TestBackgroundPollingTask:
+    """Tests for the background polling task during container execution."""
+
+    @pytest.mark.asyncio
+    async def test_polling_task_started_with_step_name(self, tmp_path: Path):
+        """Test that polling task is started when step_name is provided."""
+        runner = _runner_without_init()
+        runner.settings = MagicMock()
+        runner.settings.container_image = "test:latest"
+        runner.settings.container_timeout = 60
+        runner.settings.container_memory = "1g"
+        runner.settings.container_cpus = "1"
+        runner.settings.container_keep = False
+        runner.settings.auto_review_poll_interval = 1.0
+        runner.settings.auto_review_record_polled_files = None
+
+        mock_process = MagicMock()
+        mock_process.communicate = AsyncMock(return_value=(b"output", b""))
+        mock_process.returncode = 0
+
+        # Track if polling was started
+        poller_created = False
+
+        def create_poller(*_args, **_kwargs):
+            nonlocal poller_created
+            poller_created = True
+            mock_poller = MagicMock()
+            mock_poller.poll = MagicMock(return_value=AsyncIteratorMock([]))
+            mock_poller.poll_once = AsyncMock(return_value=[])
+            mock_poller.stop = MagicMock()
+            mock_poller.step_name = "implement_task"
+            return mock_poller
+
+        with (
+            patch.object(runner, "_build_container_name", return_value="test-container"),
+            patch.object(runner, "_build_podman_command", return_value=["podman", "run"]),
+            patch(
+                "forge.sandbox.runner.asyncio.create_subprocess_exec",
+                return_value=mock_process,
+            ),
+            patch(
+                "forge.sandbox.runner.ReviewCyclePoller",
+                side_effect=create_poller,
+            ),
+            patch("forge.sandbox.runner.ReviewCycleRecorder"),
+        ):
+            await runner.run(
+                workspace_path=tmp_path,
+                task_summary="Test task",
+                task_description="Test description",
+                step_name="implement_task",
+            )
+
+        assert poller_created is True
+
+    @pytest.mark.asyncio
+    async def test_polling_task_cancelled_on_container_exit(self, tmp_path: Path):
+        """Test that polling task is cancelled when container exits."""
+        runner = _runner_without_init()
+        runner.settings = MagicMock()
+        runner.settings.container_image = "test:latest"
+        runner.settings.container_timeout = 60
+        runner.settings.container_memory = "1g"
+        runner.settings.container_cpus = "1"
+        runner.settings.container_keep = False
+        runner.settings.auto_review_poll_interval = 1.0
+        runner.settings.auto_review_record_polled_files = None
+
+        mock_process = MagicMock()
+        mock_process.communicate = AsyncMock(return_value=(b"output", b""))
+        mock_process.returncode = 0
+
+        stop_called = False
+
+        def create_poller(*_args, **_kwargs):
+            mock_poller = MagicMock()
+            mock_poller.poll = MagicMock(return_value=AsyncIteratorMock([]))
+            mock_poller.poll_once = AsyncMock(return_value=[])
+
+            def stop():
+                nonlocal stop_called
+                stop_called = True
+
+            mock_poller.stop = stop
+            mock_poller.step_name = "implement_task"
+            return mock_poller
+
+        with (
+            patch.object(runner, "_build_container_name", return_value="test-container"),
+            patch.object(runner, "_build_podman_command", return_value=["podman", "run"]),
+            patch(
+                "forge.sandbox.runner.asyncio.create_subprocess_exec",
+                return_value=mock_process,
+            ),
+            patch(
+                "forge.sandbox.runner.ReviewCyclePoller",
+                side_effect=create_poller,
+            ),
+            patch("forge.sandbox.runner.ReviewCycleRecorder"),
+        ):
+            await runner.run(
+                workspace_path=tmp_path,
+                task_summary="Test task",
+                task_description="Test description",
+                step_name="implement_task",
+            )
+
+        assert stop_called is True
+
+
+# ---------------------------------------------------------------------------
+# Review cycle collection tests
+# ---------------------------------------------------------------------------
+
+
+class TestReviewCycleCollection:
+    """Tests for collecting review cycles into ContainerResult."""
+
+    @pytest.mark.asyncio
+    async def test_detected_cycles_added_to_result(self, tmp_path: Path):
+        """Test that detected review cycles are added to the result."""
+        runner = _runner_without_init()
+        runner.settings = MagicMock()
+        runner.settings.container_image = "test:latest"
+        runner.settings.container_timeout = 60
+        runner.settings.container_memory = "1g"
+        runner.settings.container_cpus = "1"
+        runner.settings.container_keep = False
+        runner.settings.auto_review_poll_interval = 0.1
+        runner.settings.auto_review_record_polled_files = "log"
+
+        mock_process = MagicMock()
+        mock_process.communicate = AsyncMock(return_value=(b"output", b""))
+        mock_process.returncode = 0
+
+        # Simulate a review cycle file being detected during final poll
+        detected_cycle = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="LGTM",
+            skill="local-code-review",
+            elapsed_seconds=5.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        def create_poller(*_args, **_kwargs):
+            mock_poller = MagicMock()
+            mock_poller.poll = MagicMock(return_value=AsyncIteratorMock([]))
+            mock_poller.poll_once = AsyncMock(return_value=[detected_cycle])
+            mock_poller.stop = MagicMock()
+            mock_poller.step_name = "implement_task"
+            return mock_poller
+
+        with (
+            patch.object(runner, "_build_container_name", return_value="test-container"),
+            patch.object(runner, "_build_podman_command", return_value=["podman", "run"]),
+            patch(
+                "forge.sandbox.runner.asyncio.create_subprocess_exec",
+                return_value=mock_process,
+            ),
+            patch(
+                "forge.sandbox.runner.ReviewCyclePoller",
+                side_effect=create_poller,
+            ),
+            patch("forge.sandbox.runner.ReviewCycleRecorder"),
+            patch("forge.sandbox.runner.record_review_cycle"),
+            patch("forge.sandbox.runner.record_review_verdict"),
+            patch("forge.sandbox.runner.observe_review_duration"),
+        ):
+            result = await runner.run(
+                workspace_path=tmp_path,
+                task_summary="Test task",
+                task_description="Test description",
+                step_name="implement_task",
+            )
+
+        assert len(result.review_cycles) == 1
+        assert result.review_cycles[0].verdict == "approved"
+        assert result.review_cycles[0].skill == "local-code-review"
+
+    @pytest.mark.asyncio
+    async def test_cycles_collected_even_on_timeout(self, tmp_path: Path):
+        """Test that cycles are collected even when container times out."""
+        runner = _runner_without_init()
+        runner.settings = MagicMock()
+        runner.settings.container_image = "test:latest"
+        runner.settings.container_timeout = 1
+        runner.settings.container_memory = "1g"
+        runner.settings.container_cpus = "1"
+        runner.settings.container_keep = False
+        runner.settings.auto_review_poll_interval = 0.1
+        runner.settings.auto_review_record_polled_files = None
+
+        mock_process = MagicMock()
+        mock_process.communicate = AsyncMock(side_effect=TimeoutError())
+        mock_process.returncode = None
+
+        # Pre-collected cycle (simulating one detected before timeout)
+        timeout_cycle = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="rejected",
+            feedback="Partial review",
+            skill="local-code-review",
+            elapsed_seconds=2.0,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        cycles_collected = []
+
+        def create_poller(*_args, **_kwargs):
+            mock_poller = MagicMock()
+
+            async def poll_iter():
+                # Yield one cycle before timeout
+                cycles_collected.append(timeout_cycle)
+                yield [timeout_cycle]
+                # Then wait forever (simulating ongoing polling)
+                await asyncio.sleep(1000)
+
+            mock_poller.poll = MagicMock(return_value=poll_iter())
+            mock_poller.poll_once = AsyncMock(return_value=[])
+            mock_poller.stop = MagicMock()
+            mock_poller.step_name = "implement_task"
+            return mock_poller
+
+        with (
+            patch.object(runner, "_build_container_name", return_value="test-container"),
+            patch.object(runner, "_build_podman_command", return_value=["podman", "run"]),
+            patch(
+                "forge.sandbox.runner.asyncio.create_subprocess_exec",
+                return_value=mock_process,
+            ),
+            patch.object(runner, "_stop_timed_out_container", new=AsyncMock()),
+            patch(
+                "forge.sandbox.runner.ReviewCyclePoller",
+                side_effect=create_poller,
+            ),
+            patch("forge.sandbox.runner.ReviewCycleRecorder"),
+            patch("forge.sandbox.runner.record_review_cycle"),
+            patch("forge.sandbox.runner.record_review_verdict"),
+            patch("forge.sandbox.runner.observe_review_duration"),
+        ):
+            result = await runner.run(
+                workspace_path=tmp_path,
+                task_summary="Test task",
+                task_description="Test description",
+                step_name="implement_task",
+            )
+
+        # Result should indicate failure
+        assert result.success is False
+        assert "Timeout" in (result.error_message or "")
+        # But cycles collected should still be present
+        assert len(result.review_cycles) >= 0  # May have collected the cycle
+
+
+# ---------------------------------------------------------------------------
+# Metrics recording tests
+# ---------------------------------------------------------------------------
+
+
+class TestMetricsRecording:
+    """Tests for Prometheus metrics recording during polling."""
+
+    @pytest.mark.asyncio
+    async def test_metrics_recorded_for_detected_cycles(self, tmp_path: Path):
+        """Test that metrics are recorded for each detected cycle."""
+        runner = _runner_without_init()
+        runner.settings = MagicMock()
+        runner.settings.container_image = "test:latest"
+        runner.settings.container_timeout = 60
+        runner.settings.container_memory = "1g"
+        runner.settings.container_cpus = "1"
+        runner.settings.container_keep = False
+        runner.settings.auto_review_poll_interval = 0.1
+        runner.settings.auto_review_record_polled_files = None
+
+        mock_process = MagicMock()
+        mock_process.communicate = AsyncMock(return_value=(b"output", b""))
+        mock_process.returncode = 0
+
+        detected_cycle = ReviewCycleData(
+            cycle=1,
+            max_cycles=3,
+            verdict="approved",
+            feedback="",
+            skill="implement-task",
+            elapsed_seconds=10.5,
+            timestamp="2024-01-15T10:30:00Z",
+        )
+
+        def create_poller(*_args, **_kwargs):
+            mock_poller = MagicMock()
+            mock_poller.poll = MagicMock(return_value=AsyncIteratorMock([]))
+            mock_poller.poll_once = AsyncMock(return_value=[detected_cycle])
+            mock_poller.stop = MagicMock()
+            mock_poller.step_name = "implement_task"
+            return mock_poller
+
+        with (
+            patch.object(runner, "_build_container_name", return_value="test-container"),
+            patch.object(runner, "_build_podman_command", return_value=["podman", "run"]),
+            patch(
+                "forge.sandbox.runner.asyncio.create_subprocess_exec",
+                return_value=mock_process,
+            ),
+            patch(
+                "forge.sandbox.runner.ReviewCyclePoller",
+                side_effect=create_poller,
+            ),
+            patch("forge.sandbox.runner.ReviewCycleRecorder"),
+            patch("forge.sandbox.runner.record_review_cycle") as mock_cycle,
+            patch("forge.sandbox.runner.record_review_verdict") as mock_verdict,
+            patch("forge.sandbox.runner.observe_review_duration") as mock_duration,
+        ):
+            await runner.run(
+                workspace_path=tmp_path,
+                task_summary="Test task",
+                task_description="Test description",
+                step_name="implement_task",
+            )
+
+        # Verify metrics were recorded
+        mock_cycle.assert_called_with("implement-task", "implement_task")
+        mock_verdict.assert_called_with("implement-task", "implement_task", "approved")
+        mock_duration.assert_called_with("implement-task", "implement_task", 10.5)
+
+
+# ---------------------------------------------------------------------------
+# Step name path organization tests
+# ---------------------------------------------------------------------------
+
+
+class TestStepNamePathOrganization:
+    """Tests for step-name based path organization."""
+
+    @pytest.mark.asyncio
+    async def test_step_name_passed_to_poller(self, tmp_path: Path):
+        """Test that step_name is passed to the poller correctly."""
+        runner = _runner_without_init()
+        runner.settings = MagicMock()
+        runner.settings.container_image = "test:latest"
+        runner.settings.container_timeout = 60
+        runner.settings.container_memory = "1g"
+        runner.settings.container_cpus = "1"
+        runner.settings.container_keep = False
+        runner.settings.auto_review_poll_interval = 1.0
+        runner.settings.auto_review_record_polled_files = None
+
+        mock_process = MagicMock()
+        mock_process.communicate = AsyncMock(return_value=(b"output", b""))
+        mock_process.returncode = 0
+
+        captured_step_name = None
+
+        def create_poller(workspace_path=None, step_name=None, settings=None):
+            nonlocal captured_step_name
+            captured_step_name = step_name
+            mock_poller = MagicMock()
+            mock_poller.poll = MagicMock(return_value=AsyncIteratorMock([]))
+            mock_poller.poll_once = AsyncMock(return_value=[])
+            mock_poller.stop = MagicMock()
+            mock_poller.step_name = step_name
+            # Suppress unused warnings
+            _ = workspace_path, settings
+            return mock_poller
+
+        with (
+            patch.object(runner, "_build_container_name", return_value="test-container"),
+            patch.object(runner, "_build_podman_command", return_value=["podman", "run"]),
+            patch(
+                "forge.sandbox.runner.asyncio.create_subprocess_exec",
+                return_value=mock_process,
+            ),
+            patch(
+                "forge.sandbox.runner.ReviewCyclePoller",
+                side_effect=create_poller,
+            ),
+            patch("forge.sandbox.runner.ReviewCycleRecorder"),
+        ):
+            await runner.run(
+                workspace_path=tmp_path,
+                task_summary="Test task",
+                task_description="Test description",
+                step_name="local_review",
+            )
+
+        assert captured_step_name == "local_review"
+
+    @pytest.mark.asyncio
+    async def test_step_name_passed_to_recorder(self, tmp_path: Path):
+        """Test that step_name is passed to the recorder correctly."""
+        runner = _runner_without_init()
+        runner.settings = MagicMock()
+        runner.settings.container_image = "test:latest"
+        runner.settings.container_timeout = 60
+        runner.settings.container_memory = "1g"
+        runner.settings.container_cpus = "1"
+        runner.settings.container_keep = False
+        runner.settings.auto_review_poll_interval = 1.0
+        runner.settings.auto_review_record_polled_files = "log"
+
+        mock_process = MagicMock()
+        mock_process.communicate = AsyncMock(return_value=(b"output", b""))
+        mock_process.returncode = 0
+
+        captured_recorder_step_name = None
+
+        def create_poller(**kwargs):
+            mock_poller = MagicMock()
+            mock_poller.poll = MagicMock(return_value=AsyncIteratorMock([]))
+            mock_poller.poll_once = AsyncMock(return_value=[])
+            mock_poller.stop = MagicMock()
+            mock_poller.step_name = kwargs.get("step_name", "")
+            return mock_poller
+
+        def create_recorder(step_name=None, mode=None, recording_dir=None):
+            nonlocal captured_recorder_step_name
+            captured_recorder_step_name = step_name
+            mock_recorder = MagicMock()
+            mock_recorder.record = MagicMock()
+            mock_recorder.record_file = MagicMock()
+            # Suppress unused warnings
+            _ = mode, recording_dir
+            return mock_recorder
+
+        with (
+            patch.object(runner, "_build_container_name", return_value="test-container"),
+            patch.object(runner, "_build_podman_command", return_value=["podman", "run"]),
+            patch(
+                "forge.sandbox.runner.asyncio.create_subprocess_exec",
+                return_value=mock_process,
+            ),
+            patch(
+                "forge.sandbox.runner.ReviewCyclePoller",
+                side_effect=create_poller,
+            ),
+            patch(
+                "forge.sandbox.runner.ReviewCycleRecorder",
+                side_effect=create_recorder,
+            ),
+        ):
+            await runner.run(
+                workspace_path=tmp_path,
+                task_summary="Test task",
+                task_description="Test description",
+                step_name="fix_ci",
+            )
+
+        assert captured_recorder_step_name == "fix_ci"
+
+
+# ---------------------------------------------------------------------------
+# Helper classes for async iteration mocking
+# ---------------------------------------------------------------------------
+
+
+class AsyncIteratorMock:
+    """Mock for async iterators that yields a list of items once and then stops."""
+
+    def __init__(self, items: list):
+        self.items = items
+        self.index = 0
+
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        if self.index < len(self.items):
+            item = self.items[self.index]
+            self.index += 1
+            return item
+        raise StopAsyncIteration
+
+
+# ---------------------------------------------------------------------------
+# _sweep_review_cycles() tests
+# ---------------------------------------------------------------------------
+
+
+class TestSweepReviewCycles:
+    """Tests for the _sweep_review_cycles() post-execution sweep."""
+
+    def test_sweep_finds_missed_file(self, tmp_path: Path, caplog):
+        """Test that sweep catches files missed during async polling."""
+        import json
+        import logging
+
+        runner = _runner_without_init()
+
+        # Create a review cycle file that was NOT processed by the poller
+        step_name = "implement_task"
+        cycle_dir = tmp_path / ".forge" / step_name
+        cycle_dir.mkdir(parents=True)
+
+        cycle_data = {
+            "cycle": 1,
+            "max_cycles": 3,
+            "verdict": "approved",
+            "feedback": "Looks good",
+            "skill": "local-review",
+            "elapsed_seconds": 5.5,
+            "timestamp": "2024-01-15T10:30:00Z",
+        }
+        cycle_file = cycle_dir / "review_cycle_1.json"
+        cycle_file.write_text(json.dumps(cycle_data))
+
+        # Empty processed files set - nothing was caught during polling
+        processed_files: set[str] = set()
+        collected_cycles: list[ReviewCycleData] = []
+
+        # Mock recorder
+        mock_recorder = MagicMock()
+
+        with caplog.at_level(logging.WARNING):
+            runner._sweep_review_cycles(
+                workspace_path=tmp_path,
+                step_name=step_name,
+                processed_files=processed_files,
+                collected_cycles=collected_cycles,
+                recorder=mock_recorder,
+            )
+
+        # Should have found the missed file
+        assert len(collected_cycles) == 1
+        assert collected_cycles[0].cycle == 1
+        assert collected_cycles[0].verdict == "approved"
+        assert collected_cycles[0].skill == "local-review"
+
+        # Should log a warning about missed files
+        assert "Sweep caught 1 review cycle file(s) missed" in caplog.text
+        assert step_name in caplog.text
+
+    def test_sweep_deduplicates_against_processed_files(self, tmp_path: Path):
+        """Test that sweep skips files already processed by async poller."""
+        import json
+
+        runner = _runner_without_init()
+
+        step_name = "implement_task"
+        cycle_dir = tmp_path / ".forge" / step_name
+        cycle_dir.mkdir(parents=True)
+
+        # Create two cycle files
+        for i in [1, 2]:
+            cycle_data = {
+                "cycle": i,
+                "max_cycles": 3,
+                "verdict": "approved",
+                "feedback": f"Review {i}",
+                "skill": "local-review",
+                "elapsed_seconds": float(i),
+                "timestamp": f"2024-01-15T10:3{i}:00Z",
+            }
+            cycle_file = cycle_dir / f"review_cycle_{i}.json"
+            cycle_file.write_text(json.dumps(cycle_data))
+
+        # Simulate that cycle_1 was already processed
+        cycle_1_path = str(cycle_dir / "review_cycle_1.json")
+        processed_files: set[str] = {cycle_1_path}
+        collected_cycles: list[ReviewCycleData] = []
+
+        mock_recorder = MagicMock()
+
+        runner._sweep_review_cycles(
+            workspace_path=tmp_path,
+            step_name=step_name,
+            processed_files=processed_files,
+            collected_cycles=collected_cycles,
+            recorder=mock_recorder,
+        )
+
+        # Should only find cycle_2 (cycle_1 was already processed)
+        assert len(collected_cycles) == 1
+        assert collected_cycles[0].cycle == 2
+
+    def test_sweep_no_warning_when_no_missed_files(self, tmp_path: Path, caplog):
+        """Test that no warning is logged when all files were already processed."""
+        import json
+        import logging
+
+        runner = _runner_without_init()
+
+        step_name = "implement_task"
+        cycle_dir = tmp_path / ".forge" / step_name
+        cycle_dir.mkdir(parents=True)
+
+        cycle_data = {
+            "cycle": 1,
+            "max_cycles": 3,
+            "verdict": "approved",
+            "feedback": "",
+            "skill": "local-review",
+            "elapsed_seconds": 5.0,
+            "timestamp": "2024-01-15T10:30:00Z",
+        }
+        cycle_file = cycle_dir / "review_cycle_1.json"
+        cycle_file.write_text(json.dumps(cycle_data))
+
+        # File was already processed
+        processed_files: set[str] = {str(cycle_file)}
+        collected_cycles: list[ReviewCycleData] = []
+
+        mock_recorder = MagicMock()
+
+        with caplog.at_level(logging.WARNING):
+            runner._sweep_review_cycles(
+                workspace_path=tmp_path,
+                step_name=step_name,
+                processed_files=processed_files,
+                collected_cycles=collected_cycles,
+                recorder=mock_recorder,
+            )
+
+        # Should not find any new files
+        assert len(collected_cycles) == 0
+
+        # Should not log warning about missed files
+        assert "Sweep caught" not in caplog.text
+
+    def test_sweep_handles_nonexistent_directory(self, tmp_path: Path):
+        """Test that sweep handles missing .forge/{step} directory gracefully."""
+        runner = _runner_without_init()
+
+        # Don't create the directory
+        processed_files: set[str] = set()
+        collected_cycles: list[ReviewCycleData] = []
+
+        mock_recorder = MagicMock()
+
+        # Should not raise
+        runner._sweep_review_cycles(
+            workspace_path=tmp_path,
+            step_name="nonexistent_step",
+            processed_files=processed_files,
+            collected_cycles=collected_cycles,
+            recorder=mock_recorder,
+        )
+
+        assert len(collected_cycles) == 0
+
+    def test_sweep_handles_invalid_json(self, tmp_path: Path, caplog):
+        """Test that sweep handles invalid JSON files gracefully."""
+        import logging
+
+        runner = _runner_without_init()
+
+        step_name = "implement_task"
+        cycle_dir = tmp_path / ".forge" / step_name
+        cycle_dir.mkdir(parents=True)
+
+        # Create an invalid JSON file
+        cycle_file = cycle_dir / "review_cycle_1.json"
+        cycle_file.write_text("not valid json {")
+
+        processed_files: set[str] = set()
+        collected_cycles: list[ReviewCycleData] = []
+
+        mock_recorder = MagicMock()
+
+        with caplog.at_level(logging.WARNING):
+            runner._sweep_review_cycles(
+                workspace_path=tmp_path,
+                step_name=step_name,
+                processed_files=processed_files,
+                collected_cycles=collected_cycles,
+                recorder=mock_recorder,
+            )
+
+        # Should not have collected any cycles
+        assert len(collected_cycles) == 0
+
+        # Should log warning about parse failure
+        assert "Failed to parse review cycle file" in caplog.text
+
+    def test_sweep_handles_missing_required_fields(self, tmp_path: Path, caplog):
+        """Test that sweep handles JSON with missing required fields."""
+        import json
+        import logging
+
+        runner = _runner_without_init()
+
+        step_name = "implement_task"
+        cycle_dir = tmp_path / ".forge" / step_name
+        cycle_dir.mkdir(parents=True)
+
+        # Create JSON missing required fields
+        cycle_data = {"verdict": "approved", "feedback": "Missing fields"}
+        cycle_file = cycle_dir / "review_cycle_1.json"
+        cycle_file.write_text(json.dumps(cycle_data))
+
+        processed_files: set[str] = set()
+        collected_cycles: list[ReviewCycleData] = []
+
+        mock_recorder = MagicMock()
+
+        with caplog.at_level(logging.WARNING):
+            runner._sweep_review_cycles(
+                workspace_path=tmp_path,
+                step_name=step_name,
+                processed_files=processed_files,
+                collected_cycles=collected_cycles,
+                recorder=mock_recorder,
+            )
+
+        # Should not have collected any cycles
+        assert len(collected_cycles) == 0
+
+        # Should log warning about invalid data
+        assert "Invalid review cycle data" in caplog.text
+
+    def test_sweep_handles_empty_file(self, tmp_path: Path, caplog):
+        """Test that sweep handles empty files gracefully."""
+        import logging
+
+        runner = _runner_without_init()
+
+        step_name = "implement_task"
+        cycle_dir = tmp_path / ".forge" / step_name
+        cycle_dir.mkdir(parents=True)
+
+        # Create an empty file
+        cycle_file = cycle_dir / "review_cycle_1.json"
+        cycle_file.write_text("")
+
+        processed_files: set[str] = set()
+        collected_cycles: list[ReviewCycleData] = []
+
+        mock_recorder = MagicMock()
+
+        with caplog.at_level(logging.WARNING):
+            runner._sweep_review_cycles(
+                workspace_path=tmp_path,
+                step_name=step_name,
+                processed_files=processed_files,
+                collected_cycles=collected_cycles,
+                recorder=mock_recorder,
+            )
+
+        # Should not have collected any cycles
+        assert len(collected_cycles) == 0
+
+        # Should log warning about empty file
+        assert "Empty review cycle file" in caplog.text
+
+    def test_sweep_emits_metrics(self, tmp_path: Path):
+        """Test that sweep emits Prometheus metrics for caught files."""
+        import json
+
+        runner = _runner_without_init()
+
+        step_name = "implement_task"
+        cycle_dir = tmp_path / ".forge" / step_name
+        cycle_dir.mkdir(parents=True)
+
+        cycle_data = {
+            "cycle": 1,
+            "max_cycles": 3,
+            "verdict": "rejected",
+            "feedback": "Needs work",
+            "skill": "local-review",
+            "elapsed_seconds": 8.5,
+            "timestamp": "2024-01-15T10:30:00Z",
+        }
+        cycle_file = cycle_dir / "review_cycle_1.json"
+        cycle_file.write_text(json.dumps(cycle_data))
+
+        processed_files: set[str] = set()
+        collected_cycles: list[ReviewCycleData] = []
+
+        mock_recorder = MagicMock()
+
+        with (
+            patch("forge.sandbox.runner.record_review_cycle") as mock_cycle,
+            patch("forge.sandbox.runner.record_review_verdict") as mock_verdict,
+            patch("forge.sandbox.runner.observe_review_duration") as mock_duration,
+        ):
+            runner._sweep_review_cycles(
+                workspace_path=tmp_path,
+                step_name=step_name,
+                processed_files=processed_files,
+                collected_cycles=collected_cycles,
+                recorder=mock_recorder,
+            )
+
+        # Verify metrics were emitted
+        mock_cycle.assert_called_once_with("local-review", step_name)
+        mock_verdict.assert_called_once_with("local-review", step_name, "rejected")
+        mock_duration.assert_called_once_with("local-review", step_name, 8.5)
+
+    def test_sweep_records_via_recorder(self, tmp_path: Path):
+        """Test that sweep uses recorder to record and copy files."""
+        import json
+
+        runner = _runner_without_init()
+
+        step_name = "implement_task"
+        cycle_dir = tmp_path / ".forge" / step_name
+        cycle_dir.mkdir(parents=True)
+
+        cycle_data = {
+            "cycle": 1,
+            "max_cycles": 3,
+            "verdict": "approved",
+            "feedback": "",
+            "skill": "local-review",
+            "elapsed_seconds": 5.0,
+            "timestamp": "2024-01-15T10:30:00Z",
+        }
+        cycle_file = cycle_dir / "review_cycle_1.json"
+        cycle_file.write_text(json.dumps(cycle_data))
+
+        processed_files: set[str] = set()
+        collected_cycles: list[ReviewCycleData] = []
+
+        mock_recorder = MagicMock()
+
+        runner._sweep_review_cycles(
+            workspace_path=tmp_path,
+            step_name=step_name,
+            processed_files=processed_files,
+            collected_cycles=collected_cycles,
+            recorder=mock_recorder,
+        )
+
+        # Verify recorder methods were called
+        mock_recorder.record.assert_called_once()
+        mock_recorder.record_file.assert_called_once_with(cycle_file)
+
+
+class TestSweepIntegrationWithRun:
+    """Tests for sweep integration with ContainerRunner.run()."""
+
+    @pytest.mark.asyncio
+    async def test_fast_exit_files_caught_by_sweep(self, tmp_path: Path, caplog):
+        """Test that files written just before container exit are caught by sweep."""
+        import json
+        import logging
+
+        runner = _runner_without_init()
+        runner.settings = MagicMock()
+        runner.settings.container_image = "test:latest"
+        runner.settings.container_timeout = 60
+        runner.settings.container_memory = "1g"
+        runner.settings.container_cpus = "1"
+        runner.settings.container_keep = False
+        runner.settings.auto_review_poll_interval = 1.0
+        runner.settings.auto_review_record_polled_files = "log"
+
+        mock_process = MagicMock()
+        mock_process.communicate = AsyncMock(return_value=(b"output", b""))
+        mock_process.returncode = 0
+
+        step_name = "implement_task"
+
+        # Create a file that simulates being written just before container exit
+        # (not caught by async polling)
+        cycle_dir = tmp_path / ".forge" / step_name
+        cycle_dir.mkdir(parents=True)
+        cycle_data = {
+            "cycle": 1,
+            "max_cycles": 3,
+            "verdict": "approved",
+            "feedback": "Fast exit",
+            "skill": "fast-review",
+            "elapsed_seconds": 1.0,
+            "timestamp": "2024-01-15T10:30:00Z",
+        }
+        cycle_file = cycle_dir / "review_cycle_1.json"
+        cycle_file.write_text(json.dumps(cycle_data))
+
+        def create_poller(**kwargs):
+            mock_poller = MagicMock()
+            # Poller returns no files during polling and poll_once
+            # (simulating fast exit where files are written after polling stops)
+            mock_poller.poll = MagicMock(return_value=AsyncIteratorMock([]))
+            mock_poller.poll_once = AsyncMock(return_value=[])
+            mock_poller.stop = MagicMock()
+            mock_poller.step_name = kwargs.get("step_name", "")
+            # Empty processed files - nothing was caught during async polling
+            mock_poller._processed_files = set()
+            return mock_poller
+
+        with (
+            patch.object(runner, "_build_container_name", return_value="test-container"),
+            patch.object(runner, "_build_podman_command", return_value=["podman", "run"]),
+            patch(
+                "forge.sandbox.runner.asyncio.create_subprocess_exec",
+                return_value=mock_process,
+            ),
+            patch(
+                "forge.sandbox.runner.ReviewCyclePoller",
+                side_effect=create_poller,
+            ),
+            patch("forge.sandbox.runner.ReviewCycleRecorder") as mock_recorder_class,
+            patch("forge.sandbox.runner.record_review_cycle"),
+            patch("forge.sandbox.runner.record_review_verdict"),
+            patch("forge.sandbox.runner.observe_review_duration"),
+            caplog.at_level(logging.WARNING),
+        ):
+            mock_recorder = MagicMock()
+            mock_recorder_class.return_value = mock_recorder
+
+            result = await runner.run(
+                workspace_path=tmp_path,
+                task_summary="Test task",
+                task_description="Test description",
+                step_name=step_name,
+            )
+
+        # The sweep should have caught the file
+        assert len(result.review_cycles) == 1
+        assert result.review_cycles[0].verdict == "approved"
+        assert result.review_cycles[0].skill == "fast-review"
+
+        # Should log warning about missed files
+        assert "Sweep caught 1 review cycle file(s) missed" in caplog.text
+
+    @pytest.mark.asyncio
+    async def test_sweep_runs_after_async_polling(self, tmp_path: Path):
+        """Test that sweep is called after container exits."""
+        runner = _runner_without_init()
+        runner.settings = MagicMock()
+        runner.settings.container_image = "test:latest"
+        runner.settings.container_timeout = 60
+        runner.settings.container_memory = "1g"
+        runner.settings.container_cpus = "1"
+        runner.settings.container_keep = False
+        runner.settings.auto_review_poll_interval = 1.0
+        runner.settings.auto_review_record_polled_files = None
+
+        mock_process = MagicMock()
+        mock_process.communicate = AsyncMock(return_value=(b"output", b""))
+        mock_process.returncode = 0
+
+        sweep_called = False
+        original_sweep = runner._sweep_review_cycles
+
+        def mock_sweep(*args, **kwargs):
+            nonlocal sweep_called
+            sweep_called = True
+            return original_sweep(*args, **kwargs)
+
+        def create_poller(**kwargs):
+            mock_poller = MagicMock()
+            mock_poller.poll = MagicMock(return_value=AsyncIteratorMock([]))
+            mock_poller.poll_once = AsyncMock(return_value=[])
+            mock_poller.stop = MagicMock()
+            mock_poller.step_name = kwargs.get("step_name", "")
+            mock_poller._processed_files = set()
+            return mock_poller
+
+        with (
+            patch.object(runner, "_build_container_name", return_value="test-container"),
+            patch.object(runner, "_build_podman_command", return_value=["podman", "run"]),
+            patch(
+                "forge.sandbox.runner.asyncio.create_subprocess_exec",
+                return_value=mock_process,
+            ),
+            patch(
+                "forge.sandbox.runner.ReviewCyclePoller",
+                side_effect=create_poller,
+            ),
+            patch("forge.sandbox.runner.ReviewCycleRecorder"),
+            patch.object(runner, "_sweep_review_cycles", side_effect=mock_sweep),
+        ):
+            await runner.run(
+                workspace_path=tmp_path,
+                task_summary="Test task",
+                task_description="Test description",
+                step_name="implement_task",
+            )
+
+        assert sweep_called is True
diff --git a/tests/unit/workflow/nodes/test_implementation.py b/tests/unit/workflow/nodes/test_implementation.py
index 7c673597..07a2812f 100644
--- a/tests/unit/workflow/nodes/test_implementation.py
+++ b/tests/unit/workflow/nodes/test_implementation.py
@@ -56,7 +56,6 @@ def _make_successful_runner():
 
 
 class TestImplementTaskStartedComment:
-
     @pytest.mark.asyncio
     async def test_posts_comment_on_task_ticket_before_container(self):
         """A comment is posted on the task ticket (not parent) when implementation starts."""
@@ -184,7 +183,6 @@ async def test_passes_trace_context_to_container_runner(self):
 
 
 class TestImplementationNodeRouting:
-
     @pytest.mark.asyncio
     async def test_feature_missing_workspace_uses_feature_implementation_node(self):
         """Feature implementation failures must resume at implement_task."""
@@ -272,3 +270,33 @@ async def test_bug_container_failure_keeps_bug_implementation_node(self):
         assert result["current_node"] == "implement_bug_fix"
         assert result["last_error"] == "container failed"
         assert result["retry_count"] == 1
+
+
+class TestImplementTaskStepName:
+    """Tests for step_name propagation to ContainerRunner."""
+
+    @pytest.mark.asyncio
+    async def test_passes_step_name_implement_task_to_runner(self):
+        """ContainerRunner.run() is called with step_name='implement_task'."""
+        from forge.workflow.nodes.implementation import implement_task
+
+        mock_jira = _make_mock_jira()
+        runner = _make_successful_runner()
+
+        with (
+            patch(
+                "forge.workflow.nodes.implementation.JiraClient",
+                return_value=mock_jira,
+            ),
+            patch(
+                "forge.workflow.nodes.implementation.ContainerRunner",
+                return_value=runner,
+            ),
+            patch("forge.workflow.nodes.implementation.get_settings"),
+        ):
+            await implement_task(_make_state())
+
+        # Verify step_name was passed
+        runner.run.assert_called_once()
+        call_kwargs = runner.run.call_args.kwargs
+        assert call_kwargs.get("step_name") == "implement_task"
diff --git a/tests/unit/workflow/nodes/test_local_reviewer.py b/tests/unit/workflow/nodes/test_local_reviewer.py
index 7a78a0ff..56092467 100644
--- a/tests/unit/workflow/nodes/test_local_reviewer.py
+++ b/tests/unit/workflow/nodes/test_local_reviewer.py
@@ -366,3 +366,69 @@ async def test_feature_no_qualitative_fields_set(self, base_feature_review_state
             "qualitative_retry_count" not in result or result.get("qualitative_retry_count") is None
         )
         assert "local_review_verdict" not in result or result.get("local_review_verdict") is None
+
+
+class TestLocalReviewStepName:
+    """Tests for step_name propagation to ContainerRunner."""
+
+    @pytest.mark.asyncio
+    async def test_bug_review_passes_step_name_local_review(self, base_bug_review_state):
+        """Bug qualitative review passes step_name='local_review' to ContainerRunner."""
+        captured_kwargs = []
+
+        class _CapturingRunner:
+            async def run(self, **kwargs):
+                captured_kwargs.append(kwargs)
+                result = MagicMock()
+                result.success = True
+                result.exit_code = 0
+                result.stdout = "verdict: adequate\n\nfeedback: Good."
+                result.stderr = ""
+                return result
+
+        mock_git = _make_mock_git()
+        mock_workspace = MagicMock()
+
+        with (
+            patch(
+                "forge.workflow.nodes.local_reviewer.ContainerRunner",
+                return_value=_CapturingRunner(),
+            ),
+            patch("forge.workflow.nodes.local_reviewer.GitOperations", return_value=mock_git),
+            patch("forge.workflow.nodes.local_reviewer.Workspace", return_value=mock_workspace),
+        ):
+            await local_review_changes(base_bug_review_state)
+
+        assert captured_kwargs, "runner.run was not called"
+        assert captured_kwargs[0].get("step_name") == "local_review"
+
+    @pytest.mark.asyncio
+    async def test_feature_review_passes_step_name_local_review(self, base_feature_review_state):
+        """Feature review passes step_name='local_review' to ContainerRunner."""
+        captured_kwargs = []
+
+        class _CapturingRunner:
+            async def run(self, **kwargs):
+                captured_kwargs.append(kwargs)
+                result = MagicMock()
+                result.success = True
+                result.exit_code = 0
+                result.stdout = "No issues found."
+                result.stderr = ""
+                return result
+
+        mock_git = _make_mock_git()
+        mock_workspace = MagicMock()
+
+        with (
+            patch(
+                "forge.workflow.nodes.local_reviewer.ContainerRunner",
+                return_value=_CapturingRunner(),
+            ),
+            patch("forge.workflow.nodes.local_reviewer.GitOperations", return_value=mock_git),
+            patch("forge.workflow.nodes.local_reviewer.Workspace", return_value=mock_workspace),
+        ):
+            await local_review_changes(base_feature_review_state)
+
+        assert captured_kwargs, "runner.run was not called"
+        assert captured_kwargs[0].get("step_name") == "local_review"