From d4f312337f442aa321a2ae8ec7bb8e61eaed1074 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Mon, 11 May 2026 10:49:39 +0800 Subject: [PATCH] {"schema":"decodex/commit/1","summary":"Add review handoff recovery","authority":"manual"} --- apps/decodex/src/cli.rs | 135 +++- apps/decodex/src/lib.rs | 1 + apps/decodex/src/recovery.rs | 813 +++++++++++++++++++++++ apps/decodex/src/state/internal.rs | 26 + apps/decodex/src/state/store.rs | 55 ++ apps/decodex/src/state/tests.rs | 95 +++ apps/decodex/src/tracker/records.rs | 7 + docs/reference/operator-control-plane.md | 5 + docs/runbook/index.md | 3 + docs/runbook/recover-review-handoff.md | 71 ++ docs/spec/linear-execution-ledger.md | 7 + docs/spec/post-review-lifecycle.md | 23 + 12 files changed, 1240 insertions(+), 1 deletion(-) create mode 100644 apps/decodex/src/recovery.rs create mode 100644 docs/runbook/recover-review-handoff.md diff --git a/apps/decodex/src/cli.rs b/apps/decodex/src/cli.rs index 6d7f664..c0d0b2a 100644 --- a/apps/decodex/src/cli.rs +++ b/apps/decodex/src/cli.rs @@ -20,6 +20,7 @@ use crate::{ manual::{self, ManualCommitRequest, ManualLandRequest}, orchestrator::{self, IssueDispatchMode, RunOnceRequest, ServeRequest}, prelude::eyre, + recovery::{self, ReviewHandoffDiagnoseRequest, ReviewHandoffRebindRequest}, runtime, }; @@ -57,6 +58,7 @@ impl Cli { Command::Serve(args) => args.run(config_path), Command::Project(args) => args.run(), Command::Status(args) => args.run(config_path), + Command::Recover(args) => args.run(config_path), Command::ArchiveLinear(args) => args.run(config_path), Command::Probe(args) => args.run(), Command::Attempt(args) => args.run(config_path), @@ -126,6 +128,8 @@ enum Command { Project(ProjectCommand), /// Inspect the current local runtime state for one configured project. Status(StatusCommand), + /// Diagnose or explicitly repair supported retained-lane recovery cases. + Recover(RecoverCommand), /// Dry-run or archive old terminal Linear issues by repo label. ArchiveLinear(ArchiveLinearCommand), /// Validate the local app-server integration boundary. @@ -350,6 +354,81 @@ impl StatusCommand { } } +#[derive(Debug, Args)] +struct RecoverCommand { + #[command(subcommand)] + command: RecoverSubcommand, +} +impl RecoverCommand { + fn run(&self, config_path: Option<&Path>) -> crate::prelude::Result<()> { + match &self.command { + RecoverSubcommand::ReviewHandoff(args) => args.run(config_path), + } + } +} + +#[derive(Debug, Subcommand)] +enum RecoverSubcommand { + /// Recover retained review lanes whose handoff marker is missing. + ReviewHandoff(ReviewHandoffRecoveryCommand), +} + +#[derive(Debug, Args)] +struct ReviewHandoffRecoveryCommand { + #[command(subcommand)] + command: ReviewHandoffRecoverySubcommand, +} +impl ReviewHandoffRecoveryCommand { + fn run(&self, config_path: Option<&Path>) -> crate::prelude::Result<()> { + match &self.command { + ReviewHandoffRecoverySubcommand::Diagnose(args) => + recovery::run_review_handoff_diagnose( + config_path, + &ReviewHandoffDiagnoseRequest { issue: args.issue.clone(), json: args.json }, + ), + ReviewHandoffRecoverySubcommand::Rebind(args) => recovery::run_review_handoff_rebind( + config_path, + &ReviewHandoffRebindRequest { + issue: args.issue.clone(), + pr_url: args.pr.clone(), + dry_run: args.dry_run, + }, + ), + } + } +} + +#[derive(Debug, Subcommand)] +enum ReviewHandoffRecoverySubcommand { + /// Read-only diagnosis for orphaned retained review lanes. + Diagnose(ReviewHandoffDiagnoseCommand), + /// Explicitly bind a validated PR URL to one retained review lane. + Rebind(ReviewHandoffRebindCommand), +} + +#[derive(Debug, Args)] +struct ReviewHandoffDiagnoseCommand { + /// Issue identifier to inspect. Omit to inspect all retained review worktrees. + #[arg(value_name = "ISSUE")] + issue: Option, + /// Emit structured JSON instead of human-readable text. + #[arg(long)] + json: bool, +} + +#[derive(Debug, Args)] +struct ReviewHandoffRebindCommand { + /// Issue identifier for the retained review lane. + #[arg(value_name = "ISSUE")] + issue: String, + /// Pull request URL to bind after validation. + #[arg(long, value_name = "URL")] + pr: String, + /// Validate only; do not write runtime markers or tracker audit comments. + #[arg(long)] + dry_run: bool, +} + #[derive(Debug, Args)] struct ArchiveLinearCommand { /// Repo label scope to inspect, for example `repo:decodex`. @@ -493,7 +572,9 @@ mod tests { use crate::cli::{ AttemptCommand, Cli, Command, CommitCommand, LandCommand, ProbeCommand, ProjectCommand, - ProjectSubcommand, RunCommand, ServeCommand, StatusCommand, + ProjectSubcommand, RecoverCommand, RecoverSubcommand, ReviewHandoffDiagnoseCommand, + ReviewHandoffRebindCommand, ReviewHandoffRecoveryCommand, ReviewHandoffRecoverySubcommand, + RunCommand, ServeCommand, StatusCommand, }; #[test] @@ -669,4 +750,56 @@ mod tests { assert_eq!(cli.config, Some(PathBuf::from("./project.toml"))); assert!(matches!(cli.command, Command::Status(StatusCommand { json: true, limit: 5 }))); } + + #[test] + fn parses_review_handoff_diagnose_with_issue_and_json() { + let cli = Cli::parse_from([ + "decodex", + "--config", + "./project.toml", + "recover", + "review-handoff", + "diagnose", + "PUB-718", + "--json", + ]); + + assert_eq!(cli.config, Some(PathBuf::from("./project.toml"))); + assert!(matches!( + cli.command, + Command::Recover(RecoverCommand { + command: RecoverSubcommand::ReviewHandoff(ReviewHandoffRecoveryCommand { + command: ReviewHandoffRecoverySubcommand::Diagnose( + ReviewHandoffDiagnoseCommand { issue: Some(_), json: true } + ) + }) + }) + )); + } + + #[test] + fn parses_review_handoff_rebind_dry_run() { + let cli = Cli::parse_from([ + "decodex", + "recover", + "review-handoff", + "rebind", + "PUB-718", + "--pr", + "https://github.com/hack-ink/pubfi-mono-v2/pull/14", + "--dry-run", + ]); + + assert!(matches!( + cli.command, + Command::Recover(RecoverCommand { + command: RecoverSubcommand::ReviewHandoff(ReviewHandoffRecoveryCommand { + command: ReviewHandoffRecoverySubcommand::Rebind( + ReviewHandoffRebindCommand { issue, pr, dry_run: true } + ) + }) + }) if issue == "PUB-718" + && pr == "https://github.com/hack-ink/pubfi-mono-v2/pull/14" + )); + } } diff --git a/apps/decodex/src/lib.rs b/apps/decodex/src/lib.rs index c9f5fbc..4d879c8 100644 --- a/apps/decodex/src/lib.rs +++ b/apps/decodex/src/lib.rs @@ -17,6 +17,7 @@ mod pull_request; mod prelude { pub use color_eyre::{Result, eyre}; } +mod recovery; mod runtime; mod tracker; mod worktree; diff --git a/apps/decodex/src/recovery.rs b/apps/decodex/src/recovery.rs new file mode 100644 index 0000000..d5bc2b6 --- /dev/null +++ b/apps/decodex/src/recovery.rs @@ -0,0 +1,813 @@ +//! Explicit operator recovery surfaces for retained Decodex lanes. + +use std::{ + collections::HashMap, + env, fs, + path::{Path, PathBuf}, + process::Command, +}; + +use serde::Serialize; +use time::{OffsetDateTime, format_description::well_known::Rfc3339}; + +use crate::{ + config::ServiceConfig, + github, + prelude::{Result, eyre}, + pull_request::PullRequestLandingState, + runtime, + state::{ + RUN_ACTIVITY_MARKER_FILE, ReviewHandoffMarker, ReviewOrchestrationMarker, RunAttempt, + StateStore, WorktreeMapping, + }, + tracker::{ + self, IssueTracker, TrackerIssue, + linear::LinearClient, + records::{self, LinearExecutionEventIdentity, LinearExecutionEventRecord}, + }, + workflow::WorkflowDocument, +}; + +const MISSING_HANDOFF_REASON: &str = "missing_review_handoff_record"; +const ORPHANED_REVIEW_HANDOFF_CLASSIFICATION: &str = "orphaned_review_handoff"; +const REVIEW_HANDOFF_REBIND_EVENT: &str = "review_handoff_rebind"; +const REBOUND_ORCHESTRATION_PHASE: &str = "request_pending"; + +/// Read-only retained review handoff diagnostic request. +#[derive(Debug)] +pub(crate) struct ReviewHandoffDiagnoseRequest { + /// Optional issue identifier to inspect. + pub(crate) issue: Option, + /// Emit JSON instead of text. + pub(crate) json: bool, +} + +/// Explicit retained review handoff rebind request. +#[derive(Debug)] +pub(crate) struct ReviewHandoffRebindRequest { + /// Issue identifier to repair. + pub(crate) issue: String, + /// Pull request URL to bind. + pub(crate) pr_url: String, + /// Validate without writing markers or tracker audit comments. + pub(crate) dry_run: bool, +} + +#[derive(Serialize)] +struct ReviewHandoffRecoveryReport { + project_id: String, + diagnostics: Vec, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize)] +struct ReviewHandoffDiagnostic { + project_id: String, + issue_id: String, + issue_identifier: String, + issue_state: String, + classification: String, + reason: String, + branch_name: String, + worktree_path: String, + local_branch_name: Option, + local_head_oid: Option, + worktree_clean: Option, + existing_pr_url: Option, + active_label_present: Option, + next_action: String, +} + +struct RecoveryContext { + config: ServiceConfig, + workflow: WorkflowDocument, + state_store: StateStore, + tracker: LinearClient, +} + +struct RebindValidation { + issue: TrackerIssue, + worktree: WorktreeMapping, + attempt: RunAttempt, + landing_state: PullRequestLandingState, + local_head_oid: String, + worktree_path_for_event: Option, + active_label_present: bool, +} + +/// Run a read-only retained review handoff diagnostic. +pub(crate) fn run_review_handoff_diagnose( + config_path: Option<&Path>, + request: &ReviewHandoffDiagnoseRequest, +) -> Result<()> { + let context = load_recovery_context(config_path)?; + let diagnostics = match request.issue.as_deref() { + Some(issue_identifier) => vec![diagnose_issue(&context, issue_identifier)?], + None => diagnose_all_retained_review_worktrees(&context)?, + }; + let report = ReviewHandoffRecoveryReport { + project_id: context.config.service_id().to_owned(), + diagnostics, + }; + + if request.json { + println!("{}", serde_json::to_string_pretty(&report)?); + } else { + print!("{}", render_review_handoff_recovery_report(&report)); + } + + Ok(()) +} + +/// Run an explicit retained review handoff rebind. +pub(crate) fn run_review_handoff_rebind( + config_path: Option<&Path>, + request: &ReviewHandoffRebindRequest, +) -> Result<()> { + let context = load_recovery_context(config_path)?; + let validation = validate_rebind_request(&context, request)?; + + if request.dry_run { + println!( + "dry run: review handoff rebind validated for project={} issue={} branch={} pr={} head={} active_label_present={}", + context.config.service_id(), + validation.issue.identifier, + validation.worktree.branch_name(), + landing_url(&validation.landing_state), + validation.local_head_oid, + validation.active_label_present + ); + + return Ok(()); + } + + apply_review_handoff_rebind(&context, &validation)?; + + println!( + "rebind ok: project={} issue={} branch={} pr={} head={}", + context.config.service_id(), + validation.issue.identifier, + validation.worktree.branch_name(), + landing_url(&validation.landing_state), + validation.local_head_oid + ); + + Ok(()) +} + +fn load_recovery_context(config_path: Option<&Path>) -> Result { + let state_store = runtime::open_runtime_store()?; + let config_path = resolve_recovery_config_path(config_path, &state_store)?; + let config = ServiceConfig::from_path(&config_path)?; + let workflow = WorkflowDocument::from_path(config.workflow_path())?; + let tracker = LinearClient::new(config.tracker().resolve_api_key()?)?; + + runtime::register_project_config(&state_store, &config_path, true)?; + + Ok(RecoveryContext { config, workflow, state_store, tracker }) +} + +fn resolve_recovery_config_path( + config_path: Option<&Path>, + state_store: &StateStore, +) -> Result { + if let Some(config_path) = config_path { + return ServiceConfig::resolve_project_config_path(config_path); + } + + runtime::registered_config_path_for_cwd(state_store, &env::current_dir()?)?.ok_or_else(|| { + eyre::eyre!( + "No Decodex project config found. Pass --config or register one with `decodex project add `." + ) + }) +} + +fn diagnose_all_retained_review_worktrees( + context: &RecoveryContext, +) -> Result> { + let worktrees = context.state_store.list_worktrees(context.config.service_id())?; + + if worktrees.is_empty() { + return Ok(Vec::new()); + } + + let issue_ids = + worktrees.iter().map(|worktree| worktree.issue_id().to_owned()).collect::>(); + let issues = context.tracker.refresh_issues(&issue_ids)?; + let issues_by_id = + issues.into_iter().map(|issue| (issue.id.clone(), issue)).collect::>(); + let success_state = context.workflow.frontmatter().tracker().success_state(); + let mut diagnostics = Vec::new(); + + for worktree in worktrees { + let Some(issue) = issues_by_id.get(worktree.issue_id()).cloned() else { + continue; + }; + + if issue.state.name != success_state { + continue; + } + + diagnostics.push(diagnose_issue_worktree(context, issue, worktree)?); + } + + Ok(diagnostics) +} + +fn diagnose_issue( + context: &RecoveryContext, + issue_identifier: &str, +) -> Result { + let issue = load_issue_by_identifier(&context.tracker, issue_identifier)?; + let worktree = context.state_store.worktree_for_issue(&issue.id)?.ok_or_else(|| { + eyre::eyre!("Issue `{}` has no retained worktree mapping.", issue.identifier) + })?; + + diagnose_issue_worktree(context, issue, worktree) +} + +fn diagnose_issue_worktree( + context: &RecoveryContext, + issue: TrackerIssue, + worktree: WorktreeMapping, +) -> Result { + let existing_handoff = context.state_store.review_handoff_marker( + context.config.service_id(), + &issue.id, + worktree.branch_name(), + )?; + let local_branch_name = worktree_checkout_branch_name(worktree.worktree_path()).ok().flatten(); + let local_head_oid = worktree_head_oid(worktree.worktree_path()).ok().flatten(); + let worktree_clean = worktree_is_clean(worktree.worktree_path()).ok(); + let active_label_name = tracker::automation_active_label(context.config.service_id()); + let active_label_present = tracker::issue_has_label_with_server_confirmation( + &context.tracker, + &issue, + &active_label_name, + ) + .ok(); + let missing_handoff = existing_handoff.is_none(); + + Ok(ReviewHandoffDiagnostic { + project_id: context.config.service_id().to_owned(), + issue_id: issue.id.clone(), + issue_identifier: issue.identifier.clone(), + issue_state: issue.state.name.clone(), + classification: diagnostic_classification(existing_handoff.as_ref()), + reason: diagnostic_reason(existing_handoff.as_ref()), + branch_name: worktree.branch_name().to_owned(), + worktree_path: worktree.worktree_path().display().to_string(), + local_branch_name, + local_head_oid, + worktree_clean, + existing_pr_url: existing_handoff.map(|handoff| handoff.pr_url().to_owned()), + active_label_present, + next_action: diagnostic_next_action( + context.config.service_id(), + &issue.identifier, + missing_handoff, + ), + }) +} + +fn diagnostic_classification(existing_handoff: Option<&ReviewHandoffMarker>) -> String { + if existing_handoff.is_some() { + return String::from("review_handoff_bound"); + } + + String::from(ORPHANED_REVIEW_HANDOFF_CLASSIFICATION) +} + +fn diagnostic_reason(existing_handoff: Option<&ReviewHandoffMarker>) -> String { + if existing_handoff.is_some() { + return String::from("review_handoff_record_present"); + } + + String::from(MISSING_HANDOFF_REASON) +} + +fn diagnostic_next_action( + service_id: &str, + issue_identifier: &str, + missing_handoff: bool, +) -> String { + if !missing_handoff { + return String::from("Continue the existing post-review lifecycle; no rebind is needed."); + } + + format!( + "Inspect PR lineage, ensure label `{}` is present, then run `decodex recover review-handoff rebind {} --pr ` if the PR exactly matches this retained lane.", + tracker::automation_active_label(service_id), + issue_identifier + ) +} + +fn render_review_handoff_recovery_report(report: &ReviewHandoffRecoveryReport) -> String { + let mut output = + format!("Review handoff recovery diagnostics for project {}\n", report.project_id); + + if report.diagnostics.is_empty() { + output.push_str("- none\n"); + + return output; + } + + for diagnostic in &report.diagnostics { + output.push_str(&format!( + "- issue: {}\n state: {}\n classification: {}\n reason: {}\n branch: {}\n worktree_path: {}\n local_branch: {}\n local_head: {}\n worktree_clean: {}\n existing_pr_url: {}\n active_label_present: {}\n next_action: {}\n", + diagnostic.issue_identifier, + diagnostic.issue_state, + diagnostic.classification, + diagnostic.reason, + diagnostic.branch_name, + diagnostic.worktree_path, + optional_text(diagnostic.local_branch_name.as_deref()), + optional_text(diagnostic.local_head_oid.as_deref()), + diagnostic.worktree_clean.map_or_else(|| String::from("unknown"), |clean| clean.to_string()), + optional_text(diagnostic.existing_pr_url.as_deref()), + diagnostic.active_label_present.map_or_else(|| String::from("unknown"), |present| present.to_string()), + diagnostic.next_action, + )); + } + + output +} + +fn optional_text(value: Option<&str>) -> &str { + value.unwrap_or("none") +} + +fn validate_rebind_request( + context: &RecoveryContext, + request: &ReviewHandoffRebindRequest, +) -> Result { + let issue = load_issue_by_identifier(&context.tracker, &request.issue)?; + let worktree = validate_rebind_issue_context(context, &issue)?; + let attempt = + context.state_store.latest_run_attempt_for_issue(&issue.id)?.ok_or_else(|| { + eyre::eyre!("Issue `{}` has no recorded run attempt to rebind.", issue.identifier) + })?; + let landing_state = inspect_rebind_pull_request(context, &request.pr_url)?; + let local_head_oid = validate_rebind_worktree(&worktree, &landing_state)?; + let active_label_present = validate_rebind_tracker_labels(context, &issue)?; + let worktree_path_for_event = + repository_relative_path(context.config.repo_root(), worktree.worktree_path()); + + Ok(RebindValidation { + issue, + worktree, + attempt, + landing_state, + local_head_oid, + worktree_path_for_event, + active_label_present, + }) +} + +fn load_issue_by_identifier(tracker: &T, issue_identifier: &str) -> Result +where + T: IssueTracker + ?Sized, +{ + tracker + .get_issue_by_identifier(issue_identifier)? + .ok_or_else(|| eyre::eyre!("Tracker issue `{issue_identifier}` was not found.")) +} + +fn validate_rebind_issue_context( + context: &RecoveryContext, + issue: &TrackerIssue, +) -> Result { + let tracker_policy = context.workflow.frontmatter().tracker(); + + if issue.state.name != tracker_policy.success_state() { + eyre::bail!( + "Issue `{}` is in `{}`, but review handoff rebind requires `{}`.", + issue.identifier, + issue.state.name, + tracker_policy.success_state() + ); + } + if issue.has_label(tracker_policy.opt_out_label()) { + eyre::bail!( + "Issue `{}` has opt-out label `{}`.", + issue.identifier, + tracker_policy.opt_out_label() + ); + } + if issue.has_label(tracker_policy.needs_attention_label()) { + eyre::bail!( + "Issue `{}` has needs-attention label `{}`.", + issue.identifier, + tracker_policy.needs_attention_label() + ); + } + + let worktree = context.state_store.worktree_for_issue(&issue.id)?.ok_or_else(|| { + eyre::eyre!("Issue `{}` has no retained worktree mapping.", issue.identifier) + })?; + let existing_handoff = context.state_store.review_handoff_marker( + context.config.service_id(), + &issue.id, + worktree.branch_name(), + )?; + + if let Some(existing_handoff) = existing_handoff { + eyre::bail!( + "Issue `{}` already has review handoff marker for branch `{}` and PR `{}`.", + issue.identifier, + worktree.branch_name(), + existing_handoff.pr_url() + ); + } + + Ok(worktree) +} + +fn inspect_rebind_pull_request( + context: &RecoveryContext, + pr_url: &str, +) -> Result { + let github_token = context.config.github().resolve_token()?; + let repository = github::inspect_repository_context(context.config.repo_root(), &github_token)?; + + if !github::pull_request_matches_repository(pr_url, &repository)? { + eyre::bail!( + "Pull request `{}` does not belong to configured repository `{}/{}`.", + pr_url, + repository.owner, + repository.name + ); + } + + let landing_state = github::inspect_pull_request_landing_state( + context.config.repo_root(), + pr_url, + &github_token, + )?; + + if landing_state.base_ref_name != repository.default_branch { + eyre::bail!( + "Pull request `{}` targets `{}`, but configured default branch is `{}`.", + pr_url, + landing_state.base_ref_name, + repository.default_branch + ); + } + if landing_state.state != "OPEN" { + eyre::bail!( + "Pull request `{pr_url}` is `{}`; rebind requires `OPEN`.", + landing_state.state + ); + } + if landing_state.is_draft { + eyre::bail!("Pull request `{pr_url}` is still draft."); + } + + Ok(landing_state) +} + +fn validate_rebind_worktree( + worktree: &WorktreeMapping, + landing_state: &PullRequestLandingState, +) -> Result { + let local_branch = worktree_checkout_branch_name(worktree.worktree_path())? + .ok_or_else(|| eyre::eyre!("Retained worktree is detached."))?; + + if local_branch != worktree.branch_name() { + eyre::bail!( + "Retained worktree branch is `{local_branch}`, but runtime mapping expects `{}`.", + worktree.branch_name() + ); + } + if landing_state.head_ref_name != worktree.branch_name() { + eyre::bail!( + "Pull request `{}` points at branch `{}`, but retained lane branch is `{}`.", + landing_url(landing_state), + landing_state.head_ref_name, + worktree.branch_name() + ); + } + if !worktree_is_clean(worktree.worktree_path())? { + eyre::bail!( + "Retained worktree `{}` has local changes; rebind requires a clean lane checkout.", + worktree.worktree_path().display() + ); + } + + let local_head = worktree_head_oid(worktree.worktree_path())? + .ok_or_else(|| eyre::eyre!("Retained worktree has no readable HEAD."))?; + + if landing_state.head_ref_oid != local_head { + eyre::bail!( + "Pull request `{}` points at head `{}`, but retained worktree HEAD is `{local_head}`.", + landing_url(landing_state), + landing_state.head_ref_oid + ); + } + + Ok(local_head) +} + +fn validate_rebind_tracker_labels(context: &RecoveryContext, issue: &TrackerIssue) -> Result { + let active_label = tracker::automation_active_label(context.config.service_id()); + let active_label_present = + tracker::issue_has_label_with_server_confirmation(&context.tracker, issue, &active_label)?; + + if !active_label_present { + eyre::bail!( + "Issue `{}` is missing active automation label `{active_label}`. Restore explicit lane ownership before rebind.", + issue.identifier + ); + } + + Ok(active_label_present) +} + +fn apply_review_handoff_rebind( + context: &RecoveryContext, + validation: &RebindValidation, +) -> Result<()> { + let handoff_marker = ReviewHandoffMarker::new( + validation.attempt.run_id(), + validation.attempt.attempt_number(), + validation.worktree.branch_name(), + landing_url(&validation.landing_state), + validation.landing_state.base_ref_name.clone(), + validation.landing_state.head_ref_name.clone(), + validation.local_head_oid.clone(), + ); + let orchestration_marker = ReviewOrchestrationMarker::new( + validation.attempt.run_id(), + validation.attempt.attempt_number(), + validation.worktree.branch_name(), + landing_url(&validation.landing_state), + validation.local_head_oid.clone(), + REBOUND_ORCHESTRATION_PHASE, + None, + None, + None, + 0, + 0, + None, + ); + let event = review_handoff_rebind_event(context, validation); + + context.state_store.upsert_review_handoff_marker( + context.config.service_id(), + &validation.issue.id, + &handoff_marker, + )?; + context.state_store.upsert_review_orchestration_marker( + context.config.service_id(), + &validation.issue.id, + &orchestration_marker, + )?; + + if let Err(error) = write_rebind_audit(context, validation, &event) + .and_then(|()| context.state_store.record_linear_execution_event(&event)) + { + context.state_store.clear_review_markers_for_handoff( + context.config.service_id(), + &validation.issue.id, + &handoff_marker, + &orchestration_marker, + )?; + + return Err(error); + } + + Ok(()) +} + +fn review_handoff_rebind_event( + context: &RecoveryContext, + validation: &RebindValidation, +) -> LinearExecutionEventRecord { + let pr_url = landing_url(&validation.landing_state); + let stable_anchor = records::stable_event_anchor(&[ + pr_url, + &validation.local_head_oid, + REVIEW_HANDOFF_REBIND_EVENT, + ]); + let mut event = LinearExecutionEventRecord::new( + LinearExecutionEventIdentity { + service_id: context.config.service_id(), + issue_id: &validation.issue.id, + issue_identifier: &validation.issue.identifier, + run_id: validation.attempt.run_id(), + attempt_number: validation.attempt.attempt_number(), + }, + REVIEW_HANDOFF_REBIND_EVENT, + current_timestamp(), + &stable_anchor, + ); + + event.branch = Some(validation.worktree.branch_name().to_owned()); + event.worktree_path = validation.worktree_path_for_event.clone(); + event.pr_url = Some(pr_url.to_owned()); + event.pr_head_sha = Some(validation.local_head_oid.clone()); + event.pr_base_ref = Some(validation.landing_state.base_ref_name.clone()); + event.commit_sha = Some(validation.local_head_oid.clone()); + event.validation_result = Some(String::from("passed")); + event.summary = Some(format!( + "Explicit operator rebind restored retained review handoff marker for {}.", + validation.issue.identifier + )); + event.evidence = Some(vec![ + format!("issue_state={}", validation.issue.state.name), + format!("branch={}", validation.worktree.branch_name()), + format!("pr_url={pr_url}"), + format!("pr_head_sha={}", validation.local_head_oid), + String::from("existing_review_handoff_marker=absent"), + ]); + event.next_action = Some(String::from("continue retained post-review lifecycle")); + + event +} + +fn write_rebind_audit( + context: &RecoveryContext, + validation: &RebindValidation, + event: &LinearExecutionEventRecord, +) -> Result<()> { + let body = format!( + "Decodex operator recovery: rebound retained review handoff marker for `{}` to `{}`. This does not land the pull request.", + validation.issue.identifier, + landing_url(&validation.landing_state) + ); + + tracker::create_linear_execution_event_comment( + &context.tracker, + &validation.issue.id, + &body, + event, + )?; + + Ok(()) +} + +fn landing_url(landing_state: &PullRequestLandingState) -> &str { + &landing_state.url +} + +fn current_timestamp() -> String { + OffsetDateTime::now_utc().format(&Rfc3339).expect("timestamp formatting should succeed") +} + +fn worktree_checkout_branch_name(worktree_path: &Path) -> Result> { + let output = Command::new("git") + .arg("-C") + .arg(worktree_path) + .args(["symbolic-ref", "--quiet", "--short", "HEAD"]) + .output()?; + + if output.status.success() { + return Ok(Some(trimmed_stdout(&output.stdout)?)); + } + if output.status.code() == Some(1) { + return Ok(None); + } + + let stderr = String::from_utf8_lossy(&output.stderr); + + eyre::bail!( + "Failed to inspect retained worktree branch in `{}`: {}", + worktree_path.display(), + stderr.trim() + ) +} + +fn worktree_head_oid(worktree_path: &Path) -> Result> { + let output = Command::new("git") + .arg("-C") + .arg(worktree_path) + .args(["rev-parse", "--verify", "HEAD"]) + .output()?; + + if output.status.success() { + return Ok(Some(trimmed_stdout(&output.stdout)?)); + } + if output.status.code() == Some(128) { + return Ok(None); + } + + let stderr = String::from_utf8_lossy(&output.stderr); + + eyre::bail!( + "Failed to inspect retained worktree HEAD in `{}`: {}", + worktree_path.display(), + stderr.trim() + ) +} + +fn worktree_is_clean(worktree_path: &Path) -> Result { + Ok(worktree_blocking_status_lines(worktree_path)?.is_empty()) +} + +fn worktree_blocking_status_lines(worktree_path: &Path) -> Result> { + let output = Command::new("git") + .arg("-C") + .arg(worktree_path) + .args(["status", "--porcelain"]) + .output()?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + + eyre::bail!( + "Failed to inspect retained worktree cleanliness in `{}`: {}", + worktree_path.display(), + stderr.trim() + ); + } + + let status = String::from_utf8(output.stdout)?; + + Ok(status + .lines() + .filter(|line| !line.trim_end().is_empty()) + .filter(|line| !is_untracked_runtime_marker(line)) + .map(ToOwned::to_owned) + .collect()) +} + +fn is_untracked_runtime_marker(line: &str) -> bool { + line.trim_end().strip_prefix("?? ") == Some(RUN_ACTIVITY_MARKER_FILE) +} + +fn trimmed_stdout(stdout: &[u8]) -> Result { + Ok(String::from_utf8(stdout.to_vec())?.trim().to_owned()) +} + +fn repository_relative_path(repo_root: &Path, path: &Path) -> Option { + let canonical_repo_root = fs::canonicalize(repo_root).ok()?; + let canonical_path = fs::canonicalize(path).ok()?; + let relative = canonical_path.strip_prefix(canonical_repo_root).ok()?; + + Some(relative.to_string_lossy().to_string()) +} + +#[cfg(test)] +mod tests { + use crate::{ + recovery::REVIEW_HANDOFF_REBIND_EVENT, + tracker::records::{self, LinearExecutionEventIdentity, LinearExecutionEventRecord}, + }; + + #[test] + fn review_handoff_rebind_event_validation_accepts_required_fields() { + let mut record = LinearExecutionEventRecord::new( + LinearExecutionEventIdentity { + service_id: "pubfi", + issue_id: "issue-id", + issue_identifier: "PUB-718", + run_id: "pub-718-attempt-1", + attempt_number: 1, + }, + REVIEW_HANDOFF_REBIND_EVENT, + super::current_timestamp(), + "anchor", + ); + + record.branch = Some(String::from("x/pubfi-pub-718")); + record.worktree_path = Some(String::from(".worktrees/PUB-718")); + record.pr_url = Some(String::from("https://github.com/hack-ink/pubfi-mono-v2/pull/14")); + record.pr_head_sha = Some(String::from("0123456789abcdef0123456789abcdef01234567")); + record.pr_base_ref = Some(String::from("main")); + record.commit_sha = Some(String::from("0123456789abcdef0123456789abcdef01234567")); + record.validation_result = Some(String::from("passed")); + record.summary = Some(String::from("Explicit operator rebind restored marker.")); + record.evidence = Some(vec![String::from("existing_review_handoff_marker=absent")]); + + records::validate_linear_execution_event_record(&record) + .expect("rebind event should validate"); + } + + #[test] + fn review_handoff_rebind_event_requires_evidence() { + let mut record = LinearExecutionEventRecord::new( + LinearExecutionEventIdentity { + service_id: "pubfi", + issue_id: "issue-id", + issue_identifier: "PUB-718", + run_id: "pub-718-attempt-1", + attempt_number: 1, + }, + REVIEW_HANDOFF_REBIND_EVENT, + super::current_timestamp(), + "anchor", + ); + + record.branch = Some(String::from("x/pubfi-pub-718")); + record.pr_url = Some(String::from("https://github.com/hack-ink/pubfi-mono-v2/pull/14")); + record.pr_head_sha = Some(String::from("0123456789abcdef0123456789abcdef01234567")); + record.pr_base_ref = Some(String::from("main")); + record.commit_sha = Some(String::from("0123456789abcdef0123456789abcdef01234567")); + record.validation_result = Some(String::from("passed")); + record.summary = Some(String::from("Explicit operator rebind restored marker.")); + + let error = records::validate_linear_execution_event_record(&record) + .expect_err("rebind event without evidence should fail"); + + assert!(error.contains("evidence")); + } +} diff --git a/apps/decodex/src/state/internal.rs b/apps/decodex/src/state/internal.rs index 1566e68..db52a4c 100644 --- a/apps/decodex/src/state/internal.rs +++ b/apps/decodex/src/state/internal.rs @@ -448,6 +448,32 @@ ON CONFLICT(key) DO UPDATE SET value = excluded.value; Ok(()) } + fn delete_review_marker_identity( + &mut self, + project_id: &str, + issue_id: &str, + branch_name: &str, + run_id: &str, + attempt_number: i64, + ) -> Result<()> { + let transaction = self.connection.transaction()?; + + transaction.execute( + "DELETE FROM review_handoffs + WHERE project_id = ?1 AND issue_id = ?2 AND branch_name = ?3", + params![project_id, issue_id, branch_name], + )?; + transaction.execute( + "DELETE FROM review_orchestrations + WHERE project_id = ?1 AND issue_id = ?2 AND branch_name = ?3 + AND run_id = ?4 AND attempt_number = ?5", + params![project_id, issue_id, branch_name, run_id, attempt_number], + )?; + transaction.commit()?; + + Ok(()) + } + fn load_projects(&self, state: &mut StateData) -> Result<()> { let mut statement = self.connection.prepare( "SELECT service_id, config_path, repo_root, worktree_root, workflow_path, \ diff --git a/apps/decodex/src/state/store.rs b/apps/decodex/src/state/store.rs index 63d0f70..f4d20b7 100644 --- a/apps/decodex/src/state/store.rs +++ b/apps/decodex/src/state/store.rs @@ -1021,6 +1021,37 @@ impl StateStore { self.delete_review_markers_locked(issue_id) } + /// Remove the exact retained review markers created for one handoff identity. + pub(crate) fn clear_review_markers_for_handoff( + &self, + project_id: &str, + issue_id: &str, + handoff_marker: &ReviewHandoffMarker, + orchestration_marker: &ReviewOrchestrationMarker, + ) -> Result<()> { + let handoff_key = ReviewMarkerKey::new(project_id, issue_id, handoff_marker.branch_name()); + let orchestration_key = ReviewOrchestrationKey::new( + project_id, + issue_id, + orchestration_marker.branch_name(), + orchestration_marker.run_id(), + orchestration_marker.attempt_number(), + ); + let mut state = self.lock()?; + + state.review_handoffs.remove(&handoff_key); + state.review_orchestrations.remove(&orchestration_key); + self.persist_runtime_state_locked(&state)?; + + self.delete_review_marker_identity_locked( + project_id, + issue_id, + handoff_marker.branch_name(), + orchestration_marker.run_id(), + orchestration_marker.attempt_number(), + ) + } + /// Read the worktree mapping for one issue. pub fn worktree_for_issue(&self, issue_id: &str) -> Result> { let state = self.lock()?; @@ -1177,6 +1208,30 @@ impl StateStore { sqlite.delete_review_markers(issue_id) } + + fn delete_review_marker_identity_locked( + &self, + project_id: &str, + issue_id: &str, + branch_name: &str, + run_id: &str, + attempt_number: i64, + ) -> Result<()> { + let Some(sqlite) = self.sqlite.as_ref() else { + return Ok(()); + }; + let mut sqlite = sqlite + .lock() + .map_err(|_| eyre::eyre!("StateStore SQLite mutex is poisoned."))?; + + sqlite.delete_review_marker_identity( + project_id, + issue_id, + branch_name, + run_id, + attempt_number, + ) + } } fn retarget_review_handoff_issue( diff --git a/apps/decodex/src/state/tests.rs b/apps/decodex/src/state/tests.rs index f3a5452..bd37a40 100644 --- a/apps/decodex/src/state/tests.rs +++ b/apps/decodex/src/state/tests.rs @@ -82,6 +82,101 @@ fn review_markers_roundtrip_preserve_required_fields() { assert_eq!(restored_orchestration, orchestration); } +#[test] +fn clear_review_markers_for_handoff_preserves_other_branches() { + let temp_dir = TempDir::new().expect("tempdir should create"); + let state_path = temp_dir.path().join("runtime.sqlite3"); + let store = StateStore::open(&state_path).expect("state store should open"); + let removed_handoff = ReviewHandoffMarker::new( + "run-1", + 1, + "x/decodex-pub-101", + "https://github.com/hack-ink/decodex/pull/101", + "main", + "x/decodex-pub-101", + "08a20f7dfb9526e7421a5f095b1c6adec84e52d6", + ); + let removed_orchestration = ReviewOrchestrationMarker::new( + "run-1", + 1, + "x/decodex-pub-101", + "https://github.com/hack-ink/decodex/pull/101", + "08a20f7dfb9526e7421a5f095b1c6adec84e52d6", + "request_pending", + None, + None, + None, + 0, + 0, + None, + ); + let kept_handoff = ReviewHandoffMarker::new( + "run-2", + 1, + "x/decodex-pub-101-review", + "https://github.com/hack-ink/decodex/pull/102", + "main", + "x/decodex-pub-101-review", + "18a20f7dfb9526e7421a5f095b1c6adec84e52d6", + ); + let kept_orchestration = ReviewOrchestrationMarker::new( + "run-2", + 1, + "x/decodex-pub-101-review", + "https://github.com/hack-ink/decodex/pull/102", + "18a20f7dfb9526e7421a5f095b1c6adec84e52d6", + "request_pending", + None, + None, + None, + 0, + 0, + None, + ); + + store + .upsert_review_handoff_marker("pubfi", "PUB-101", &removed_handoff) + .expect("removed handoff marker should persist"); + store + .upsert_review_orchestration_marker("pubfi", "PUB-101", &removed_orchestration) + .expect("removed orchestration marker should persist"); + store + .upsert_review_handoff_marker("pubfi", "PUB-101", &kept_handoff) + .expect("kept handoff marker should persist"); + store + .upsert_review_orchestration_marker("pubfi", "PUB-101", &kept_orchestration) + .expect("kept orchestration marker should persist"); + store + .clear_review_markers_for_handoff( + "pubfi", + "PUB-101", + &removed_handoff, + &removed_orchestration, + ) + .expect("exact review markers should clear"); + + let reopened = StateStore::open(&state_path).expect("reopened state store should open"); + + assert!( + reopened + .review_handoff_marker("pubfi", "PUB-101", "x/decodex-pub-101") + .expect("removed handoff marker should read") + .is_none() + ); + assert_eq!( + reopened + .review_handoff_marker("pubfi", "PUB-101", "x/decodex-pub-101-review") + .expect("kept handoff marker should read"), + Some(kept_handoff.clone()) + ); + assert_eq!( + reopened + .review_orchestration_marker("pubfi", "PUB-101", &kept_handoff) + .expect("kept orchestration marker should read"), + Some(kept_orchestration) + ); +} + #[test] fn missing_review_markers_return_absent() { let store = StateStore::open_in_memory().expect("state store should open"); diff --git a/apps/decodex/src/tracker/records.rs b/apps/decodex/src/tracker/records.rs index 5bf4a38..22770ec 100644 --- a/apps/decodex/src/tracker/records.rs +++ b/apps/decodex/src/tracker/records.rs @@ -335,6 +335,13 @@ fn validate_linear_execution_event_fields( require_string(record.terminal_path.as_deref(), "terminal_path") }, + "review_handoff_rebind" => { + validate_pr_event_fields(record)?; + require_string(record.validation_result.as_deref(), "validation_result")?; + require_string(record.summary.as_deref(), "summary")?; + + require_vec(record.evidence.as_ref(), "evidence") + }, "landed" => { validate_pr_event_fields(record)?; diff --git a/docs/reference/operator-control-plane.md b/docs/reference/operator-control-plane.md index b938dc0..0f9be1a 100644 --- a/docs/reference/operator-control-plane.md +++ b/docs/reference/operator-control-plane.md @@ -122,6 +122,11 @@ Worktree visibility follows the owning dashboard section: within the same turn. - `Review & Landing` means a retained PR lane still owns the path for review repair, landing, closeout, or retained-lane cleanup. +- `missing_review_handoff_record` in `Review & Landing` means Decodex found a retained + review worktree but cannot find the authoritative runtime DB handoff marker. Treat + this as an orphaned retained review lane: inspect it with + `decodex recover review-handoff diagnose `, then use the explicit rebind path + only after the PR URL and retained worktree lineage match exactly. - `Intake Queue` means queued attention still owns the path, including partial retained progress after retries. - `Recovery Worktrees` means the path is retained local state after the authoritative diff --git a/docs/runbook/index.md b/docs/runbook/index.md index db3df08..0578c57 100644 --- a/docs/runbook/index.md +++ b/docs/runbook/index.md @@ -33,5 +33,8 @@ Question this index answers: "which sequence should I execute?" - [`local-github-signal-workflow.md`](./local-github-signal-workflow.md) for collecting GitHub change bundles, running Codex editorial analysis, validating signal entries, and publishing static site content. +- [`recover-review-handoff.md`](./recover-review-handoff.md) for diagnosing and + explicitly rebinding retained review lanes blocked by a missing runtime DB handoff + marker. - [`self-dogfood-pilot.md`](./self-dogfood-pilot.md) for the retained-lane pilot run against `decodex` itself and the bounded live-operation sequence. diff --git a/docs/runbook/recover-review-handoff.md b/docs/runbook/recover-review-handoff.md new file mode 100644 index 0000000..ff59f93 --- /dev/null +++ b/docs/runbook/recover-review-handoff.md @@ -0,0 +1,71 @@ +# Recover Missing Review Handoff + +Purpose: Diagnose and explicitly repair retained review lanes that are blocked by a +missing runtime DB handoff marker. + +Use this when: `decodex status`, `/state`, or the dashboard shows a `Review & Landing` +lane blocked with `missing_review_handoff_record`. + +Do not use this for: healthy PR handoffs, review repair, landing, closeout, cleanup-only +worktrees, or manual PR landing. + +Governing spec: [`../spec/post-review-lifecycle.md`](../spec/post-review-lifecycle.md). + +## Read-Only Diagnosis + +Run from the project repo or pass the registered project directory: + +```sh +decodex recover review-handoff diagnose +decodex --config "$HOME/.codex/decodex/projects/" recover review-handoff diagnose +``` + +Omit `` to inspect every retained review worktree for the configured project. +Use `--json` for a structured report. + +The diagnostic is read-only. It reports the issue, tracker state, branch, worktree, +local branch, local head, worktree cleanliness, existing PR URL when one is already +bound, active automation label presence, and the suggested next command. + +## Explicit Rebind + +This is a break-glass path. Healthy lanes should keep using +`issue_review_handoff` followed by `issue_terminal_finalize(path = "review_handoff")`; +operators should not use rebind just because a normal review handoff is still in +progress. + +Only rebind when the diagnosis says the review handoff marker is absent and the PR +lineage has been checked. + +```sh +decodex recover review-handoff rebind --pr --dry-run +decodex recover review-handoff rebind --pr +``` + +The non-dry-run command writes runtime DB handoff/orchestration markers and records a +`review_handoff_rebind` audit event on the tracker issue. It does not merge the PR, +change issue state, queue follow-up issues, or clean worktrees. + +The command rejects the rebind unless all of these are true: + +- the issue is in the workflow `tracker.success_state` +- the issue does not have opt-out or needs-attention labels +- the issue still has `decodex:active:` ownership +- the retained worktree branch matches the runtime DB worktree mapping +- the retained worktree has no local changes except `.decodex-run-activity` +- the PR belongs to the configured GitHub repository +- the PR targets the configured default branch +- the PR is open and non-draft +- the PR head branch and head SHA match the retained worktree +- no review handoff marker already exists for the issue/branch + +After a successful rebind, run: + +```sh +decodex status +decodex run --dry-run +``` + +The lane should leave `missing_review_handoff_record` and return to the existing +post-review lifecycle classification such as waiting for review, ready to land, review +repair required, or blocked for a different concrete reason. diff --git a/docs/spec/linear-execution-ledger.md b/docs/spec/linear-execution-ledger.md index 1149602..f22bfdc 100644 --- a/docs/spec/linear-execution-ledger.md +++ b/docs/spec/linear-execution-ledger.md @@ -133,6 +133,7 @@ The event type set is intentionally small and low-frequency: - `pr_opened` - `pr_updated` - `review_handoff` +- `review_handoff_rebind` - `repair_handoff` - `landed` - `closeout` @@ -156,6 +157,7 @@ Every event requires the record envelope. Additional required fields are listed | `pr_opened` | `branch`, `pr_url`, `pr_head_sha`, `pr_base_ref`, `commit_sha` | `worktree_path`, `summary` | | `pr_updated` | `branch`, `pr_url`, `pr_head_sha`, `pr_base_ref`, `commit_sha` | `worktree_path`, `summary` | | `review_handoff` | `branch`, `worktree_path`, `pr_url`, `pr_head_sha`, `pr_base_ref`, `commit_sha`, `validation_result`, `summary`, `terminal_path` | `verification` | +| `review_handoff_rebind` | `branch`, `pr_url`, `pr_head_sha`, `pr_base_ref`, `commit_sha`, `validation_result`, `summary`, `evidence` | `worktree_path`, `next_action` | | `repair_handoff` | `branch`, `worktree_path`, `pr_url`, `pr_head_sha`, `pr_base_ref`, `commit_sha`, `validation_result`, `summary`, `terminal_path` | `verification` | | `landed` | `branch`, `pr_url`, `pr_head_sha`, `pr_base_ref`, `commit_sha`, `summary` | `worktree_path` | | `closeout` | `pr_url`, `commit_sha`, `summary` | `branch`, `worktree_path`, `validation_result`, `target_state` | @@ -168,6 +170,11 @@ that writes the event. For normal review handoff this is `review_handoff`; for r repair completion this is `review_repair`; for explicit human-required exits this is `manual_attention`. +`review_handoff_rebind` is only for an explicit operator recovery command that restores a +missing runtime DB review handoff marker after validating the retained worktree and PR +lineage. It is not a normal agent terminal signal, does not imply `issue_terminal_finalize` +ran, and must not be emitted automatically from `decodex run`. + ## Progress checkpoint records `progress_checkpoint` records are the Linear form of durable execution memory. They diff --git a/docs/spec/post-review-lifecycle.md b/docs/spec/post-review-lifecycle.md index 5e55f98..09482b6 100644 --- a/docs/spec/post-review-lifecycle.md +++ b/docs/spec/post-review-lifecycle.md @@ -65,6 +65,29 @@ In the current runtime, the retained lane persists its validated review handoff If these signals disagree and the disagreement cannot be resolved without guessing operator intent, the runtime must use `manual_intervention_required`. +## Explicit handoff recovery + +`missing_review_handoff_record` is a fail-closed post-review state. The scheduler must +not infer a PR lineage from branch names, current heads, PR titles, or Linear comments, +and `decodex run` must not repair this state automatically. + +The supported operator recovery surface is `decodex recover review-handoff`. This is a +break-glass recovery path for orphaned retained review lanes, not part of the normal +automation success path. + +- `diagnose` is read-only. It reports the project, issue, branch, worktree, local head, + active automation label, existing PR URL when present, and the missing marker reason. +- `rebind` is mutating and requires an explicit issue identifier plus PR URL. It must + validate the configured project, tracker issue, success-state compatibility, active + automation ownership, retained worktree branch, clean worktree, PR repository, PR base, + PR head branch, PR head SHA, and open non-draft PR state before writing markers. +- A successful rebind writes the same runtime DB handoff and orchestration marker shapes + as normal `issue_review_handoff` needs, and records a `review_handoff_rebind` audit + event. It does not land the PR, queue follow-up work, or substitute for healthy lanes' + normal `issue_review_handoff` plus `issue_terminal_finalize(path = "review_handoff")` + path. If any audit write fails after marker creation, the command must clear the new + markers and report failure instead of leaving a silently rebound lane. + ## Phase model The post-`In Review` lifecycle is expressed in lane phases. These phases refine, but do not replace, the owned-lane action classes.