From 51df90658526411050c20af767852c85574a7ce5 Mon Sep 17 00:00:00 2001 From: shiny-code-bot Date: Thu, 18 Jun 2026 12:31:38 -0400 Subject: [PATCH] fix: recover auto-review reads from corrupt index --- codex-rs/auto-review/src/lib.rs | 65 ++++++++----- codex-rs/auto-review/src/lib_tests.rs | 126 ++++++++++++++++++++++++++ 2 files changed, 168 insertions(+), 23 deletions(-) diff --git a/codex-rs/auto-review/src/lib.rs b/codex-rs/auto-review/src/lib.rs index 491d02a5f052..1eb1392e9fa7 100644 --- a/codex-rs/auto-review/src/lib.rs +++ b/codex-rs/auto-review/src/lib.rs @@ -38,6 +38,12 @@ pub struct AutoReviewStore { legacy_root: Option, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum IndexLoadMode { + Strict, + Recovering, +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct AutoReviewDuplicateMatch { pub run_id: String, @@ -230,7 +236,7 @@ impl AutoReviewStore { return Ok(None); } Ok(self - .load_index()? + .load_index_for_read()? .runs .into_iter() .filter(|run| run.target.worktree_diff_fingerprint.as_deref() == Some(fingerprint)) @@ -266,7 +272,7 @@ impl AutoReviewStore { pub fn load_run(&self, run_id: &str) -> Result { validate_safe_id(run_id).context("auto review run_id")?; let Some(run) = self - .load_index()? + .load_index_for_read()? .runs .into_iter() .find(|run| run.run_id == run_id) @@ -278,7 +284,7 @@ impl AutoReviewStore { } pub fn list_runs(&self) -> Result> { - let mut runs = self.load_index()?.runs; + let mut runs = self.load_index_for_read()?.runs; runs.sort_by(|left, right| left.run_id.cmp(&right.run_id)); Ok(runs) } @@ -302,24 +308,39 @@ impl AutoReviewStore { } fn load_index(&self) -> Result { + self.load_index_with_mode(IndexLoadMode::Strict) + } + + fn load_index_for_read(&self) -> Result { + self.load_index_with_mode(IndexLoadMode::Recovering) + } + + fn load_index_with_mode(&self, mode: IndexLoadMode) -> Result { let path = self.runs_path(); let mut index = AutoReviewRunsIndex::default(); for run in self.load_legacy_runs()? { index.upsert(run); } + let mut indexed_run_ids = BTreeSet::new(); if path.exists() { - let json = std::fs::read_to_string(&path).with_context(|| { - format!("failed to read auto review runs index {}", path.display()) - })?; - let parsed: AutoReviewRunsIndex = serde_json::from_str(&json).with_context(|| { - format!("failed to parse auto review runs index {}", path.display()) - })?; - for run in parsed.runs { - index.upsert(run); + match load_runs_index_file(&path) { + Ok(parsed) => { + for run in parsed.runs { + indexed_run_ids.insert(run.run_id.clone()); + index.upsert(run); + } + } + Err(err) if mode == IndexLoadMode::Recovering => { + drop(err); + } + Err(err) => return Err(err), } } for run in self.load_output_runs() { - index.upsert_if_absent(run); + if indexed_run_ids.contains(&run.run_id) { + continue; + } + index.upsert(run); } index.validate()?; Ok(index) @@ -431,6 +452,15 @@ impl AutoReviewStore { } } +fn load_runs_index_file(path: &Path) -> Result { + let json = std::fs::read_to_string(path) + .with_context(|| format!("failed to read auto review runs index {}", path.display()))?; + let parsed: AutoReviewRunsIndex = serde_json::from_str(&json) + .with_context(|| format!("failed to parse auto review runs index {}", path.display()))?; + parsed.validate()?; + Ok(parsed) +} + #[derive(Debug, Clone, Serialize, Deserialize)] struct AutoReviewRunsIndex { schema_version: u32, @@ -473,17 +503,6 @@ impl AutoReviewRunsIndex { by_id.insert(run.run_id.clone(), run); self.runs = by_id.into_values().collect(); } - - fn upsert_if_absent(&mut self, run: AutoReviewRun) { - if self - .runs - .iter() - .any(|existing| existing.run_id == run.run_id) - { - return; - } - self.upsert(run); - } } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] diff --git a/codex-rs/auto-review/src/lib_tests.rs b/codex-rs/auto-review/src/lib_tests.rs index 25760dcb7443..cc7a91af62eb 100644 --- a/codex-rs/auto-review/src/lib_tests.rs +++ b/codex-rs/auto-review/src/lib_tests.rs @@ -164,6 +164,115 @@ fn corrupt_sidecar_does_not_block_store_listing() -> anyhow::Result<()> { Ok(()) } +#[test] +fn list_runs_recovers_from_corrupt_index_using_outputs() -> anyhow::Result<()> { + let codex_home = tempfile::tempdir()?; + let scope = tempfile::tempdir()?; + let store = AutoReviewStore::for_scope(codex_home.path(), scope.path()); + store.save_run(&sample_run("run_a", Vec::new()))?; + store.save_run(&sample_run("run_b", Vec::new()))?; + corrupt_runs_index(&store)?; + + let runs = store.list_runs()?; + + assert_eq!( + runs.into_iter().map(|run| run.run_id).collect::>(), + vec!["run_a".to_string(), "run_b".to_string()] + ); + Ok(()) +} + +#[test] +fn load_run_recovers_from_corrupt_index_using_output() -> anyhow::Result<()> { + let codex_home = tempfile::tempdir()?; + let scope = tempfile::tempdir()?; + let store = AutoReviewStore::for_scope(codex_home.path(), scope.path()); + let run = sample_run("run_1", vec![sample_finding("f1", "Title")]); + store.save_run(&run)?; + corrupt_runs_index(&store)?; + + let loaded = store.load_run("run_1")?; + + assert_eq!(loaded, run); + Ok(()) +} + +#[test] +fn finding_detail_recovers_from_corrupt_index_using_output() -> anyhow::Result<()> { + let codex_home = tempfile::tempdir()?; + let scope = tempfile::tempdir()?; + let store = AutoReviewStore::for_scope(codex_home.path(), scope.path()); + let run = sample_run("run_1", vec![sample_finding("f1", "Title")]); + store.save_run(&run)?; + corrupt_runs_index(&store)?; + + let detail = store.finding_detail("run_1", "f1", 1024)?; + + assert_eq!(detail.finding_id, "f1"); + assert!(detail.content.contains("Title")); + Ok(()) +} + +#[test] +fn read_paths_recover_from_corrupt_index_using_legacy_runs() -> anyhow::Result<()> { + let codex_home = tempfile::tempdir()?; + let scope = tempfile::tempdir()?; + let legacy_run = sample_run("legacy_run", vec![sample_finding("f1", "Legacy")]); + write_legacy_run(codex_home.path(), &legacy_run)?; + let store = AutoReviewStore::for_scope(codex_home.path(), scope.path()); + corrupt_runs_index(&store)?; + + let runs = store.list_runs()?; + let loaded = store.load_run("legacy_run")?; + let detail = store.finding_detail("legacy_run", "f1", 1024)?; + + assert_eq!(runs, vec![legacy_run.clone()]); + assert_eq!(loaded, legacy_run); + assert!(detail.content.contains("Legacy")); + Ok(()) +} + +#[test] +fn corrupt_index_prefers_scoped_output_over_legacy_run() -> anyhow::Result<()> { + let codex_home = tempfile::tempdir()?; + let scope = tempfile::tempdir()?; + let legacy_run = sample_run("same_run", vec![sample_finding("f1", "Legacy")]); + let scoped_run = sample_run("same_run", vec![sample_finding("f1", "Scoped")]); + write_legacy_run(codex_home.path(), &legacy_run)?; + let store = AutoReviewStore::for_scope(codex_home.path(), scope.path()); + store.save_run(&scoped_run)?; + corrupt_runs_index(&store)?; + + let loaded = store.load_run("same_run")?; + let detail = store.finding_detail("same_run", "f1", 1024)?; + + assert_eq!(loaded, scoped_run); + assert!(detail.content.contains("Scoped")); + assert!(!detail.content.contains("Legacy")); + Ok(()) +} + +#[test] +fn save_run_keeps_corrupt_index_strict() -> anyhow::Result<()> { + let codex_home = tempfile::tempdir()?; + let scope = tempfile::tempdir()?; + let store = AutoReviewStore::for_scope(codex_home.path(), scope.path()); + store.save_run(&sample_run("run_1", Vec::new()))?; + corrupt_runs_index(&store)?; + + let error = store + .save_run(&sample_run("run_2", Vec::new())) + .expect_err("writing through a corrupt index should stay explicit"); + + assert!( + error + .to_string() + .contains("failed to parse auto review runs index"), + "unexpected error: {error:#}" + ); + Ok(()) +} + #[test] fn list_runs_returns_empty_when_store_is_missing() -> anyhow::Result<()> { let codex_home = tempfile::tempdir()?; @@ -1127,6 +1236,23 @@ fn sample_run(run_id: &str, findings: Vec) -> AutoRevie } } +fn corrupt_runs_index(store: &AutoReviewStore) -> anyhow::Result<()> { + let runs_path = store.runs_path(); + std::fs::create_dir_all(runs_path.parent().expect("runs path parent"))?; + std::fs::write(runs_path, "not json\n")?; + Ok(()) +} + +fn write_legacy_run(codex_home: &std::path::Path, run: &AutoReviewRun) -> anyhow::Result<()> { + let legacy_dir = codex_home.join("auto-review").join("runs"); + std::fs::create_dir_all(&legacy_dir)?; + std::fs::write( + legacy_dir.join(format!("{}.json", run.run_id)), + format!("{}\n", serde_json::to_string_pretty(run)?), + )?; + Ok(()) +} + fn target_with_fingerprint(fingerprint: &str) -> AutoReviewRunTarget { AutoReviewRunTarget { worktree_diff_fingerprint: Some(fingerprint.to_string()),