From adeb51189eff90238163c28a1ea347a7e1be1932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Fri, 12 Jun 2026 20:34:57 +0200 Subject: [PATCH 1/2] feat: align sweep with Phase 2 naming, add aggregate sweep summary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename prompts/sweep.md → prompts/phase-2-sweep.md - Add prompts/phase-2-sweep-summary.md (aggregate sweep rollup) - Per-file sweeps write runs/phase-2-summary-sweep--*.md - Aggregate rollup writes runs/sweep-summary-*.md - find_latest_summary('2') excludes per-file sweep summaries - Add find_latest_sweep_summary() for sweep-summary-*.md - Update codecome hints: Phase 2 uses filtered lookup, Sweep block uses rollup - run-sweep.py invokes aggregate summary after all per-file sweeps succeed - Update README and docs for new artifact names - Add tests for sweep exclusion, find_latest_sweep_summary, aggregate summary --- .project/phase-2-sweep-alignment-plan.md | 396 +++++++++++++++++++++++ README.md | 5 +- docs/file-risk-sweeps.md | 16 +- prompts/phase-2-sweep-summary.md | 71 ++++ prompts/{sweep.md => phase-2-sweep.md} | 14 +- tests/test_phases_completion.py | 72 +++++ tests/test_run_summary_questions.py | 115 +++++++ tests/test_run_sweep.py | 219 +++++++++++++ tools/codecome.py | 24 +- tools/codecome/run_summary_questions.py | 43 ++- tools/run-sweep.py | 43 ++- 11 files changed, 992 insertions(+), 26 deletions(-) create mode 100644 .project/phase-2-sweep-alignment-plan.md create mode 100644 prompts/phase-2-sweep-summary.md rename prompts/{sweep.md => phase-2-sweep.md} (81%) create mode 100644 tests/test_run_sweep.py diff --git a/.project/phase-2-sweep-alignment-plan.md b/.project/phase-2-sweep-alignment-plan.md new file mode 100644 index 00000000..f1146691 --- /dev/null +++ b/.project/phase-2-sweep-alignment-plan.md @@ -0,0 +1,396 @@ +# Phase 2 Sweep Alignment Plan + +## Problem + +`make sweep` is intended to run focused, file-scoped Phase 
2 hypothesis generation. It is implemented as a meta-orchestrator in `tools/run-sweep.py` that selects files and invokes `tools/run-agent.py` once per file. + +The current per-file invocation uses the Phase 2 harness identity: + +```bash +python tools/run-agent.py \ + --phase 2 \ + --label "Deep Sweep: <file>" \ + --agent auditor \ + --prompt-file tmp/file-sweep-prompts/sweep-<slug>.md +``` + +The harness sees `--phase 2`, so the Phase 2 completion gate expects a fresh run summary matching: + +```text +runs/phase-2-summary*.md +``` + +Historically, the sweep prompt told the model to write: + +```text +runs/sweep-<slug>-summary-YYYY-MM-DD-HHMMSS.md +``` + +That naming does not satisfy the Phase 2 completion gate. A correctly completed per-file sweep can therefore fail because the artifact has the wrong name for the harness that executed it. + +The first alignment change renamed the per-file summary to: + +```text +runs/phase-2-summary-sweep-<slug>-YYYY-MM-DD-HHMMSS.md +``` + +That fixes the completion gate, but introduces a second issue: helper code that asks for the latest broad Phase 2 summary now also sees sweep summaries. This is confusing for `codecome hints` and for any tool that wants the latest normal `make phase-2` run rather than the latest per-file sweep run. + +There is also a usability gap: a sweep may process many files, creating many per-file summaries. Operators need one final sweep-level summary that consolidates findings, open questions, rerun hints, limitations, and next steps. + +## Important Clarification + +Sweep runs do **not** process both `prompts/phase-2-audit.md` and the sweep prompt. + +`--phase 2` is used by the harness for phase identity, session title, prompt-extra lookup, completion gates, retries, and transcript naming. The actual prompt body comes from `--prompt-file`. 
+ +For sweep, `tools/run-sweep.py` reads the sweep prompt template, replaces `FILE_PATH_OR_ID`, writes a generated prompt under `tmp/file-sweep-prompts/`, and passes that generated path as `--prompt-file`. + +The only normal Phase 2 content a sweep may inherit is `audit.extra_prompts.hypothesis_generation` from `codecome.yml`, because `load_prompt(..., phase="2")` maps phase `2` to `hypothesis_generation`. It does not include `prompts/phase-2-audit.md`. + +## Decision + +Treat sweep as a specialized Phase 2 execution mode for each file, plus a separate sweep-level rollup step after all selected files complete. + +The per-file sweep runs should continue to invoke `run-agent.py --phase 2`, because each per-file run creates Phase 2 candidate findings under `itemdb/findings/PENDING/` and should use existing Phase 2 readiness and completion behavior. + +Per-file sweep summaries should remain Phase 2 summaries so the Phase 2 harness accepts them: + +```text +runs/phase-2-summary-sweep--YYYY-MM-DD-HHMMSS.md +``` + +The final aggregate sweep summary should **not** use `phase-2-summary-*`. It is not a single Phase 2 agent run and should not be treated as the latest broad Phase 2 summary. Use a distinct sweep-level name: + +```text +runs/sweep-summary-YYYY-MM-DD-HHMMSS.md +``` + +This gives each artifact a clear meaning: + +- `runs/phase-2-summary-YYYY-MM-DD-HHMMSS.md`: broad `make phase-2` hypothesis-generation summary. +- `runs/phase-2-summary-sweep--YYYY-MM-DD-HHMMSS.md`: one file-scoped Phase 2 sweep summary, accepted by the Phase 2 completion gate. +- `runs/sweep-summary-YYYY-MM-DD-HHMMSS.md`: aggregate sweep rollup for humans and `codecome hints`. + +## Non-Goals + +- Do not add a separate `--phase sweep` execution mode for per-file sweep runs. +- Do not make the Phase 2 completion gate require `sweep-summary-*.md`; the per-file sweep summary remains the completion artifact for each Phase 2 harness run. 
+- Do not concatenate the full standard Phase 2 audit prompt into the sweep prompt. +- Do not make the aggregate sweep summary perform fresh vulnerability hunting. +- Do not create new findings during the aggregate summary step. +- Do not make `codecome hints` print every per-file sweep summary by default. + +## Prompt Strategy + +### Per-File Sweep Prompt + +Rename the per-file sweep prompt: + +```text +prompts/sweep.md -> prompts/phase-2-sweep.md +``` + +The renamed prompt should explicitly state: + +- This is CodeCome Phase 2 hypothesis generation in file-scoped sweep mode. +- It complements the broad `make phase-2` pass. +- It inherits Phase 2 finding-quality and artifact expectations. +- It narrows the broad Phase 2 scope to one target file plus immediate dependencies needed for reachability and source-to-sink reasoning. +- It must write `runs/phase-2-summary-sweep--YYYY-MM-DD-HHMMSS.md`. +- It should print a concise end-of-run summary to the screen in addition to writing the durable run summary, matching the operator experience of other phase runs. + +Avoid including the full `prompts/phase-2-audit.md` text verbatim. That prompt contains broad-scope instructions such as avoiding line-by-line deep dives, which directly conflicts with sweep mode. + +If shared prompt content becomes necessary later, extract common Phase 2 requirements into a reusable prompt fragment and include it from both the broad Phase 2 prompt and the sweep-mode prompt. Do not solve that extraction in this fix. + +### Aggregate Sweep Summary Prompt + +Add a new prompt: + +```text +prompts/phase-2-sweep-summary.md +``` + +This prompt is for the final rollup step after `tools/run-sweep.py` has finished the selected per-file sweeps. + +It should instruct the model to read and consolidate: + +- The per-file sweep summaries matching `runs/phase-2-summary-sweep-*.md`. 
+- The relevant candidate findings under `itemdb/findings/PENDING/`, especially findings created or touched during the sweep when identifiable from summaries. +- Any sweep selection context provided in the generated rollup prompt, such as selected files and failed files if partial summary mode is later added. + +It should instruct the model to write a durable summary to: + +```text +runs/sweep-summary-YYYY-MM-DD-HHMMSS.md +``` + +It should also instruct the model to print the same concise summary to the screen. The screen output should be useful to the operator immediately after `make sweep` finishes, without requiring them to open the summary file. + +The aggregate prompt should explicitly forbid fresh vulnerability hunting and new finding creation. Its job is consolidation, not another audit pass. + +The aggregate summary should include: + +- Files selected for the sweep. +- Per-file sweep summaries considered. +- Findings created or updated, grouped by likely theme or affected component when possible. +- Duplicate or overlapping finding candidates noticed across files. +- Open questions consolidated across per-file summaries. +- Re-run hints consolidated into concrete `PROMPT_EXTRA` or `PROMPT_EXTRA_FILE` suggestions. +- Limitations, including missing summaries, skipped files, failed per-file runs, or summaries that were too vague to consolidate. +- Recommended next step, usually Phase 3 counter-analysis once the operator is satisfied with the candidate set. + +## Helper Semantics + +### Broad Phase 2 Summary Lookup + +`find_latest_summary("2")` should mean the latest broad Phase 2 summary by default. It should skip per-file sweep summaries. + +Concretely, it should not return files matching: + +```text +phase-2-summary-sweep-*.md +``` + +This keeps callers from accidentally treating a narrow file sweep as the latest global hypothesis-generation pass. 
+ +A minimal implementation is to add optional exclude support: + +```python +def find_latest_summary( + phase_id: str, + finding: str | None = None, + *, + exclude_patterns: tuple[str, ...] = (), +) -> Path | None: + ... +``` + +Then the default broad Phase 2 caller can pass `exclude_patterns=("phase-2-summary-sweep-*.md",)` or the function can special-case phase `2` if all current callers expect broad Phase 2 semantics. + +The first option is more explicit and less surprising for future code. + +### Sweep Summary Lookup + +Add a separate helper for aggregate sweep summaries, for example: + +```python +def find_latest_sweep_summary() -> Path | None: + ... +``` + +It should search: + +```text +runs/sweep-summary-*.md +``` + +This prevents aggregate sweep summaries from being mixed into Phase 2 summary lookup. + +### `codecome hints` + +`codecome hints` should consider both broad phase summaries and the aggregate sweep rollup. + +Recommended display model: + +- `Phase 1a`: latest Phase 1a summary. +- `Phase 1b`: latest Phase 1b summary. +- `Phase 1c`: latest Phase 1c summary. +- `Phase 2`: latest non-sweep `phase-2-summary*.md`. +- `Sweep`: latest `sweep-summary-*.md`, if present. +- `Phase 3`: latest Phase 3 summary. +- `Phase 4`: latest finding-scoped Phase 4 summary. +- `Phase 5`: latest finding-scoped Phase 5 summary. + +Do not print every per-file `phase-2-summary-sweep-*.md` by default. Large sweeps can produce many files, and dumping all of them makes `hints` noisy. The rollup exists to consolidate those details. + +If no `sweep-summary-*.md` exists, there are two acceptable behaviors: + +- Preferred: print no `Sweep` block and rely on the absence of the rollup as a signal that the sweep did not complete aggregation. +- Alternative: print only the latest per-file sweep summary as `Sweep file`, but clearly label it as incomplete and avoid duplicating it as `Phase 2`. 
+ +The preferred behavior is less noisy and encourages `run-sweep.py` to produce the aggregate summary consistently. + +## Implementation Steps + +1. Rename the per-file sweep prompt. + + ```text + prompts/sweep.md -> prompts/phase-2-sweep.md + ``` + +2. Update `tools/run-sweep.py` to use the renamed prompt. + + Change: + + ```python + PROMPT_TEMPLATE = ROOT / "prompts" / "sweep.md" + ``` + + to: + + ```python + PROMPT_TEMPLATE = ROOT / "prompts" / "phase-2-sweep.md" + ``` + + Keep each per-file invocation using `--phase 2`. + +3. Update the per-file prompt text. + + Replace the old run-summary instruction with: + + ```text + runs/phase-2-summary-sweep-<slug>-YYYY-MM-DD-HHMMSS.md + ``` + + Explain that `<slug>` is the sanitized target file path. + + Instruct the model to print a concise end-of-run summary to the screen as well as writing the durable file. + +4. Add the aggregate sweep summary prompt. + + Create: + + ```text + prompts/phase-2-sweep-summary.md + ``` + + The prompt should instruct the model to consolidate `runs/phase-2-summary-sweep-*.md` into: + + ```text + runs/sweep-summary-YYYY-MM-DD-HHMMSS.md + ``` + + It should instruct the model to print the same concise rollup to the screen. + + It should explicitly say not to create findings and not to perform fresh vulnerability hunting. + +5. Add final rollup orchestration to `tools/run-sweep.py`. + + After all selected per-file sweeps succeed and when `--dry-run` is not set, run a final summary step using `prompts/phase-2-sweep-summary.md`. + + The rollup invocation should receive enough context to know which files were selected. The simplest approach is to generate a temporary prompt under `tmp/file-sweep-prompts/`, appending a selected-file list to the static rollup prompt, then run the selected agent against that generated prompt. + + Minimal acceptable behavior: + + - Run the rollup only after all per-file sweeps return success. + - Skip the rollup during `--dry-run`. 
+ - Return a non-zero exit code if the rollup step fails. + + Future partial-summary behavior can be added later with a `--continue-on-error` or `--summarize-partial` flag. + +6. Choose the rollup execution path. + + Prefer the smallest implementation that preserves existing rendering behavior. + + Option A: invoke `tools/run-agent.py` with a normal prompt but without pretending the rollup is Phase 2, if the runner supports a non-phase run mode. + + Option B: invoke `opencode run --agent auditor ` directly. This is simple, but bypasses the CodeCome styled wrapper and any run-agent conveniences. + + Option C: extend `tools/run-agent.py` to support a non-phase utility run. This is more invasive and should only be chosen if wrapper behavior is required. + + The current minimal preference is Option B unless inspection shows that `run-agent.py` already supports non-phase prompt execution. The aggregate summary is a convenience rollup, not a phase completion gate participant. + +7. Update summary lookup helpers. + + Add explicit filtering so broad Phase 2 lookup skips `phase-2-summary-sweep-*.md`. + + Add a separate helper for `sweep-summary-*.md`. + +8. Update `tools/codecome.py` hints behavior. + + Remove the current per-file sweep scan from the default hints output. + + Display the latest aggregate `sweep-summary-*.md` as `Sweep` if present. + + Ensure a per-file sweep summary can never be printed once as `Phase 2` and again as `Sweep`. + +9. Update docs. + + In `README.md` and `docs/file-risk-sweeps.md`, describe sweep as file-scoped Phase 2 plus a final aggregate sweep summary. + + Mention both artifact types: + + ```text + runs/phase-2-summary-sweep--YYYY-MM-DD-HHMMSS.md + runs/sweep-summary-YYYY-MM-DD-HHMMSS.md + ``` + + Update the reusable prompt list to include: + + ```text + prompts/phase-2-sweep.md + prompts/phase-2-sweep-summary.md + ``` + + Remove or replace references to `prompts/sweep.md` in active user-facing documentation. + +10. Add or update tests. 
+ + Suggested tests: + + - `tools/run-sweep.py` uses `prompts/phase-2-sweep.md` as its per-file template. + - `build_prompt_for_file("src/foo.php")` produces prompt content containing `src/foo.php` and `phase-2-summary-sweep--YYYY-MM-DD-HHMMSS.md`. + - The generated per-file sweep prompt no longer mentions `runs/sweep--summary`. + - The per-file sweep prompt asks the model to print a summary to the screen. + - The Phase 2 completion gate still accepts a fresh `runs/phase-2-summary-sweep-src-foo-php-YYYY-MM-DD-HHMMSS.md` through the existing `phase-2-summary*.md` glob. + - `find_latest_summary("2")` ignores `phase-2-summary-sweep-*.md` when looking for the broad Phase 2 summary. + - `find_latest_sweep_summary()` returns the newest `sweep-summary-*.md`. + - `codecome hints` displays a `Sweep` block from the latest aggregate sweep summary. + - `codecome hints` does not duplicate the same per-file sweep summary as both `Phase 2` and `Sweep`. + - `tools/run-sweep.py` invokes the aggregate summary step only after all per-file sweeps succeed. + - `tools/run-sweep.py` skips the aggregate summary step during `--dry-run`. + +11. Run local checks. + + ```bash + make tests + ``` + +## Expected Behavior After Fix + +For a command like: + +```bash +make sweep FILE="src/zabbix-7.4.10/ui/imgstore.php" +``` + +`tools/run-sweep.py` should: + +- Generate a file-scoped prompt under `tmp/file-sweep-prompts/`. +- Invoke `tools/run-agent.py --phase 2` for the selected file. +- Create or update candidate findings under `itemdb/findings/PENDING/` when the audit identifies credible candidates. +- Require the per-file model run to write a summary such as: + +```text +runs/phase-2-summary-sweep-src-zabbix-7-4-10-ui-imgstore-php-2026-06-12-164608.md +``` + +- Accept that per-file summary through the existing Phase 2 completion gate. +- After all selected files complete, run the aggregate summary prompt. 
+- Require the aggregate summary run to write: + +```text +runs/sweep-summary-2026-06-12-171200.md +``` + +- Print the aggregate summary to the screen before exiting. + +After that, `make hints` should: + +- Show broad Phase 2 questions from the latest non-sweep `phase-2-summary*.md`. +- Show sweep questions from the latest `sweep-summary-*.md`. +- Avoid printing per-file sweep summaries by default. +- Avoid duplicate output. + +## Future Work + +Possible future improvements, intentionally outside this fix: + +- Add `--continue-on-error` and final partial sweep status reporting. +- Add `--summarize-partial` to produce `sweep-summary-*.md` even when some per-file runs fail. +- Add per-file result JSON under `runs/` or `tmp/` for machine-readable sweep orchestration. +- Extract shared Phase 2 finding requirements into a prompt fragment reused by both `phase-2-audit.md` and `phase-2-sweep.md`. +- Support a separate `audit.extra_prompts.sweep` config key that is appended in addition to `audit.extra_prompts.hypothesis_generation`. +- Add a `make sweep-summary` target to regenerate only the aggregate summary from existing per-file sweep summaries. diff --git a/README.md b/README.md index 6393bbb2..2499eab8 100644 --- a/README.md +++ b/README.md @@ -357,7 +357,7 @@ When to use it: Trade-off: token cost scales linearly with the number of files swept (one full agent session per file). Sweep on 10 high-risk files costs roughly as many tokens as 10 Phase 2 runs. It produces overlapping findings that Phase 3 has to deduplicate. Always preview first with `--dry-run`. -How it works: the sweep runner reads `itemdb/notes/file-risk-index.yml` (written by Phase 1), selects all files at score 4 or above (or the files matched by `FILE=`), writes one prompt per file under `tmp/file-sweep-prompts/`, then invokes the `auditor` agent once per file in sequence. 
+How it works: the sweep runner reads `itemdb/notes/file-risk-index.yml` (written by Phase 1), selects all files at score 4 or above (or the files matched by `FILE=`), writes one prompt per file under `tmp/file-sweep-prompts/` (using `prompts/phase-2-sweep.md`), then invokes the `auditor` agent once per file in sequence. Each sweep run writes a Phase 2 summary at `runs/phase-2-summary-sweep--YYYY-MM-DD-HHMMSS.md`. make list-risk-files # preview which files would be swept python tools/run-sweep.py --dry-run # show selected files and prompts, no agent calls @@ -477,11 +477,12 @@ CodeCome ships reusable phase prompts under `prompts/`: prompts/phase-1b-recon.md prompts/phase-1c-sandbox.md prompts/phase-2-audit.md + prompts/phase-2-sweep.md + prompts/phase-2-sweep-summary.md prompts/phase-3-review.md prompts/phase-4-validate.md prompts/phase-5-exploit.md prompts/phase-6-report.md - prompts/sweep.md ### Wrapper environment variables diff --git a/docs/file-risk-sweeps.md b/docs/file-risk-sweeps.md index 9504a32f..69c8f45d 100644 --- a/docs/file-risk-sweeps.md +++ b/docs/file-risk-sweeps.md @@ -32,9 +32,9 @@ Show only paths for scripting: python tools/list-risk-files.py --format paths -## Run an optional Deep Sweep +## Run a Phase 2 Deep Sweep -While the global Phase 2 agent (`make phase-2`) focuses on macro-level architectural flaws and cross-component issues, you can run an optional **Deep Sweep** to perform exhaustive, line-by-line vulnerability hunting on specific high-risk files. +While the global Phase 2 agent (`make phase-2`) focuses on macro-level architectural flaws and cross-component issues, you can run an optional **Deep Sweep** (Phase 2 sweep mode) to perform exhaustive, line-by-line vulnerability hunting on specific high-risk files. Each sweep run creates Phase 2 candidate findings under `itemdb/findings/PENDING/` and writes a Phase 2 run summary. 
Run a sweep on specific files (supports glob patterns): @@ -49,12 +49,22 @@ Preview selected files and generated prompts without invoking OpenCode: python tools/run-sweep.py --dry-run -The sweep runner is sequential by default. It invokes the normal `auditor` agent using a specialized prompt that forces the model to read related dependencies and imports to establish complete source-to-sink context. +The sweep runner is sequential by default. It invokes the normal `auditor` agent with a specialized prompt (`prompts/phase-2-sweep.md`) that forces the model to read related dependencies and imports to establish complete source-to-sink context. Generated temporary prompts are written under: tmp/file-sweep-prompts/ +Each per-file sweep run writes a Phase 2 run summary at: + + runs/phase-2-summary-sweep--YYYY-MM-DD-HHMMSS.md + +After all selected files complete, the runner invokes a final aggregate sweep summary using `prompts/phase-2-sweep-summary.md`. The aggregate step consolidates findings, open questions, and re-run hints from all per-file summaries and writes: + + runs/sweep-summary-YYYY-MM-DD-HHMMSS.md + +The aggregate summary is also printed to the screen. Use `make hints` to review it later — the `Sweep` block in `codecome hints` surfaces questions from the latest aggregate sweep rollup. + ## Relationship with normal Phase 2 Normal Phase 2 remains the default broad hypothesis generation pass: diff --git a/prompts/phase-2-sweep-summary.md b/prompts/phase-2-sweep-summary.md new file mode 100644 index 00000000..e2ebdaa3 --- /dev/null +++ b/prompts/phase-2-sweep-summary.md @@ -0,0 +1,71 @@ +# CodeCome Phase 2: Sweep Summary (Aggregate Rollup) + +You are performing a consolidation pass — NOT a vulnerability hunting pass. + +The per-file Phase 2 sweep runs have completed. 
Your job is to read all per-file sweep summaries, consolidate their findings, open questions, and re-run hints into one durable aggregate summary, and also print the same concise summary to the screen. + +## Required reading + +Read these files (all paths are relative to the project/workspace root): + +- `AGENTS.md` +- `codecome.yml` +- All per-file sweep summaries matching `runs/phase-2-summary-sweep-*.md` +- Findings under `itemdb/findings/PENDING/` that were created or touched during the sweep (identifiable from the per-file summaries) + +If available in the context of this run, note which files were selected for the sweep (see the prompt body or prompt file attached to this run). + +## Forbidden actions + +- **Do NOT create new findings.** The per-file sweep runs already did that. +- **Do NOT perform fresh vulnerability hunting.** This is a consolidation pass. +- **Do NOT modify existing findings.** You are summarizing, not re-auditing. +- **Do NOT move findings between status directories.** + +## Required output + +### 1. Durable aggregate summary + +Write a run summary using the template at `templates/run-summary.md` to: + + runs/sweep-summary-YYYY-MM-DD-HHMMSS.md + +Use the current UTC date and time. + +### 2. Screen output + +Print the same concise summary to the screen before finishing. The operator should see the rollup immediately without opening the summary file. Format the screen output clearly: + +- Files selected for the sweep +- Per-file sweep summaries considered +- Findings created or updated, grouped by likely theme or affected component +- Duplicate or overlapping finding candidates noticed across files +- Open questions consolidated across per-file summaries +- Re-run hints consolidated into concrete `PROMPT_EXTRA` or `PROMPT_EXTRA_FILE` suggestions +- Limitations (missing summaries, sweep failures, vague summaries that could not be consolidated) +- Recommended next step + +### 3. 
Aggregate summary content + +The durable summary must include: + +- **Goal**: Explain this is a sweep consolidation rollup from per-file Phase 2 sweep runs. +- **Files processed**: List the files selected for the sweep, and which per-file summaries were found and read. +- **Findings summary**: Consolidate findings created or updated, grouped by likely theme, affected component, or security category. Flag duplicates or near-duplicates noticed across files. +- **Open questions for the user**: Deduplicate and consolidate open questions from all per-file summaries. Questions must be complete, self-contained sentences ending in `?`. +- **Re-run prompt hints**: Merge hints into concrete `PROMPT_EXTRA` or `PROMPT_EXTRA_FILE` snippets. Remove exact duplicates. +- **Limitations**: Note any missing per-file summaries, per-file runs that appear to have failed or produced low-quality output, summaries that were too vague to consolidate, and any assumptions made during consolidation. +- **Recommended next step**: Suggest the next action (e.g., run `make phase-3` for counter-analysis, re-run a specific per-file sweep with questions answered via `PROMPT_EXTRA`). + +## Final response + +At the end, summarize in your response: + +- Number of per-file sweep summaries read +- Total findings identified across all summaries +- Key themes discovered +- Duplicates or overlaps noticed +- Files created or modified +- Open questions for the user +- Re-run prompt hints +- Recommended next step diff --git a/prompts/sweep.md b/prompts/phase-2-sweep.md similarity index 81% rename from prompts/sweep.md rename to prompts/phase-2-sweep.md index 3f0f252f..356366eb 100644 --- a/prompts/sweep.md +++ b/prompts/phase-2-sweep.md @@ -1,8 +1,10 @@ -# CodeCome Optional Deep-Dive Sweep +# CodeCome Phase 2: Sweep Mode -You are performing an optional CodeCome deep-dive sweep on a specific file. +You are performing CodeCome Phase 2 (hypothesis generation) in file-scoped sweep mode. 
-This mode is intentionally narrower than the normal global Phase 2. It is used to inspect high-risk files from `itemdb/notes/file-risk-index.yml` with intense focus, while still allowing you to read immediate dependencies needed to understand reachability and data flow. +This is the same Phase 2 described in the standard `prompts/phase-2-audit.md` prompt, but with a narrower scope: instead of hunting for macro-level architectural flaws across the entire codebase, you focus on a single high-risk file from `itemdb/notes/file-risk-index.yml` with intense, line-by-line analysis. Read immediate dependencies needed to establish reachability and data flow, but avoid expanding the run into a full-project audit. + +All Phase 2 expectations apply: produce durable findings under `itemdb/findings/PENDING/`, follow frontmatter quality rules, deduplicate against existing findings, run `make frontmatter`, and write a Phase 2 run summary. ## Required reading @@ -148,13 +150,15 @@ At the end, summarize in your response (or write a brief run summary under `runs - re-run prompt hints (same content as in the run summary; use `PROMPT_EXTRA` / `PROMPT_EXTRA_FILE` snippets), - files created or modified. +Print a concise end-of-run summary to the screen in addition to writing the durable run summary. The operator should see key results immediately without opening the summary file. + ## Run summary Write the run summary using the template at `templates/run-summary.md` to: - runs/sweep--summary-YYYY-MM-DD-HHMMSS.md + runs/phase-2-summary-sweep--YYYY-MM-DD-HHMMSS.md Replace `` with a short sanitised version of the target file path -(e.g. `runs/sweep-src-app-controllers-upload-php-summary-2026-06-09-143022.md`). +(e.g. `runs/phase-2-summary-sweep-src-app-controllers-upload-php-2026-06-09-143022.md`). You MUST fill in both sections. Questions must be complete, self-contained sentences ending in `?` — avoid terse noun phrases. 
Hints must use actual `PROMPT_EXTRA` or `PROMPT_EXTRA_FILE` snippets. diff --git a/tests/test_phases_completion.py b/tests/test_phases_completion.py index da6f5033..22d650a9 100644 --- a/tests/test_phases_completion.py +++ b/tests/test_phases_completion.py @@ -780,6 +780,78 @@ def test_phase6_passes_with_report_and_summary(self, tmp_path): self._restore(completion_mod, originals) +class TestSweepCompletionGate: + def test_sweep_accepts_phase2_summary_sweep(self, tmp_path): + """Phase sweep should pass when a fresh sweep-named Phase 2 summary exists.""" + import os + import phases.completion as completion_mod + + orig_root = completion_mod.ROOT + orig_findings_root = completion_mod.FINDINGS_ROOT + completion_mod.ROOT = tmp_path + completion_mod.FINDINGS_ROOT = tmp_path / "itemdb" / "findings" + (tmp_path / "runs").mkdir(parents=True, exist_ok=True) + summary = tmp_path / "runs" / "phase-2-summary-sweep-src-foo-php-2026-06-12-143022.md" + summary.write_text("", encoding="utf-8") + run_start = time.time() - 60 + os.utime(summary, (run_start + 60, run_start + 60)) + + try: + ok, failures = completion_mod.check_phase_graceful_completion( + "sweep", None, run_start + ) + assert ok is True, f"Expected ok for sweep with fresh summary, got failures={failures!r}" + assert failures == [] + finally: + completion_mod.ROOT = orig_root + completion_mod.FINDINGS_ROOT = orig_findings_root + + def test_sweep_failure_when_no_summary(self, tmp_path): + """Phase sweep should report missing Phase 2 summary when nothing is freshened.""" + import phases.completion as completion_mod + + orig_root = completion_mod.ROOT + orig_findings_root = completion_mod.FINDINGS_ROOT + completion_mod.ROOT = tmp_path + completion_mod.FINDINGS_ROOT = tmp_path / "itemdb" / "findings" + (tmp_path / "runs").mkdir(parents=True, exist_ok=True) + + try: + ok, failures = completion_mod.check_phase_graceful_completion( + "sweep", None, time.time() + ) + assert ok is False + assert any("runs/phase-2-summary" in f 
for f in failures), ( + f"Expected failure detail to mention runs/phase-2-summary, got {failures!r}" + ) + finally: + completion_mod.ROOT = orig_root + completion_mod.FINDINGS_ROOT = orig_findings_root + + def test_sweep_checklist_is_phase2_checklist(self): + """Phase sweep checklist should be the same as Phase 2 checklist.""" + from phases.completion import phase_checklist_lines + + sweep_lines = phase_checklist_lines("sweep", None) + phase2_lines = phase_checklist_lines("2", None) + assert sweep_lines == phase2_lines, ( + f"Expected sweep checklist to equal Phase 2 checklist" + ) + + def test_sweep_resume_prompt_uses_phase2_gate(self): + """Resume prompt for sweep should mention phase-2-summary like Phase 2.""" + from phases.completion import build_phase_resume_prompt + + prompt = build_phase_resume_prompt( + "sweep", None, "stop", 1, + failure_details=[ + "Missing: runs/phase-2-summary*.md — run summary was not created or updated", + ], + ) + assert "runs/phase-2-summary*.md" in prompt + assert "Fix only these missing items." 
in prompt + + class TestPhase3ChecklistMentionsRunSummary: def test_phase3_checklist_mentions_summary(self): from phases.completion import phase_checklist_lines diff --git a/tests/test_run_summary_questions.py b/tests/test_run_summary_questions.py index 0b35dc87..75b0c8da 100644 --- a/tests/test_run_summary_questions.py +++ b/tests/test_run_summary_questions.py @@ -14,6 +14,7 @@ OpenQuestion, RunSummaryQuestions, find_latest_summary, + find_latest_sweep_summary, parse_summary, _extract_section, _is_none_content, @@ -459,3 +460,117 @@ def test_returns_none_when_dir_missing(self, tmp_path, monkeypatch): monkeypatch.setattr(rsm, "ROOT", tmp_path) result = find_latest_summary("2") assert result is None + + +# --------------------------------------------------------------------------- +# find_latest_summary sweep exclusion tests +# --------------------------------------------------------------------------- + +class TestFindLatestSummarySweepExclusion: + def test_phase2_excludes_sweep_summaries_by_default(self, tmp_path, monkeypatch): + import codecome.run_summary_questions as rsm + runs_dir = tmp_path / "runs" + runs_dir.mkdir() + monkeypatch.setattr(rsm, "ROOT", tmp_path) + + broad = runs_dir / "phase-2-summary-2026-06-12-120000.md" + sweep = runs_dir / "phase-2-summary-sweep-src-foo-php-2026-06-12-120500.md" + broad.write_text("broad phase 2") + sweep.write_text("sweep summary") + base_time = time.time() + os.utime(broad, (base_time, base_time)) + os.utime(sweep, (base_time + 1, base_time + 1)) + + result = find_latest_summary("2") + assert result is not None + assert result.name == "phase-2-summary-2026-06-12-120000.md" + + def test_phase2_returns_none_when_only_sweep_summaries_exist(self, tmp_path, monkeypatch): + import codecome.run_summary_questions as rsm + runs_dir = tmp_path / "runs" + runs_dir.mkdir() + monkeypatch.setattr(rsm, "ROOT", tmp_path) + + sweep = runs_dir / "phase-2-summary-sweep-src-foo-2026-06-12-130000.md" + sweep.write_text("sweep") + base_time 
= time.time() + os.utime(sweep, (base_time, base_time)) + + result = find_latest_summary("2") + assert result is None + + def test_phase2_with_finding_does_not_exclude_sweep(self, tmp_path, monkeypatch): + import codecome.run_summary_questions as rsm + runs_dir = tmp_path / "runs" + runs_dir.mkdir() + monkeypatch.setattr(rsm, "ROOT", tmp_path) + + finding_summary = runs_dir / "phase-2-CC-0001-summary-2026-06-12.md" + finding_summary.write_text("finding scoped") + base_time = time.time() + os.utime(finding_summary, (base_time, base_time)) + + result = find_latest_summary("2", "CC-0001") + assert result is not None + assert result.name == "phase-2-CC-0001-summary-2026-06-12.md" + + def test_explicit_exclude_pattern_overrides_default(self, tmp_path, monkeypatch): + import codecome.run_summary_questions as rsm + runs_dir = tmp_path / "runs" + runs_dir.mkdir() + monkeypatch.setattr(rsm, "ROOT", tmp_path) + + broad = runs_dir / "phase-2-summary-2026-06-12-120000.md" + other = runs_dir / "phase-2-summary-other-2026-06-12-120100.md" + broad.write_text("broad") + other.write_text("other") + base_time = time.time() + os.utime(broad, (base_time, base_time)) + os.utime(other, (base_time + 1, base_time + 1)) + + result = find_latest_summary( + "2", exclude_patterns=("phase-2-summary-sweep-*.md", "phase-2-summary-other-*.md"), + ) + assert result is not None + assert result.name == "phase-2-summary-2026-06-12-120000.md" + + +# --------------------------------------------------------------------------- +# find_latest_sweep_summary tests +# --------------------------------------------------------------------------- + +class TestFindLatestSweepSummary: + def test_finds_newest_sweep_summary(self, tmp_path, monkeypatch): + import codecome.run_summary_questions as rsm + runs_dir = tmp_path / "runs" + runs_dir.mkdir() + monkeypatch.setattr(rsm, "ROOT", tmp_path) + + p1 = runs_dir / "sweep-summary-2026-06-12-120000.md" + p2 = runs_dir / "sweep-summary-2026-06-12-121000.md" + 
p1.write_text("older") + p2.write_text("newer") + base_time = time.time() + os.utime(p1, (base_time, base_time)) + os.utime(p2, (base_time + 1, base_time + 1)) + + result = find_latest_sweep_summary() + assert result is not None + assert result.name == "sweep-summary-2026-06-12-121000.md" + + def test_returns_none_when_no_sweep_summary(self, tmp_path, monkeypatch): + import codecome.run_summary_questions as rsm + runs_dir = tmp_path / "runs" + runs_dir.mkdir() + monkeypatch.setattr(rsm, "ROOT", tmp_path) + + p = runs_dir / "phase-2-summary-sweep-src-foo-2026-06-12.md" + p.write_text("per-file sweep") + result = find_latest_sweep_summary() + assert result is None + + def test_returns_none_when_dir_missing(self, tmp_path, monkeypatch): + import codecome.run_summary_questions as rsm + monkeypatch.setattr(rsm, "ROOT", tmp_path) + result = find_latest_sweep_summary() + assert result is None diff --git a/tests/test_run_sweep.py b/tests/test_run_sweep.py new file mode 100644 index 00000000..fb09416d --- /dev/null +++ b/tests/test_run_sweep.py @@ -0,0 +1,219 @@ +"""Tests for tools/run-sweep.py.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +ROOT = Path(__file__).resolve().parents[1] +sys.path.insert(0, str(ROOT / "tools")) + +from conftest import load_tool_module + + +def _load_run_sweep(): + return load_tool_module("run_sweep", "tools/run-sweep.py") + + +class TestSweepPromptTemplate: + def test_prompt_template_path_is_phase_2_sweep(self): + module = _load_run_sweep() + expected = ROOT / "prompts" / "phase-2-sweep.md" + assert module.PROMPT_TEMPLATE == expected, ( + f"Expected PROMPT_TEMPLATE to be {expected}, got {module.PROMPT_TEMPLATE}" + ) + + def test_prompt_template_exists(self): + module = _load_run_sweep() + assert module.PROMPT_TEMPLATE.is_file(), ( + f"Prompt template {module.PROMPT_TEMPLATE} does not exist" + ) + + def test_prompt_template_does_not_reference_old_sweep_summary(self): + module = 
_load_run_sweep() + content = module.PROMPT_TEMPLATE.read_text(encoding="utf-8") + assert "runs/sweep--summary-YYYY-MM-DD" not in content, ( + "Prompt template still references old sweep summary naming" + ) + + +class TestBuildPromptForFile: + def test_generated_prompt_contains_target_file(self, tmp_path): + module = _load_run_sweep() + + real_template = module.PROMPT_TEMPLATE.read_text(encoding="utf-8") + tmp_template = tmp_path / "phase-2-sweep.md" + tmp_template.write_text(real_template, encoding="utf-8") + module.PROMPT_TEMPLATE = tmp_template + orig_tmp_dir = module.TMP_DIR + module.TMP_DIR = tmp_path / "tmp" / "file-sweep-prompts" + + try: + prompt_path = module.build_prompt_for_file("src/app/controllers/upload.php") + content = prompt_path.read_text(encoding="utf-8") + assert "src/app/controllers/upload.php" in content, ( + "Generated prompt does not contain the target file path" + ) + finally: + module.PROMPT_TEMPLATE = ROOT / "prompts" / "phase-2-sweep.md" + module.TMP_DIR = orig_tmp_dir + + def test_generated_prompt_contains_phase2_sweep_summary(self, tmp_path): + module = _load_run_sweep() + + real_template = module.PROMPT_TEMPLATE.read_text(encoding="utf-8") + tmp_template = tmp_path / "phase-2-sweep.md" + tmp_template.write_text(real_template, encoding="utf-8") + module.PROMPT_TEMPLATE = tmp_template + orig_tmp_dir = module.TMP_DIR + module.TMP_DIR = tmp_path / "tmp" / "file-sweep-prompts" + + try: + prompt_path = module.build_prompt_for_file("src/foo/bar.c") + content = prompt_path.read_text(encoding="utf-8") + assert "phase-2-summary-sweep-" in content, ( + "Generated prompt does not contain phase-2-summary-sweep naming" + ) + assert "runs/sweep--summary-YYYY-MM-DD" not in content, ( + "Generated prompt still contains old sweep summary naming" + ) + finally: + module.PROMPT_TEMPLATE = ROOT / "prompts" / "phase-2-sweep.md" + module.TMP_DIR = orig_tmp_dir + + def test_generated_prompt_uses_placeholder(self, tmp_path): + module = _load_run_sweep() + + 
template = "# Sweep\nTarget: FILE_PATH_OR_ID\n" + tmp_template = tmp_path / "test-template.md" + tmp_template.write_text(template, encoding="utf-8") + module.PROMPT_TEMPLATE = tmp_template + orig_tmp_dir = module.TMP_DIR + module.TMP_DIR = tmp_path / "tmp" / "file-sweep-prompts" + + try: + prompt_path = module.build_prompt_for_file("test/file.py") + content = prompt_path.read_text(encoding="utf-8") + assert "test/file.py" in content + assert "FILE_PATH_OR_ID" not in content + finally: + module.PROMPT_TEMPLATE = ROOT / "prompts" / "phase-2-sweep.md" + module.TMP_DIR = orig_tmp_dir + + def test_missing_placeholder_raises(self, tmp_path): + module = _load_run_sweep() + + template = "# No placeholder here" + tmp_template = tmp_path / "bad-template.md" + tmp_template.write_text(template, encoding="utf-8") + module.PROMPT_TEMPLATE = tmp_template + + try: + with pytest.raises(ValueError, match="does not contain placeholder"): + module.build_prompt_for_file("test/file.py") + finally: + module.PROMPT_TEMPLATE = ROOT / "prompts" / "phase-2-sweep.md" + + +class TestSlugify: + def test_slugify_sanitizes_path(self): + module = _load_run_sweep() + result = module.slugify("src/app/controllers/upload.php") + assert result == "src-app-controllers-upload.php" + + def test_slugify_truncates_long_paths(self): + module = _load_run_sweep() + long_path = "a" * 200 + result = module.slugify(long_path) + assert len(result) <= 120 + + def test_slugify_defaults_to_target_for_empty(self): + module = _load_run_sweep() + result = module.slugify("---") + assert result == "target" + + +class TestSweepSummaryPrompt: + def test_summary_prompt_path_is_set(self): + module = _load_run_sweep() + expected = ROOT / "prompts" / "phase-2-sweep-summary.md" + assert module.SWEEP_SUMMARY_PROMPT == expected, ( + f"Expected SWEEP_SUMMARY_PROMPT to be {expected}, got {module.SWEEP_SUMMARY_PROMPT}" + ) + + def test_summary_prompt_exists(self): + module = _load_run_sweep() + assert 
module.SWEEP_SUMMARY_PROMPT.is_file(), ( + f"Summary prompt {module.SWEEP_SUMMARY_PROMPT} does not exist" + ) + + def test_build_sweep_summary_prompt_contains_selected_files(self, tmp_path): + module = _load_run_sweep() + + real_template = module.SWEEP_SUMMARY_PROMPT.read_text(encoding="utf-8") + tmp_template = tmp_path / "phase-2-sweep-summary.md" + tmp_template.write_text(real_template, encoding="utf-8") + module.SWEEP_SUMMARY_PROMPT = tmp_template + orig_tmp_dir = module.TMP_DIR + module.TMP_DIR = tmp_path / "tmp" / "file-sweep-prompts" + + try: + files = ["src/a.php", "src/b.cs"] + prompt_path = module.build_sweep_summary_prompt(files) + content = prompt_path.read_text(encoding="utf-8") + assert "src/a.php" in content + assert "src/b.cs" in content + assert "## Selected files" in content + finally: + module.SWEEP_SUMMARY_PROMPT = ROOT / "prompts" / "phase-2-sweep-summary.md" + module.TMP_DIR = orig_tmp_dir + + def test_build_sweep_summary_prompt_forbids_hunting(self, tmp_path): + module = _load_run_sweep() + + real_template = module.SWEEP_SUMMARY_PROMPT.read_text(encoding="utf-8") + tmp_template = tmp_path / "phase-2-sweep-summary.md" + tmp_template.write_text(real_template, encoding="utf-8") + module.SWEEP_SUMMARY_PROMPT = tmp_template + orig_tmp_dir = module.TMP_DIR + module.TMP_DIR = tmp_path / "tmp" / "file-sweep-prompts" + + try: + prompt_path = module.build_sweep_summary_prompt(["src/foo.php"]) + content = prompt_path.read_text(encoding="utf-8") + assert "Do NOT create new findings" in content or "not create" in content.lower() + assert "Do NOT perform fresh vulnerability hunting" in content or "not perform" in content.lower() + finally: + module.SWEEP_SUMMARY_PROMPT = ROOT / "prompts" / "phase-2-sweep-summary.md" + module.TMP_DIR = orig_tmp_dir + + def test_summary_prompt_mentions_sweep_summary_naming(self, tmp_path): + module = _load_run_sweep() + + real_template = module.SWEEP_SUMMARY_PROMPT.read_text(encoding="utf-8") + tmp_template = tmp_path / 
"phase-2-sweep-summary.md" + tmp_template.write_text(real_template, encoding="utf-8") + module.SWEEP_SUMMARY_PROMPT = tmp_template + orig_tmp_dir = module.TMP_DIR + module.TMP_DIR = tmp_path / "tmp" / "file-sweep-prompts" + + try: + prompt_path = module.build_sweep_summary_prompt(["src/foo.php"]) + content = prompt_path.read_text(encoding="utf-8") + assert "runs/sweep-summary-" in content + assert "phase-2-summary-sweep-*.md" in content + finally: + module.SWEEP_SUMMARY_PROMPT = ROOT / "prompts" / "phase-2-sweep-summary.md" + module.TMP_DIR = orig_tmp_dir + + def test_build_sweep_summary_prompt_raises_when_missing(self, tmp_path): + module = _load_run_sweep() + + module.SWEEP_SUMMARY_PROMPT = tmp_path / "nonexistent.md" + try: + with pytest.raises(FileNotFoundError, match="missing sweep summary prompt"): + module.build_sweep_summary_prompt(["src/foo.php"]) + finally: + module.SWEEP_SUMMARY_PROMPT = ROOT / "prompts" / "phase-2-sweep-summary.md" diff --git a/tools/codecome.py b/tools/codecome.py index 809d8a87..15a426b0 100755 --- a/tools/codecome.py +++ b/tools/codecome.py @@ -707,12 +707,13 @@ def command_check_codeql_plan(_: argparse.Namespace) -> int: def command_hints(_: argparse.Namespace) -> int: from codecome.run_summary_questions import ( find_latest_summary, + find_latest_sweep_summary, parse_summary, ) - # Phases 4/5 write finding-scoped summaries (phase-4-CC-0001-summary*.md), - # so we search with a wildcard glob in addition to the bare pattern. - # Sweeps write sweep--summary*.md, searched separately. + # Phase 2 lookup now automatically skips per-file sweep summaries + # (phase-2-summary-sweep-*.md) so the broad Phase 2 block is clean. + # Sweep questions are surfaced via the aggregate sweep-summary-*.md. 
phases = ("1a", "1b", "1c", "2", "3") finding_phases = ("4", "5") found_any = False @@ -760,6 +761,12 @@ def _process(path: Path, label: str) -> bool: if _process(summary_path, f"Phase {phase_id}"): found_any = True + # Sweep block: aggregate sweep summary only + sweep_summary = find_latest_sweep_summary() + if sweep_summary: + if _process(sweep_summary, "Sweep"): + found_any = True + for phase_id in finding_phases: # Also search the finding-scoped pattern: phase-4-*-summary*.md runs_dir = ROOT / "runs" @@ -775,17 +782,6 @@ def _process(path: Path, label: str) -> bool: if _process(summary_path, f"Phase {phase_id}"): found_any = True - # Sweep summaries: sweep--summary*.md - if (ROOT / "runs").is_dir(): - sweep_candidates = sorted( - (ROOT / "runs").glob("sweep-*-summary*.md"), - key=lambda p: p.stat().st_mtime, - reverse=True, - ) - for sp in sweep_candidates: - if _process(sp, "Sweep"): - found_any = True - if found_any: print(C.SYM_DASH * 62) print("Answer questions by re-running the phase with:") diff --git a/tools/codecome/run_summary_questions.py b/tools/codecome/run_summary_questions.py index 847595fc..3eb7f573 100644 --- a/tools/codecome/run_summary_questions.py +++ b/tools/codecome/run_summary_questions.py @@ -188,12 +188,21 @@ def parse_summary(path: Path) -> RunSummaryQuestions: def find_latest_summary( - phase_id: str, finding: str | None = None + phase_id: str, + finding: str | None = None, + *, + exclude_patterns: tuple[str, ...] = (), ) -> Path | None: """Find the newest run-summary file for a phase. Globs ``runs/phase-{phase_id}[-{finding}]-summary*.md`` and returns the one with the highest modification time. + + When *phase_id* is ``"2"`` and no *finding* is provided, per-file + sweep summaries (``phase-2-summary-sweep-*.md``) are excluded + automatically so callers get the latest broad ``make phase-2`` + summary. Pass explicit *exclude_patterns* to override or extend this + behaviour. 
""" runs_dir = ROOT / "runs" if not runs_dir.is_dir(): @@ -212,4 +221,36 @@ def find_latest_summary( key=lambda p: p.stat().st_mtime, reverse=True, ) + + effective_excludes: set[str] = set(exclude_patterns) + if phase_id == "2" and not finding: + effective_excludes.add("phase-2-summary-sweep-*.md") + + if effective_excludes: + candidates = [ + p for p in candidates + if not any( + p.match(pat) or (pat.endswith(".md") and pat == p.name) + for pat in effective_excludes + ) + ] + + return candidates[0] if candidates else None + + +def find_latest_sweep_summary() -> Path | None: + """Find the newest aggregate sweep rollup summary. + + Globs ``runs/sweep-summary-*.md`` and returns the one with the + highest modification time. + """ + runs_dir = ROOT / "runs" + if not runs_dir.is_dir(): + return None + + candidates = sorted( + runs_dir.glob("sweep-summary-*.md"), + key=lambda p: p.stat().st_mtime, + reverse=True, + ) return candidates[0] if candidates else None diff --git a/tools/run-sweep.py b/tools/run-sweep.py index 1e81bbc4..357c99b5 100755 --- a/tools/run-sweep.py +++ b/tools/run-sweep.py @@ -38,7 +38,8 @@ ROOT = Path(__file__).resolve().parents[1] DEFAULT_INDEX = ROOT / "itemdb" / "notes" / "file-risk-index.yml" -PROMPT_TEMPLATE = ROOT / "prompts" / "sweep.md" +PROMPT_TEMPLATE = ROOT / "prompts" / "phase-2-sweep.md" +SWEEP_SUMMARY_PROMPT = ROOT / "prompts" / "phase-2-sweep-summary.md" TMP_DIR = ROOT / "tmp" / "file-sweep-prompts" @@ -164,6 +165,40 @@ def run_one_file(file_path: str, dry_run: bool) -> int: return int(result.returncode) +def build_sweep_summary_prompt(selected_files: list[str]) -> Path: + if not SWEEP_SUMMARY_PROMPT.exists(): + try: + rel = SWEEP_SUMMARY_PROMPT.relative_to(ROOT) + except ValueError: + rel = SWEEP_SUMMARY_PROMPT + raise FileNotFoundError(f"missing sweep summary prompt: {rel}") + template = SWEEP_SUMMARY_PROMPT.read_text(encoding="utf-8") + + files_header = "## Selected files\n\nThe per-file sweep runs were executed on these 
files:\n\n" + files_list = "\n".join(f" {f}" for f in selected_files) + files_block = f"{files_header}{files_list}\n" + + prompt = template + "\n" + files_block + prompt_path = TMP_DIR / "sweep-summary-prompt.md" + TMP_DIR.mkdir(parents=True, exist_ok=True) + prompt_path.write_text(prompt, encoding="utf-8") + return prompt_path + + +def run_sweep_summary(selected_files: list[str], dry_run: bool) -> int: + prompt_path = build_sweep_summary_prompt(selected_files) + print(C.header("Sweep Summary (Aggregate Rollup)")) + print(f"Prompt: {prompt_path.relative_to(ROOT)}") + + if dry_run: + return 0 + + prompt = prompt_path.read_text(encoding="utf-8") + command = ["opencode", "run", "--agent", "auditor", prompt] + result = subprocess.run(command, cwd=ROOT) + return int(result.returncode) + + def main() -> int: parser = argparse.ArgumentParser(description="Run sequential CodeCome file-scoped sweeps") parser.add_argument("--file", action="append", default=[], help="Specific file or glob to sweep. 
May be repeated.") @@ -208,6 +243,12 @@ def main() -> int: print(C.fail(f"Sweep failed for {file_path} with exit code {code}"), file=sys.stderr) return code + if len(files) >= 1 and not args.dry_run: + code = run_sweep_summary(files, args.dry_run) + if code != 0: + print(C.fail(f"Sweep aggregate summary failed with exit code {code}"), file=sys.stderr) + return code + return 0 From 2cc850b8e045fa896cbcc1c3d21a4d54acca589b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Ruiz=20Garc=C3=ADa?= Date: Fri, 12 Jun 2026 21:06:46 +0200 Subject: [PATCH 2/2] fix: address PR #55 review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Docstring: 'override or extend' → 'extend' (exclude_patterns cannot override the implicit Phase 2 sweep exclusion) - Use fnmatch for exclude_patterns matching instead of Path.match - Rename test: overrides_default → extends_default - Remove deprecated CODECOME_USE_WRAPPER branch from run-sweep.py; per-file sweeps always use the wrapper path - Document why aggregate rollup uses raw opencode run - Simplify aggregate-summary guard: drop redundant len(files) >= 1 - Capture sweep_start_time; only feed fresh per-file summaries to the aggregate prompt (avoid stale summary contamination) - Use timestamped aggregate prompt path instead of fixed name - Update aggregate prompt to read only injected summary paths - Fix slug example: upload-php → upload.php - Clean CODECOME_USE_WRAPPER references from README.md and docs/workflow.md --- README.md | 3 +- docs/workflow.md | 3 +- prompts/phase-2-sweep-summary.md | 4 +- prompts/phase-2-sweep.md | 2 +- tests/test_run_summary_questions.py | 2 +- tests/test_run_sweep.py | 18 ++++-- tools/codecome/run_summary_questions.py | 10 ++-- tools/run-sweep.py | 73 ++++++++++++++++--------- 8 files changed, 68 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 2499eab8..84089728 100644 --- a/README.md +++ b/README.md @@ -486,7 +486,6 @@ CodeCome ships 
reusable phase prompts under `prompts/`: ### Wrapper environment variables - CODECOME_USE_WRAPPER=0 # bypass the styled wrapper CODECOME_THINKING=1 # show model reasoning/thinking blocks in output CODECOME_THINKING=0 # hide model reasoning/thinking blocks CODECOME_RENDER_REASONING=0 # suppress on-screen Thinking panels (independent override) @@ -498,7 +497,7 @@ CodeCome ships reusable phase prompts under `prompts/`: CODECOME_BOOTSTRAP_DRY_RUN=1 # force --dry-run on sandbox apply/regenerate CODECOME_BASH_SHIM_RENDER=0 # disable rtk/cat/head/tail/rg/ls/find/tree routing CODECOME_BASH_SHIM_LS_STRIP_LONG_FORMAT=0 - OPENCODE_ARGS='...' # extra flags for opencode run (forwarded directly when CODECOME_USE_WRAPPER=0; in wrapper mode only --model, --variant and --thinking are used) + OPENCODE_ARGS='...' # extra flags for opencode run (--model, --variant, --thinking) CODECOME_MODEL= # pin model per phase, e.g. anthropic/claude-opus-4-7 CODECOME_MODEL_VARIANT= # pin model variant, e.g. high, max diff --git a/docs/workflow.md b/docs/workflow.md index 3cb14714..53b1b74d 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -454,9 +454,8 @@ All `make` targets that depend on Python tooling expect a repo-local `.venv/`. I Wrapper controls: - CODECOME_USE_WRAPPER=0 # bypass wrapper and use raw opencode run CODECOME_THINKING=1 # show model reasoning/thinking blocks in output - OPENCODE_ARGS='...' # extra flags for opencode run (forwarded directly when CODECOME_USE_WRAPPER=0; in wrapper mode only --model, --variant and --thinking are used) + OPENCODE_ARGS='...' 
# extra flags for opencode run (--model, --variant, --thinking) CODECOME_MODEL= # pin the model per phase CODECOME_MODEL_VARIANT= # pin the model variant diff --git a/prompts/phase-2-sweep-summary.md b/prompts/phase-2-sweep-summary.md index e2ebdaa3..aff3eae8 100644 --- a/prompts/phase-2-sweep-summary.md +++ b/prompts/phase-2-sweep-summary.md @@ -10,10 +10,10 @@ Read these files (all paths are relative to the project/workspace root): - `AGENTS.md` - `codecome.yml` -- All per-file sweep summaries matching `runs/phase-2-summary-sweep-*.md` +- ONLY the per-file sweep summaries listed in the `## Per-file sweep summaries` section of this prompt. Do NOT read unrelated historical sweep summaries — old summaries from previous sweeps are not part of this consolidation. - Findings under `itemdb/findings/PENDING/` that were created or touched during the sweep (identifiable from the per-file summaries) -If available in the context of this run, note which files were selected for the sweep (see the prompt body or prompt file attached to this run). +The `## Selected files` and `## Per-file sweep summaries` sections below this prompt body list exactly which files were swept and which per-file summaries were produced by this batch. Use that information, not a blind glob of all `runs/phase-2-summary-sweep-*.md`. ## Forbidden actions diff --git a/prompts/phase-2-sweep.md b/prompts/phase-2-sweep.md index 356366eb..6f2c4a0c 100644 --- a/prompts/phase-2-sweep.md +++ b/prompts/phase-2-sweep.md @@ -159,6 +159,6 @@ Write the run summary using the template at `templates/run-summary.md` to: runs/phase-2-summary-sweep-<slug>-YYYY-MM-DD-HHMMSS.md Replace `<slug>` with a short sanitised version of the target file path -(e.g. `runs/phase-2-summary-sweep-src-app-controllers-upload-php-2026-06-09-143022.md`). +(e.g. `runs/phase-2-summary-sweep-src-app-controllers-upload.php-2026-06-09-143022.md`). You MUST fill in both sections.
Questions must be complete, self-contained sentences ending in `?` — avoid terse noun phrases. Hints must use actual `PROMPT_EXTRA` or `PROMPT_EXTRA_FILE` snippets. diff --git a/tests/test_run_summary_questions.py b/tests/test_run_summary_questions.py index 75b0c8da..b202628b 100644 --- a/tests/test_run_summary_questions.py +++ b/tests/test_run_summary_questions.py @@ -514,7 +514,7 @@ def test_phase2_with_finding_does_not_exclude_sweep(self, tmp_path, monkeypatch) assert result is not None assert result.name == "phase-2-CC-0001-summary-2026-06-12.md" - def test_explicit_exclude_pattern_overrides_default(self, tmp_path, monkeypatch): + def test_explicit_exclude_pattern_extends_default(self, tmp_path, monkeypatch): import codecome.run_summary_questions as rsm runs_dir = tmp_path / "runs" runs_dir.mkdir() diff --git a/tests/test_run_sweep.py b/tests/test_run_sweep.py index fb09416d..e84f0684 100644 --- a/tests/test_run_sweep.py +++ b/tests/test_run_sweep.py @@ -161,7 +161,7 @@ def test_build_sweep_summary_prompt_contains_selected_files(self, tmp_path): try: files = ["src/a.php", "src/b.cs"] - prompt_path = module.build_sweep_summary_prompt(files) + prompt_path = module.build_sweep_summary_prompt(files, []) content = prompt_path.read_text(encoding="utf-8") assert "src/a.php" in content assert "src/b.cs" in content @@ -181,7 +181,7 @@ def test_build_sweep_summary_prompt_forbids_hunting(self, tmp_path): module.TMP_DIR = tmp_path / "tmp" / "file-sweep-prompts" try: - prompt_path = module.build_sweep_summary_prompt(["src/foo.php"]) + prompt_path = module.build_sweep_summary_prompt(["src/foo.php"], []) content = prompt_path.read_text(encoding="utf-8") assert "Do NOT create new findings" in content or "not create" in content.lower() assert "Do NOT perform fresh vulnerability hunting" in content or "not perform" in content.lower() @@ -189,7 +189,7 @@ def test_build_sweep_summary_prompt_forbids_hunting(self, tmp_path): module.SWEEP_SUMMARY_PROMPT = ROOT / "prompts" / 
"phase-2-sweep-summary.md" module.TMP_DIR = orig_tmp_dir - def test_summary_prompt_mentions_sweep_summary_naming(self, tmp_path): + def test_summary_prompt_contains_injected_per_file_summaries(self, tmp_path): module = _load_run_sweep() real_template = module.SWEEP_SUMMARY_PROMPT.read_text(encoding="utf-8") @@ -200,10 +200,16 @@ def test_summary_prompt_mentions_sweep_summary_naming(self, tmp_path): module.TMP_DIR = tmp_path / "tmp" / "file-sweep-prompts" try: - prompt_path = module.build_sweep_summary_prompt(["src/foo.php"]) + summaries = [ + "runs/phase-2-summary-sweep-src-a-2026-06-12-120000.md", + "runs/phase-2-summary-sweep-src-b-2026-06-12-121000.md", + ] + prompt_path = module.build_sweep_summary_prompt(["src/a.php"], summaries) content = prompt_path.read_text(encoding="utf-8") assert "runs/sweep-summary-" in content - assert "phase-2-summary-sweep-*.md" in content + assert "phase-2-summary-sweep-src-a-2026-06-12-120000.md" in content + assert "phase-2-summary-sweep-src-b-2026-06-12-121000.md" in content + assert "## Per-file sweep summaries" in content finally: module.SWEEP_SUMMARY_PROMPT = ROOT / "prompts" / "phase-2-sweep-summary.md" module.TMP_DIR = orig_tmp_dir @@ -214,6 +220,6 @@ def test_build_sweep_summary_prompt_raises_when_missing(self, tmp_path): module.SWEEP_SUMMARY_PROMPT = tmp_path / "nonexistent.md" try: with pytest.raises(FileNotFoundError, match="missing sweep summary prompt"): - module.build_sweep_summary_prompt(["src/foo.php"]) + module.build_sweep_summary_prompt(["src/foo.php"], []) finally: module.SWEEP_SUMMARY_PROMPT = ROOT / "prompts" / "phase-2-sweep-summary.md" diff --git a/tools/codecome/run_summary_questions.py b/tools/codecome/run_summary_questions.py index 3eb7f573..fa7be435 100644 --- a/tools/codecome/run_summary_questions.py +++ b/tools/codecome/run_summary_questions.py @@ -12,6 +12,7 @@ import re from dataclasses import dataclass, field +from fnmatch import fnmatch from pathlib import Path from codecome.config import ROOT @@ 
-201,8 +202,8 @@ def find_latest_summary( When *phase_id* is ``"2"`` and no *finding* is provided, per-file sweep summaries (``phase-2-summary-sweep-*.md``) are excluded automatically so callers get the latest broad ``make phase-2`` - summary. Pass explicit *exclude_patterns* to override or extend this - behaviour. + summary. Pass explicit *exclude_patterns* to extend (but not + override) this behaviour. """ runs_dir = ROOT / "runs" if not runs_dir.is_dir(): @@ -229,10 +230,7 @@ def find_latest_summary( if effective_excludes: candidates = [ p for p in candidates - if not any( - p.match(pat) or (pat.endswith(".md") and pat == p.name) - for pat in effective_excludes - ) + if not any(fnmatch(p.name, pat) for pat in effective_excludes) ] return candidates[0] if candidates else None diff --git a/tools/run-sweep.py b/tools/run-sweep.py index 357c99b5..8e0b0e38 100755 --- a/tools/run-sweep.py +++ b/tools/run-sweep.py @@ -20,10 +20,10 @@ import argparse import glob -import os import re import subprocess import sys +import time from pathlib import Path from typing import Any @@ -144,28 +144,27 @@ def run_one_file(file_path: str, dry_run: bool) -> int: if dry_run: return 0 - if os.environ.get("CODECOME_USE_WRAPPER") == "0": - prompt = prompt_path.read_text(encoding="utf-8") - command = ["opencode", "run", "--agent", "auditor", prompt] - else: - command = [ - sys.executable, - "tools/run-agent.py", - "--phase", - "2", - "--label", - f"Deep Sweep: {file_path}", - "--agent", - "auditor", - "--prompt-file", - str(prompt_path.relative_to(ROOT)), - ] + command = [ + sys.executable, + "tools/run-agent.py", + "--phase", + "2", + "--label", + f"Deep Sweep: {file_path}", + "--agent", + "auditor", + "--prompt-file", + str(prompt_path.relative_to(ROOT)), + ] result = subprocess.run(command, cwd=ROOT) return int(result.returncode) -def build_sweep_summary_prompt(selected_files: list[str]) -> Path: +def build_sweep_summary_prompt( + selected_files: list[str], + per_file_summaries: 
list[str], +) -> Path: if not SWEEP_SUMMARY_PROMPT.exists(): try: rel = SWEEP_SUMMARY_PROMPT.relative_to(ROOT) @@ -178,21 +177,31 @@ def build_sweep_summary_prompt(selected_files: list[str]) -> Path: files_list = "\n".join(f" {f}" for f in selected_files) files_block = f"{files_header}{files_list}\n" - prompt = template + "\n" + files_block - prompt_path = TMP_DIR / "sweep-summary-prompt.md" + summaries_block = "" + if per_file_summaries: + summaries_header = "## Per-file sweep summaries\n\nRead ONLY these per-file summaries (do not read unrelated historical sweep summaries):\n\n" + summaries_list = "\n".join(f" {s}" for s in per_file_summaries) + summaries_block = f"{summaries_header}{summaries_list}\n" + + prompt = template + "\n" + files_block + summaries_block + prompt_path = TMP_DIR / f"sweep-summary-{time.strftime('%Y%m%d-%H%M%S')}.md" TMP_DIR.mkdir(parents=True, exist_ok=True) prompt_path.write_text(prompt, encoding="utf-8") return prompt_path -def run_sweep_summary(selected_files: list[str], dry_run: bool) -> int: - prompt_path = build_sweep_summary_prompt(selected_files) +def run_sweep_summary(files: list[str], per_file_summaries: list[str]) -> int: + """Run the aggregate sweep rollup after all per-file sweeps complete. + + Uses raw ``opencode run`` directly because the aggregate rollup is + not a phase-mode run — it does not participate in the Phase 2 + completion gate and ``run-agent.py`` does not currently support + non-phase utility prompts. 
+ """ + prompt_path = build_sweep_summary_prompt(files, per_file_summaries) print(C.header("Sweep Summary (Aggregate Rollup)")) print(f"Prompt: {prompt_path.relative_to(ROOT)}") - if dry_run: - return 0 - prompt = prompt_path.read_text(encoding="utf-8") command = ["opencode", "run", "--agent", "auditor", prompt] result = subprocess.run(command, cwd=ROOT) @@ -237,14 +246,24 @@ def main() -> int: print(C.fail(f"Readiness gate failed with exit code {exc.returncode}"), file=sys.stderr) return int(exc.returncode) + sweep_start_time = time.time() for file_path in files: code = run_one_file(file_path, args.dry_run) if code != 0: print(C.fail(f"Sweep failed for {file_path} with exit code {code}"), file=sys.stderr) return code - if len(files) >= 1 and not args.dry_run: - code = run_sweep_summary(files, args.dry_run) + if not args.dry_run: + fresh_summaries = [ + str(s.relative_to(ROOT)) + for f in files + for s in sorted( + (ROOT / "runs").glob(f"phase-2-summary-sweep-{slugify(f)}-*.md"), + key=lambda p: p.stat().st_mtime, + ) + if s.stat().st_mtime >= sweep_start_time + ] + code = run_sweep_summary(files, fresh_summaries) if code != 0: print(C.fail(f"Sweep aggregate summary failed with exit code {code}"), file=sys.stderr) return code