Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 147 additions & 0 deletions tests/test_run_sweep.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,150 @@ def test_build_sweep_summary_prompt_raises_when_missing(self, tmp_path):
module.build_sweep_summary_prompt(["src/foo.php"], [])
finally:
module.SWEEP_SUMMARY_PROMPT = ROOT / "prompts" / "phase-2-sweep-summary.md"


class TestRunSweepSummaryModelPropagation:
    """run_sweep_summary passes resolved model/variant/thinking to raw opencode run."""

    def _setup_summary_env(self, module, tmp_path):
        """Configure temporary SWEEP_SUMMARY_PROMPT, TMP_DIR, and ROOT for a test.

        The real prompt template is copied into ``tmp_path`` and the module
        globals are pointed at locations under ``tmp_path``.

        Returns (orig_swp, orig_tmp_dir, orig_root) for cleanup in the caller.
        """
        # Read the template before touching any global, so a read failure
        # leaves the module untouched.
        real_template = module.SWEEP_SUMMARY_PROMPT.read_text(encoding="utf-8")
        tmp_template = tmp_path / "phase-2-sweep-summary.md"
        tmp_template.write_text(real_template, encoding="utf-8")
        orig_swp = module.SWEEP_SUMMARY_PROMPT
        module.SWEEP_SUMMARY_PROMPT = tmp_template
        orig_tmp_dir = module.TMP_DIR
        module.TMP_DIR = tmp_path / "tmp" / "file-sweep-prompts"
        orig_root = module.ROOT
        module.ROOT = tmp_path
        return orig_swp, orig_tmp_dir, orig_root

    def _teardown_summary_env(self, module, orig_swp, orig_tmp_dir, orig_root):
        """Restore the module globals replaced by ``_setup_summary_env``."""
        module.SWEEP_SUMMARY_PROMPT = orig_swp
        module.TMP_DIR = orig_tmp_dir
        module.ROOT = orig_root

    def _run_summary(
        self,
        tmp_path,
        monkeypatch,
        files,
        *,
        model,
        variant,
        model_source,
        variant_source,
        thinking_on,
        thinking_source,
    ):
        """Run ``run_sweep_summary`` against fakes and return the captured command.

        Installs a fake ``resolve_runtime_config`` that returns a
        ``RuntimeConfig`` built from the keyword arguments, and replaces
        ``subprocess.run`` with a recorder that reports exit code 0.

        Everything that happens after the module globals are patched runs
        inside ``try``/``finally``, so the globals are restored even when the
        import, the config construction, or the call under test raises.

        Returns the single command list passed to ``subprocess.run``; asserts
        that the return code was 0 and that exactly one command was issued.
        """
        module = _load_run_sweep()
        originals = self._setup_summary_env(module, tmp_path)
        captured_commands: list[list[str]] = []
        try:
            # Kept as a local import so it executes after _load_run_sweep().
            # NOTE(review): presumably that call is what makes ``codecome``
            # importable -- confirm before hoisting to module level.
            from codecome.config import RuntimeConfig

            fake_rc = RuntimeConfig(
                model=model,
                variant=variant,
                model_source=model_source,
                variant_source=variant_source,
                thinking_on=thinking_on,
                thinking_source=thinking_source,
            )

            def fake_resolver(agent: str):
                assert agent == "auditor", f"Expected auditor agent, got {agent!r}"
                return fake_rc

            def fake_run(command, **kwargs):
                # Record a copy so later mutation of ``command`` cannot
                # change what the assertions see.
                captured_commands.append(list(command))
                return module.subprocess.CompletedProcess(command, 0)

            monkeypatch.setattr(module, "resolve_runtime_config", fake_resolver)
            monkeypatch.setattr(module.subprocess, "run", fake_run)

            code = module.run_sweep_summary(files, [])
        finally:
            self._teardown_summary_env(module, *originals)

        assert code == 0
        assert len(captured_commands) == 1
        return captured_commands[0]

    def test_passes_model_and_variant_flags(self, tmp_path, monkeypatch):
        """When model and variant are resolved, --model and --variant appear in the command."""
        cmd = self._run_summary(
            tmp_path,
            monkeypatch,
            ["src/a.php"],
            model="test-provider/test-model",
            variant="test-variant",
            model_source="env CODECOME_MODEL",
            variant_source="env CODECOME_MODEL_VARIANT",
            thinking_on=False,
            thinking_source="env",
        )
        assert "--model" in cmd
        assert cmd[cmd.index("--model") + 1] == "test-provider/test-model"
        assert "--variant" in cmd
        assert cmd[cmd.index("--variant") + 1] == "test-variant"
        assert "--thinking" not in cmd
        assert cmd[0] == "opencode"
        assert cmd[1] == "run"
        assert "--agent" in cmd and cmd[cmd.index("--agent") + 1] == "auditor"

    def test_passes_thinking_flag_when_on(self, tmp_path, monkeypatch):
        """When thinking is on, --thinking appears in the command."""
        cmd = self._run_summary(
            tmp_path,
            monkeypatch,
            ["src/b.cs"],
            model=None,
            variant=None,
            model_source="(unknown)",
            variant_source="(unknown)",
            thinking_on=True,
            thinking_source="provider-default",
        )
        assert "--thinking" in cmd
        assert "--model" not in cmd
        assert "--variant" not in cmd

    def test_no_flags_when_nothing_resolved(self, tmp_path, monkeypatch):
        """When model/variant are None and thinking is off, no extra flags are passed."""
        cmd = self._run_summary(
            tmp_path,
            monkeypatch,
            ["src/c.py"],
            model=None,
            variant=None,
            model_source="(unknown)",
            variant_source="(unknown)",
            thinking_on=False,
            thinking_source="env",
        )
        assert "--model" not in cmd
        assert "--variant" not in cmd
        assert "--thinking" not in cmd
        assert cmd[:3] == ["opencode", "run", "--agent"]
20 changes: 19 additions & 1 deletion tools/run-sweep.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
sys.path.insert(0, str(Path(__file__).resolve().parent))

import _colors as C
from codecome.config import resolve_runtime_config

ROOT = Path(__file__).resolve().parents[1]
DEFAULT_INDEX = ROOT / "itemdb" / "notes" / "file-risk-index.yml"
Expand Down Expand Up @@ -197,13 +198,30 @@ def run_sweep_summary(files: list[str], per_file_summaries: list[str]) -> int:
not a phase-mode run — it does not participate in the Phase 2
completion gate and ``run-agent.py`` does not currently support
non-phase utility prompts.

Model and variant resolution mirrors the per-file sweep runs so
that ``CODECOME_MODEL``, ``CODECOME_MODEL_VARIANT``, ``OPENCODE_ARGS``,
and ``codecome.yml`` agent pinning are all honoured for the aggregate
step.
"""
rc = resolve_runtime_config("auditor")
prompt_path = build_sweep_summary_prompt(files, per_file_summaries)
print(C.header("Sweep Summary (Aggregate Rollup)"))
print(f"Prompt: {prompt_path.relative_to(ROOT)}")
print(f" agent=auditor model={rc.model or '(unknown)'} "
f"variant={rc.variant or '(unknown)'} "
f"thinking={'on' if rc.thinking_on else 'off'}"
f" (model source: {rc.model_source}, variant source: {rc.variant_source})")

prompt = prompt_path.read_text(encoding="utf-8")
command = ["opencode", "run", "--agent", "auditor", prompt]
command = ["opencode", "run", "--agent", "auditor"]
if rc.model:
command.extend(["--model", rc.model])
if rc.variant:
command.extend(["--variant", rc.variant])
if rc.thinking_on:
command.append("--thinking")
command.append(prompt)
result = subprocess.run(command, cwd=ROOT)
return int(result.returncode)

Expand Down
Loading