From fdad7d7d3077343333191a76c1d2b0aa8d4c78cd Mon Sep 17 00:00:00 2001 From: DB Lee Date: Thu, 14 May 2026 11:13:36 -0700 Subject: [PATCH] fix(workflow): auto-detect committed baseline in PR templates; align doc The baseline-comparison tutorial said the shipped PR workflow 'already supports' baseline comparison, but neither the GitHub Actions template (agentops-pr.yml) nor the Azure DevOps Pipelines template (pipelines/azuredevops/agentops-pr.yml) passed --baseline. Users had to manually edit the workflow. Make the doc claim true on both platforms: the PR step now checks for .agentops/baseline/results.json and passes --baseline when present. Without that file, behaviour is unchanged (no baseline). - src/agentops/templates/workflows/agentops-pr.yml: shell guard around agentops eval run. - src/agentops/templates/pipelines/azuredevops/agentops-pr.yml: same shell guard inside the AzureCLI@2 inline script. - docs/tutorial-baseline-comparison.md: section 4 reworded to drop the obsolete 'add this step' instruction. Drop the file, push, done. Mentions the same behaviour applies to the Azure DevOps template. - tests/unit/test_cicd.py: assert the baseline-detection block is present in both GitHub Actions and Azure DevOps PR templates. Verified end-to-end earlier in the validation series that 'agentops eval run --baseline' produces a top-level 'comparison' block in results.json and the matching 'Comparison vs Baseline' section in report.md. Refs #132 --- docs/tutorial-baseline-comparison.md | 17 ++++++++++------ .../pipelines/azuredevops/agentops-pr.yml | 12 ++++++++++- .../templates/workflows/agentops-pr.yml | 12 ++++++++++- tests/unit/test_cicd.py | 20 +++++++++++++++++++ 4 files changed, 53 insertions(+), 8 deletions(-) diff --git a/docs/tutorial-baseline-comparison.md b/docs/tutorial-baseline-comparison.md index 9181918..0503744 100644 --- a/docs/tutorial-baseline-comparison.md +++ b/docs/tutorial-baseline-comparison.md @@ -86,15 +86,20 @@ similarity is good. ## 4. 
Wire into a PR check The `agentops-pr.yml` workflow shipped by `agentops workflow generate` -already supports this — drop a baseline file in your repo (e.g. -`.agentops/baseline/results.json`) and add this step: +already supports this — drop a baseline file at +`.agentops/baseline/results.json` in your repo and the PR gate +auto-detects it (no workflow edit needed): -```yaml -- name: Run AgentOps eval against baseline - run: | - agentops eval run --baseline .agentops/baseline/results.json +```powershell +New-Item -ItemType Directory -Force .agentops\baseline | Out-Null +Copy-Item .agentops\results\latest\results.json .agentops\baseline\results.json +git add .agentops/baseline/results.json +git commit -m "chore: capture AgentOps baseline" ``` +When the file is absent, the workflow runs without baseline comparison. +When present, it runs `agentops eval run --baseline .agentops/baseline/results.json` automatically. The same auto-detection applies to the Azure DevOps Pipelines template (`agentops workflow generate --platform azure-devops`). + When a PR causes a metric to regress past your threshold, the run exits `2` and the workflow fails, blocking merge until somebody either fixes the regression or refreshes the baseline. diff --git a/src/agentops/templates/pipelines/azuredevops/agentops-pr.yml b/src/agentops/templates/pipelines/azuredevops/agentops-pr.yml index 5e8770a..c278a22 100644 --- a/src/agentops/templates/pipelines/azuredevops/agentops-pr.yml +++ b/src/agentops/templates/pipelines/azuredevops/agentops-pr.yml @@ -63,7 +63,17 @@ stages: python -m pip install --upgrade pip # NOTE: pinned to develop branch until AgentOps 1.0 lands on PyPI. python -m pip install "agentops-toolkit[foundry] @ git+https://github.com/Azure/agentops.git@develop" - agentops eval run --config "$(AGENTOPS_CONFIG)" + # Auto-detect a committed baseline so the PR gate becomes a + # regression check when .agentops/baseline/results.json is present. 
+ # Create or refresh that file with `mkdir -p .agentops/baseline && cp .agentops/results/latest/results.json .agentops/baseline/results.json`. + BASELINE_ARG="" + if [ -f .agentops/baseline/results.json ]; then + BASELINE_ARG="--baseline .agentops/baseline/results.json" + echo "Using baseline: .agentops/baseline/results.json" + else + echo "No .agentops/baseline/results.json found; running without baseline comparison." + fi + agentops eval run --config "$(AGENTOPS_CONFIG)" $BASELINE_ARG ec=$? echo "##vso[task.setvariable variable=evalExitCode;isOutput=true]$ec" if [ $ec -eq 0 ]; then diff --git a/src/agentops/templates/workflows/agentops-pr.yml b/src/agentops/templates/workflows/agentops-pr.yml index 0e81d9a..f4dd979 100644 --- a/src/agentops/templates/workflows/agentops-pr.yml +++ b/src/agentops/templates/workflows/agentops-pr.yml @@ -79,7 +79,17 @@ jobs: AZURE_OPENAI_DEPLOYMENT: ${{ vars.AZURE_OPENAI_DEPLOYMENT }} run: | set +e - agentops eval run --config "${{ inputs.config || 'agentops.yaml' }}" + # Auto-detect a committed baseline so the PR gate becomes a + # regression check when .agentops/baseline/results.json is present. + # Create or refresh that file with `mkdir -p .agentops/baseline && cp .agentops/results/latest/results.json .agentops/baseline/results.json`. + BASELINE_ARG="" + if [ -f .agentops/baseline/results.json ]; then + BASELINE_ARG="--baseline .agentops/baseline/results.json" + echo "Using baseline: .agentops/baseline/results.json" + else + echo "No .agentops/baseline/results.json found; running without baseline comparison." + fi + agentops eval run --config "${{ inputs.config || 'agentops.yaml' }}" $BASELINE_ARG ec=$? 
echo "exit_code=$ec" >> "$GITHUB_OUTPUT" if [ $ec -eq 0 ]; then diff --git a/tests/unit/test_cicd.py b/tests/unit/test_cicd.py index 492dfac..a53044f 100644 --- a/tests/unit/test_cicd.py +++ b/tests/unit/test_cicd.py @@ -163,6 +163,19 @@ def test_pr_template_triggers_and_no_environment(tmp_path: Path) -> None: assert "" in content +def test_pr_template_auto_detects_committed_baseline(tmp_path: Path) -> None: + """The shipped PR workflow turns into a regression gate when a baseline + file is committed at ``.agentops/baseline/results.json``.""" + generate_cicd_workflows(directory=tmp_path, kinds=["pr"]) + content = (tmp_path / _PR_PATH).read_text(encoding="utf-8") + + # Baseline auto-detection: the run step checks for the committed file + # and passes --baseline only when it exists. + assert ".agentops/baseline/results.json" in content + assert "--baseline" in content + assert "$BASELINE_ARG" in content + + def test_dev_template_triggers_and_environment(tmp_path: Path) -> None: generate_cicd_workflows(directory=tmp_path, kinds=["dev"]) content = (tmp_path / _DEV_PATH).read_text(encoding="utf-8") @@ -321,6 +334,13 @@ def test_azure_devops_pr_template_uses_ado_idioms(tmp_path: Path) -> None: # PR comment marker preserved across platforms. assert "" in content + # Baseline auto-detection (same behaviour as the GitHub Actions + # template) so the PR gate becomes a regression check when the + # committed baseline file is present. + assert ".agentops/baseline/results.json" in content + assert "--baseline" in content + assert "$BASELINE_ARG" in content + def test_azure_devops_deploy_templates_use_deployment_job(tmp_path: Path) -> None: generate_cicd_workflows(