Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions .github/workflows/ab-report.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
name: A/B report (vanilla vs Pilot)

# Trigger: a publish PR that touches submissions/<id>/, or a manual dispatch
# that names the app explicitly. The job runs the equivalent commands two ways
# (the vanilla CLI binary vs the Pilot adapter in socket mode, no daemon) and
# posts an HTML A/B report.
#
# Limitation: GitHub-hosted runners have no nested virtualization (KVM), so
# VM-launching commands cannot run here. The per-app command set
# (submissions/<id>/ab-commands.json) must therefore stay non-VM
# (version / help / subcommand --help). See docs/CI-AB-REPORT.md.

on:
  pull_request:
    paths:
      - 'submissions/**'
  workflow_dispatch:
    inputs:
      app_id:
        description: 'App id, e.g. io.pilot.smolvm'
        required: true

# Least privilege: read the repository, and write to pull requests only because
# the final step creates or updates a PR comment.
permissions:
  contents: read
  pull-requests: write

# Single job: detect the app, stage its adapter, run the comparison, publish.
jobs:
ab-report:
runs-on: ubuntu-latest
steps:
# fetch-depth: 0 fetches history beyond the default single commit; the
# "Detect app id" step diffs HEAD against origin/<base_ref> and needs it.
- uses: actions/checkout@v4
with:
fetch-depth: 0

# Go toolchain, version taken from go.mod, for the `go build` of ./cmd/ipc-call
# in a later step.
- uses: actions/setup-go@v5
with:
go-version-file: go.mod

# Resolve which app this run reports on and export it as the `app` step output.
- name: Detect app id
  id: app
  env:
    # Event-supplied values reach the script through the environment rather
    # than by expression substitution into the script text, so their contents
    # are never parsed as shell code (script-injection hardening).
    INPUT_APP_ID: ${{ github.event.inputs.app_id }}
    BASE_REF: ${{ github.base_ref }}
  run: |
    set -euo pipefail
    if [ -n "$INPUT_APP_ID" ]; then
      # workflow_dispatch: the caller named the app explicitly.
      APP="$INPUT_APP_ID"
    else
      # pull_request: collect the distinct <id> of every changed
      # submissions/<id>/... path relative to the PR base.
      APPS=$(git diff --name-only "origin/${BASE_REF}...HEAD" -- 'submissions/**' \
        | sed -nE 's#^submissions/([^/]+)/.*#\1#p' | sort -u)
      APP=$(printf '%s\n' "$APPS" | head -1)
      # Only the first id is reported on; say so instead of dropping the rest silently.
      COUNT=$(printf '%s\n' "$APPS" | grep -c . || true)
      if [ "$COUNT" -gt 1 ]; then
        echo "::warning::PR touches $COUNT apps under submissions/; reporting on '$APP' only"
      fi
    fi
    if [ -z "$APP" ]; then echo "::error::could not detect an app id under submissions/"; exit 1; fi
    # The id is reused later as a path component, an artifact name and a step
    # output, so refuse anything outside a conservative character set.
    case "$APP" in
      *[!A-Za-z0-9._-]*)
        echo "::error::app id contains characters outside [A-Za-z0-9._-]"; exit 1 ;;
    esac
    echo "app=$APP" >> "$GITHUB_OUTPUT"
    echo "Detected app: $APP"

# Compile the repository's ipc-call helper to /tmp/ipc-call; the
# "Run A/B report" step hands that path to ab_report.py via --ipc-call.
- name: Build ipc-call
  run: |
    go build -o /tmp/ipc-call ./cmd/ipc-call

# Unpack the submission's bundle, resolve the adapter and vanilla binaries, and
# start the adapter in socket mode. Outputs: ns, root, exec, or skip=1 when the
# app has nothing to A/B (no bundle tarball, or no install.json).
- name: Extract bundle and stage the adapter
  id: stage
  env:
    # Step output handed over through the environment instead of being
    # substituted into the script text, so it can never be parsed as shell code.
    APP: ${{ steps.app.outputs.app }}
  run: |
    set -euo pipefail
    DIR="submissions/$APP"
    # Prefer the linux/amd64 bundle (this runner's arch); fall back to any.
    BUNDLE=$(ls "$DIR"/*linux-amd64*.tar.gz 2>/dev/null | head -1 || true)
    [ -n "$BUNDLE" ] || BUNDLE=$(ls "$DIR"/*.tar.gz 2>/dev/null | head -1 || true)
    if [ -z "$BUNDLE" ]; then
      echo "::notice::$APP ships no bundle tarball under $DIR (metadata-only change, or the binary is delivered out-of-band) — A/B report not applicable"
      echo "skip=1" >> "$GITHUB_OUTPUT"; exit 0
    fi
    echo "Using bundle: $BUNDLE"
    ROOT=/tmp/app; rm -rf "$ROOT"; mkdir -p "$ROOT"; tar -xzf "$BUNDLE" -C "$ROOT"

    if [ ! -f "$ROOT/install.json" ]; then
      echo "::notice::$APP ships no install.json (HTTP app, or a cli whose binary is not delivered) — A/B report not applicable"
      echo "skip=1" >> "$GITHUB_OUTPUT"; exit 0
    fi

    # Namespace: submission.json's .namespace when present, otherwise the last
    # dot-separated component of the app id.
    NS=$(jq -r '.namespace // empty' "$DIR/submission.json" 2>/dev/null || true)
    [ -n "$NS" ] || NS="${APP##*.}"
    # "// empty" yields "" for a missing .binary.path instead of the literal
    # string "null", so the failure below names the real problem.
    BIN=$(jq -r '.binary.path // empty' "$ROOT/manifest.json")
    if [ -z "$BIN" ] || [ ! -f "$ROOT/$BIN" ]; then
      echo "::error::manifest.json does not name an adapter binary present in the bundle (.binary.path)"; exit 1
    fi
    chmod +x "$ROOT/$BIN"
    CMD=$(jq -r '.command' "$ROOT/install.json")
    # The vanilla binary is the asset whose exec_path (or its basename) equals
    # install.json's .command.
    EXEC=$(jq -r --arg c "$CMD" '.assets[] | select((.exec_path|split("/")|last)==$c or .exec_path==$c) | .exec_path' "$ROOT/install.json" | head -1)
    if [ -z "$EXEC" ]; then echo "::error::could not resolve the command exec_path from install.json"; exit 1; fi
    echo "ns=$NS root=$ROOT bin=$BIN exec=$EXEC"
    {
      echo "ns=$NS"; echo "root=$ROOT"; echo "exec=$EXEC";
    } >> "$GITHUB_OUTPUT"

    # Run the adapter exactly as the daemon would; it stages the host's
    # artifacts from the R2 registry on startup (needs network egress).
    "$ROOT/$BIN" --socket "$ROOT/app.sock" --manifest "$ROOT/manifest.json" > /tmp/adapter.log 2>&1 &
    ADAPTER_PID=$!
    echo "$ADAPTER_PID" > /tmp/adapter.pid
    # Wait up to 90 x 2 s for the socket, but stop as soon as the adapter
    # process is gone: a dead adapter will never create it.
    for i in $(seq 1 90); do
      [ -S "$ROOT/app.sock" ] && break
      kill -0 "$ADAPTER_PID" 2>/dev/null || break
      sleep 2
    done
    if [ ! -S "$ROOT/app.sock" ]; then
      echo "::error::adapter socket never appeared (staging from R2 likely failed for this runner's os/arch)"
      echo "----- adapter log -----"; cat /tmp/adapter.log || true
      exit 1
    fi
    echo "Adapter up; staged binary: $ROOT/$EXEC"

# Drive scripts/ab_report.py in socket mode against the running adapter and
# write ab-report.html. Skipped when the stage step reported skip=1.
- name: Run A/B report
  if: steps.stage.outputs.skip != '1'
  env:
    # ns and exec originate from JSON files shipped in the PR; passing them as
    # environment variables keeps their contents out of the script text, so
    # they cannot be interpreted as shell code.
    APP: ${{ steps.app.outputs.app }}
    NS: ${{ steps.stage.outputs.ns }}
    ROOT: ${{ steps.stage.outputs.root }}
    EXEC: ${{ steps.stage.outputs.exec }}
  run: |
    set -euo pipefail
    python3 scripts/ab_report.py \
      --app "$APP" --ns "$NS" \
      --mode socket --socket "$ROOT/app.sock" --ipc-call /tmp/ipc-call \
      --vanilla "$ROOT/$EXEC" \
      --submissions ./submissions --out ab-report.html

# Best-effort cleanup that runs even when an earlier step failed: signal the
# adapter whose pid the stage step recorded. Never fails the job.
- name: Stop adapter
  if: always()
  run: |
    if [ -f /tmp/adapter.pid ]; then
      kill "$(cat /tmp/adapter.pid)" 2>/dev/null || true
    fi

# Publish the generated HTML as a run artifact named after the app. Skipped
# when the stage step reported skip=1.
- name: Upload report
  if: ${{ steps.stage.outputs.skip != '1' }}
  uses: actions/upload-artifact@v4
  with:
    path: ab-report.html
    name: ab-report-${{ steps.app.outputs.app }}

# Summarise ab-report.html as a Markdown table and upsert it as a single PR
# comment, identified by a hidden per-app marker. Runs for pull_request events
# only, and only when the stage step did not report skip=1.
- name: Comment on the PR
  if: steps.stage.outputs.skip != '1' && github.event_name == 'pull_request'
  uses: actions/github-script@v7
  env:
    # Passed through the environment so the id is data, not JavaScript source.
    APP_ID: ${{ steps.app.outputs.app }}
  with:
    script: |
      const fs = require('fs');
      const app = process.env.APP_ID;
      const html = fs.readFileSync('ab-report.html', 'utf8');
      // One match per summary row: command, vanilla ms, pilot ms, delta, exit match.
      const re = /<tr><td>([^<]+)<\/td><td class='r'>(\d+)<\/td><td class='r'>(\d+)<\/td><td class='r'>([^<]+)<\/td><td>([^<]+)<\/td><\/tr>/g;
      // A literal "|" inside a cell would otherwise split the Markdown table row.
      const cell = (s) => s.replace(/\|/g, '\\|');
      let table = '| Command | Vanilla ms | Pilot ms | Δ | Exit match |\n|---|--:|--:|--:|:--:|\n';
      let n = 0;
      for (const m of html.matchAll(re)) {
        table += `| ${cell(m[1])} | ${m[2]} | ${m[3]} | ${cell(m[4])} | ${cell(m[5])} |\n`;
        n++;
      }
      const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
      const marker = `<!-- ab-report:${app} -->`;
      const body = `${marker}\n### A/B report — \`${app}\` (vanilla vs Pilot)\n\n` +
        `Equivalent commands run two ways: the vanilla CLI binary vs the Pilot adapter (socket mode). ` +
        `CI runs **non-VM** commands only (GitHub runners have no KVM). ` +
        `Full HTML report (commands, outputs, timings, adapter-generated help): ` +
        `**[download from the run artifacts](${runUrl})**.\n\n${n ? table : '_No command pairs ran._'}`;
      const { owner, repo } = context.repo;
      const issue_number = context.issue.number;
      // Walk every page: a single listComments call returns only the first
      // page, so on a busy PR the earlier report would be missed and a
      // duplicate comment posted.
      const comments = await github.paginate(github.rest.issues.listComments, { owner, repo, issue_number, per_page: 100 });
      const prev = comments.find(c => c.body && c.body.includes(marker));
      if (prev) await github.rest.issues.updateComment({ owner, repo, comment_id: prev.id, body });
      else await github.rest.issues.createComment({ owner, repo, issue_number, body });
2 changes: 1 addition & 1 deletion cmd/publish-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ func (s *server) apiArtifactPresign(w http.ResponseWriter, r *http.Request) {
// installs work off a stable URL even when the bucket has no public domain.
func (s *server) artifactProxy(w http.ResponseWriter, r *http.Request) {
if s.r2 == nil {
http.Error(w, "artifact registry not configured", 503)
http.Error(w, "artifact registry not configured", http.StatusServiceUnavailable)
return
}
key := strings.TrimPrefix(r.URL.Path, "/artifact/")
Expand Down
63 changes: 63 additions & 0 deletions docs/CI-AB-REPORT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# CI: per-app vanilla-vs-Pilot A/B report

`.github/workflows/ab-report.yml` runs an A/B report when a **publish PR** touches
`submissions/<id>/`. It runs each equivalent command two ways — the vanilla CLI
binary vs the Pilot adapter — and posts an HTML report (commands, outputs, exit
codes, timings, and the adapter-generated `<ns>.help`).

## How it works

1. **Detect** the app id from the changed `submissions/<id>/` path (or the
`workflow_dispatch` `app_id` input).
2. **Extract** the committed bundle (prefers `*linux-amd64*.tar.gz` for the
ubuntu runner) → `manifest.json`, `bin/<adapter>`, `install.json`.
3. **Stage**: run the adapter with `--socket/--manifest`. On startup it fetches
this host's artifacts from the R2 registry (per `install.json`), sha-verifies,
and stages them — exactly as the daemon-spawned adapter does. The staged
`exec_path` binary is the *vanilla* side.
4. **Run** `scripts/ab_report.py --mode socket`, driving the adapter through
`cmd/ipc-call` (no daemon needed), with the per-app command set.
5. **Publish**: upload `ab-report.html` as a run artifact and upsert a PR comment
with the summary table + a link to the artifact.

## ⚠️ No VM boots in CI

GitHub-hosted runners have **no nested virtualization (KVM)**, so VM-launching
commands (`smolvm machine run …`) cannot run there. Keep the CI command set to
non-VM commands that still prove the adapter forwards the full surface:
`--version`, `--help`, and subcommand `--help` (e.g. `machine --help`,
`pack --help`). Run microVM workloads locally with `--mode pilotctl` against a
daemon (see `scripts/ab_report.py`).

## Per-app command set

Add `submissions/<id>/ab-commands.json`:

```json
{
"commands": [
{"label": "Version", "vanilla": ["--version"], "method": "<ns>.version", "payload": {}},
{"label": "machine --help", "vanilla": ["machine","--help"],
"method": "<ns>.exec", "payload": {"args": ["machine","--help"]}}
]
}
```

- `vanilla` — argv passed to the staged binary directly.
- `method` + `payload` — the adapter method and JSON args (use the enumerated
method for `version`; the passthrough `<ns>.exec` for everything else).

If the file is absent, a built-in default runs `--version` and `--help` via the
passthrough exec method. See `submissions/io.pilot.smolvm/ab-commands.json`.

## Requirements / limitations

- **Platform artifact**: the report runs on `ubuntu-latest`, so the submission's
R2 artifacts must include a **linux/amd64** build for the adapter to stage.
For apps that ship only other platforms (e.g. darwin/arm64), the staging step
fails with a clear error and dumps the adapter log — point the workflow at a
matching runner if needed (e.g. `macos-14` for darwin/arm64-only apps).
- **Applicability**: only cli apps that **deliver a binary** (ship `install.json`)
get a report. Submissions with no bundle tarball, HTTP apps, and cli apps whose
binary is assumed-present are skipped with a notice.
- **Network**: the runner needs egress to the R2 public URL to stage artifacts.
16 changes: 0 additions & 16 deletions internal/publish/r2.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"net/url"
"os"
"sort"
"strconv"
Expand Down Expand Up @@ -179,18 +178,3 @@ func rfc3986Escape(s string, encodeSlash bool) string {
}
return b.String()
}

// keyFromURL extracts the object key from a public/proxy URL, for validating
// that a submitted artifact url points into our registry.
//
// It reports ok=false when raw does not parse, when r.PublicBase is empty or
// does not parse, or when the two hosts differ. On success the key is the
// URL path with one leading "/" removed.
//
// NOTE(review): only the host is compared; any path component of PublicBase is
// left in the returned key. Confirm that is intended.
func (r *R2) keyFromURL(raw string) (string, bool) {
	target, err := url.Parse(raw)
	if err != nil {
		return "", false
	}
	if r.PublicBase == "" {
		return "", false
	}
	base, err := url.Parse(r.PublicBase)
	if err != nil || target.Host != base.Host {
		return "", false
	}
	return strings.TrimPrefix(target.Path, "/"), true
}
Loading
Loading