pilot-protocol · Alexgodoroja · Jun 22, 2026 · Jun 24, 2026 · Jun 24, 2026
diff --git a/.github/workflows/ab-report.yml b/.github/workflows/ab-report.yml
@@ -0,0 +1,177 @@
+name: A/B report (vanilla vs Pilot)
+
+# When a publish PR touches submissions/<id>/, run the equivalent commands two
+# ways — the vanilla CLI binary vs the Pilot adapter (socket mode, no daemon) —
+# and post an HTML A/B report. Limitation: GitHub-hosted runners have no nested
+# virtualization (KVM), so VM-launching commands cannot run here. The per-app
+# command set (submissions/<id>/ab-commands.json) must therefore stay non-VM
+# (version / help / subcommand --help). See docs/CI-AB-REPORT.md.
+#
+# Security: this job runs a binary shipped in the PR under test. The checkout
+# therefore does not persist the job token in .git/config, and PR-derived values
+# (app id, namespace, exec path) reach scripts through `env:` rather than being
+# expanded into script text with ${{ }}.
+
+on:
+  pull_request:
+    paths: ['submissions/**']
+  workflow_dispatch:
+    inputs:
+      app_id:
+        description: 'App id, e.g. io.pilot.smolvm'
+        required: true
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  ab-report:
+    runs-on: ubuntu-latest
+    steps:
+      # fetch-depth: 0 fetches full history, which the base-branch `git diff` in
+      # "Detect app id" relies on; no later step needs git credentials.
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+
+      - name: Detect app id
+        id: app
+        env:
+          AB_DISPATCH_APP_ID: ${{ github.event.inputs.app_id }}
+          AB_BASE_REF: ${{ github.base_ref }}
+        run: |
+          set -euo pipefail
+          # Manual runs name the app explicitly; PR runs take the first (in sort
+          # order) submissions/<id>/ directory changed relative to the base branch.
+          if [ -n "${AB_DISPATCH_APP_ID:-}" ]; then
+            APP="$AB_DISPATCH_APP_ID"
+          else
+            APP=$(git diff --name-only "origin/${AB_BASE_REF}...HEAD" -- 'submissions/**' \
+              | sed -nE 's#^submissions/([^/]+)/.*#\1#p' | sort -u | head -1)
+          fi
+          if [ -z "$APP" ]; then echo "::error::could not detect an app id under submissions/"; exit 1; fi
+          echo "app=$APP" >> "$GITHUB_OUTPUT"
+          echo "Detected app: $APP"
+
+      - name: Build ipc-call
+        run: go build -o /tmp/ipc-call ./cmd/ipc-call
+
+      - name: Extract bundle and stage the adapter
+        id: stage
+        env:
+          AB_APP: ${{ steps.app.outputs.app }}
+        run: |
+          set -euo pipefail
+          APP="$AB_APP"
+          DIR="submissions/$APP"
+          # Prefer the linux/amd64 bundle (this runner's arch); fall back to any.
+          BUNDLE=$(ls "$DIR"/*linux-amd64*.tar.gz 2>/dev/null | head -1 || true)
+          [ -n "$BUNDLE" ] || BUNDLE=$(ls "$DIR"/*.tar.gz 2>/dev/null | head -1 || true)
+          if [ -z "$BUNDLE" ]; then
+            echo "::notice::$APP ships no bundle tarball under $DIR (metadata-only change, or the binary is delivered out-of-band) — A/B report not applicable"
+            echo "skip=1" >> "$GITHUB_OUTPUT"; exit 0
+          fi
+          echo "Using bundle: $BUNDLE"
+          ROOT=/tmp/app; rm -rf "$ROOT"; mkdir -p "$ROOT"; tar -xzf "$BUNDLE" -C "$ROOT"
+
+          if [ ! -f "$ROOT/install.json" ]; then
+            echo "::notice::$APP ships no install.json (HTTP app, or a cli whose binary is not delivered) — A/B report not applicable"
+            echo "skip=1" >> "$GITHUB_OUTPUT"; exit 0
+          fi
+
+          NS=$(jq -r '.namespace // empty' "$DIR/submission.json" 2>/dev/null || true)
+          [ -n "$NS" ] || NS="${APP##*.}"
+          BIN=$(jq -r '.binary.path' "$ROOT/manifest.json")
+          chmod +x "$ROOT/$BIN"
+          CMD=$(jq -r '.command' "$ROOT/install.json")
+          EXEC=$(jq -r --arg c "$CMD" '.assets[] | select((.exec_path|split("/")|last)==$c or .exec_path==$c) | .exec_path' "$ROOT/install.json" | head -1)
+          if [ -z "$EXEC" ]; then echo "::error::could not resolve the command exec_path from install.json"; exit 1; fi
+          echo "ns=$NS root=$ROOT bin=$BIN exec=$EXEC"
+          {
+            echo "ns=$NS"; echo "root=$ROOT"; echo "exec=$EXEC";
+          } >> "$GITHUB_OUTPUT"
+
+          # Run the adapter exactly as the daemon would; it stages the host's
+          # artifacts from the R2 registry on startup (needs network egress).
+          "$ROOT/$BIN" --socket "$ROOT/app.sock" --manifest "$ROOT/manifest.json" > /tmp/adapter.log 2>&1 &
+          ADAPTER_PID=$!
+          echo "$ADAPTER_PID" > /tmp/adapter.pid
+          # Wait up to ~3 minutes for the socket, but stop waiting as soon as the
+          # adapter process has exited — a dead adapter can no longer create it.
+          for _ in $(seq 1 90); do
+            [ -S "$ROOT/app.sock" ] && break
+            kill -0 "$ADAPTER_PID" 2>/dev/null || break
+            sleep 2
+          done
+          if [ ! -S "$ROOT/app.sock" ]; then
+            echo "::error::adapter socket never appeared (staging from R2 likely failed for this runner's os/arch)"
+            echo "----- adapter log -----"; cat /tmp/adapter.log || true
+            exit 1
+          fi
+          echo "Adapter up; staged binary: $ROOT/$EXEC"
+
+      - name: Run A/B report
+        if: steps.stage.outputs.skip != '1'
+        env:
+          AB_APP: ${{ steps.app.outputs.app }}
+          AB_NS: ${{ steps.stage.outputs.ns }}
+          AB_ROOT: ${{ steps.stage.outputs.root }}
+          AB_EXEC: ${{ steps.stage.outputs.exec }}
+        run: |
+          set -euo pipefail
+          python3 scripts/ab_report.py \
+            --app "$AB_APP" --ns "$AB_NS" \
+            --mode socket --socket "$AB_ROOT/app.sock" --ipc-call /tmp/ipc-call \
+            --vanilla "$AB_ROOT/$AB_EXEC" \
+            --submissions ./submissions --out ab-report.html
+
+      - name: Stop adapter
+        if: always()
+        run: '[ -f /tmp/adapter.pid ] && kill "$(cat /tmp/adapter.pid)" 2>/dev/null || true'
+
+      - name: Upload report
+        if: steps.stage.outputs.skip != '1'
+        uses: actions/upload-artifact@v4
+        with:
+          name: ab-report-${{ steps.app.outputs.app }}
+          path: ab-report.html
+
+      - name: Comment on the PR
+        if: steps.stage.outputs.skip != '1' && github.event_name == 'pull_request'
+        uses: actions/github-script@v7
+        env:
+          AB_APP: ${{ steps.app.outputs.app }}
+        with:
+          script: |
+            const fs = require('fs');
+            // Read the app id from the environment rather than interpolating it
+            // into this script, so its content can never be parsed as code.
+            const app = process.env.AB_APP;
+            const html = fs.readFileSync('ab-report.html', 'utf8');
+            const re = /<tr><td>([^<]+)<\/td><td class='r'>(\d+)<\/td><td class='r'>(\d+)<\/td><td class='r'>([^<]+)<\/td><td>([^<]+)<\/td><\/tr>/g;
+            // Escape pipes so a cell value cannot break the Markdown table.
+            const cell = (s) => s.replace(/\|/g, '\\|');
+            let table = '| Command | Vanilla ms | Pilot ms | Δ | Exit match |\n|---|--:|--:|--:|:--:|\n';
+            let n = 0;
+            for (const m of html.matchAll(re)) { table += `| ${cell(m[1])} | ${m[2]} | ${m[3]} | ${cell(m[4])} | ${cell(m[5])} |\n`; n++; }
+            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
+            const marker = `<!-- ab-report:${app} -->`;
+            const body = `${marker}\n### A/B report — \`${app}\` (vanilla vs Pilot)\n\n` +
+              `Equivalent commands run two ways: the vanilla CLI binary vs the Pilot adapter (socket mode). ` +
+              `CI runs **non-VM** commands only (GitHub runners have no KVM). ` +
+              `Full HTML report (commands, outputs, timings, adapter-generated help): ` +
+              `**[download from the run artifacts](${runUrl})**.\n\n${n ? table : '_No command pairs ran._'}`;
+            const { owner, repo } = context.repo;
+            const issue_number = context.issue.number;
+            // Paginate: listComments returns a single page (30 by default), so on
+            // a busy PR the previous report comment could be missed and duplicated.
+            const comments = await github.paginate(github.rest.issues.listComments, { owner, repo, issue_number, per_page: 100 });
+            const prev = comments.find(c => c.body && c.body.includes(marker));
+            if (prev) await github.rest.issues.updateComment({ owner, repo, comment_id: prev.id, body });
+            else await github.rest.issues.createComment({ owner, repo, issue_number, body });
diff --git a/cmd/publish-server/main.go b/cmd/publish-server/main.go
@@ -294,7 +294,7 @@ func (s *server) apiArtifactPresign(w http.ResponseWriter, r *http.Request) {
 // installs work off a stable URL even when the bucket has no public domain.
 func (s *server) artifactProxy(w http.ResponseWriter, r *http.Request) {
 	if s.r2 == nil {
-		http.Error(w, "artifact registry not configured", 503)
+		http.Error(w, "artifact registry not configured", http.StatusServiceUnavailable)
 		return
 	}
 	key := strings.TrimPrefix(r.URL.Path, "/artifact/")

diff --git a/docs/CI-AB-REPORT.md b/docs/CI-AB-REPORT.md
@@ -0,0 +1,63 @@
+# CI: per-app vanilla-vs-Pilot A/B report
+
+`.github/workflows/ab-report.yml` runs an A/B report when a **publish PR** touches
+`submissions/<id>/`. It runs each equivalent command two ways — the vanilla CLI
+binary vs the Pilot adapter — and posts an HTML report (commands, outputs, exit
+codes, timings, and the adapter-generated `<ns>.help`).
+
+## How it works
+
+1. **Detect** the app id from the first changed `submissions/<id>/` path, in sort
+   order (or take it from the `workflow_dispatch` `app_id` input).
+2. **Extract** the committed bundle (prefers `*linux-amd64*.tar.gz` for the
+   ubuntu runner) → `manifest.json`, `bin/<adapter>`, `install.json`.
+3. **Stage**: run the adapter with `--socket/--manifest`. On startup it fetches
+   this host's artifacts from the R2 registry (per `install.json`), sha-verifies,
+   and stages them — exactly as the daemon-spawned adapter does. The staged
+   `exec_path` binary is the *vanilla* side.
+4. **Run** `scripts/ab_report.py --mode socket`, driving the adapter through
+   `cmd/ipc-call` (no daemon needed), with the per-app command set.
+5. **Publish**: upload `ab-report.html` as a run artifact and upsert a PR comment
+   with the summary table + a link to the artifact.
+
+## ⚠️ No VM boots in CI
+
+GitHub-hosted runners have **no nested virtualization (KVM)**, so VM-launching
+commands (`smolvm machine run …`) cannot run there. Keep the CI command set to
+non-VM commands that still prove the adapter forwards the full surface:
+`--version`, `--help`, and subcommand `--help` (e.g. `machine --help`,
+`pack --help`). Run microVM workloads locally with `--mode pilotctl` against a
+daemon (see `scripts/ab_report.py`).
+
+## Per-app command set
+
+Add `submissions/<id>/ab-commands.json`:
+
+```json
+{
+  "commands": [
+    {"label": "Version", "vanilla": ["--version"], "method": "<ns>.version", "payload": {}},
+    {"label": "machine --help", "vanilla": ["machine","--help"],
+     "method": "<ns>.exec", "payload": {"args": ["machine","--help"]}}
+  ]
+}
+```
+
+- `vanilla` — argv passed to the staged binary directly.
+- `method` + `payload` — the adapter method and JSON args (use the enumerated
+  method for `version`; the passthrough `<ns>.exec` for everything else).
+
+If the file is absent, a built-in default runs `--version` and `--help` via the
+passthrough exec method. See `submissions/io.pilot.smolvm/ab-commands.json`.
+
+## Requirements / limitations
+
+- **Platform artifact**: the report runs on `ubuntu-latest`, so the submission's
+  R2 artifacts must include a **linux/amd64** build for the adapter to stage.
+  Apps that ship only other platforms (e.g. darwin/arm64) will fail with a clear
+  error — point the workflow at a matching runner if needed (e.g. `macos-14` for
+  darwin/arm64-only apps).
+- **Applicability**: only CLI apps that **deliver a binary** (ship `install.json`)
+  get a report. HTTP apps and CLI apps whose binary is assumed to be present are
+  skipped with a notice.
+- **Network**: the runner needs egress to the R2 public URL to stage artifacts.
diff --git a/internal/publish/r2.go b/internal/publish/r2.go
@@ -5,7 +5,6 @@ import (
 	"crypto/sha256"
 	"encoding/hex"
 	"fmt"
-	"net/url"
 	"os"
 	"sort"
 	"strconv"
@@ -179,18 +178,3 @@ func rfc3986Escape(s string, encodeSlash bool) string {
 	}
 	return b.String()
 }
-
-// parsePublicKey extracts the object key from a public/proxy URL, for validating
-// that a submitted artifact url points into our registry.
-func (r *R2) keyFromURL(raw string) (string, bool) {
-	u, err := url.Parse(raw)
-	if err != nil {
-		return "", false
-	}
-	if r.PublicBase != "" {
-		if pb, err := url.Parse(r.PublicBase); err == nil && u.Host == pb.Host {
-			return strings.TrimPrefix(u.Path, "/"), true
-		}
-	}
-	return "", false
-}