diff --git a/cmd/pilot-app/main.go b/cmd/pilot-app/main.go index ee0b5b1..8e89c28 100644 --- a/cmd/pilot-app/main.go +++ b/cmd/pilot-app/main.go @@ -8,12 +8,16 @@ package main import ( + "archive/tar" + "bytes" + "compress/gzip" "crypto/ed25519" "crypto/sha256" "encoding/hex" "encoding/json" "flag" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -212,6 +216,7 @@ func cmdVerifySubmission(args []string) { fatalf("%v", err) } defer os.RemoveAll(tmp) + hasArtifacts := len(sub.Artifacts) > 0 allOK := true for _, p := range b.Platforms { path := filepath.Join(tmp, p.TarballName) @@ -231,6 +236,19 @@ func cmdVerifySubmission(args []string) { } fmt.Printf(" %s %-34s %s\n", mark, c.Name, c.Msg) } + // Close the native-delivery gap: a submission that declares `artifacts` + // MUST yield a bundle that actually carries install.json AND a manifest + // wired for staging (the fs.write $APP grant the asset-aware adapter + // needs). Absent either, the published app would have no binary to run — + // exactly the silent breakage that slipped through before this check. + if hasArtifacts { + if msg, ok := checkStaging(p.Tarball); ok { + fmt.Printf(" ✓ %-34s %s\n", "native-delivery (install.json)", msg) + } else { + fmt.Printf(" ✗ %-34s %s\n", "native-delivery (install.json)", msg) + allOK = false + } + } } if !allOK { fmt.Fprintln(os.Stderr, "\nVERIFY FAILED — fix the ✗ items before submitting.") @@ -239,6 +257,104 @@ func cmdVerifySubmission(args []string) { fmt.Printf("\nVERIFY OK — built + verified %d platform(s) from the submission spec.\n", len(b.Platforms)) } +// checkStaging inspects one built platform tarball and confirms the native +// asset-delivery wiring is present: install.json (the registry staging spec the +// adapter reads at startup) AND a manifest whose grants include fs.write $APP +// (the capability the staging runtime needs to write the fetched binary). It +// returns a human-readable message and whether the bundle is staging-ready. 
+// +// This is what closes the false-pass: before this, a submission that declared +// artifacts but whose build dropped them (no install.json, no staging grant) +// still passed verify-submission, because the catalogue gate only checks the +// binary sha/signature and is blind to install.json. +func checkStaging(tarball []byte) (string, bool) { + files, err := tarballFiles(tarball) + if err != nil { + return fmt.Sprintf("read bundle: %v", err), false + } + spec, hasInstall := files["./install.json"] + if !hasInstall { + if _, alt := files["install.json"]; alt { + spec, hasInstall = files["install.json"], true + } + } + if !hasInstall { + return "submission declares artifacts but the built bundle has NO install.json — the adapter would have no binary to stage", false + } + // install.json must name a command and at least one asset. + var is struct { + Command string `json:"command"` + Assets []struct { + ExecPath string `json:"exec_path"` + } `json:"assets"` + } + if err := json.Unmarshal(spec, &is); err != nil { + return fmt.Sprintf("install.json present but unparseable: %v", err), false + } + if len(is.Assets) == 0 { + return "install.json present but lists no assets", false + } + // The manifest must grant fs.write $APP so the staging runtime can write the + // fetched binary under $APP — proof the adapter is wired for delivery, not + // just that the spec file rode along. 
+ mfRaw, hasMf := files["./manifest.json"] + if !hasMf { + mfRaw, hasMf = files["manifest.json"] + } + if !hasMf { + return "install.json present but manifest.json missing from bundle", false + } + var mf struct { + Grants []struct { + Cap string `json:"cap"` + Target string `json:"target"` + } `json:"grants"` + } + if err := json.Unmarshal(mfRaw, &mf); err != nil { + return fmt.Sprintf("manifest.json unparseable: %v", err), false + } + stagingGrant := false + for _, g := range mf.Grants { + if g.Cap == "fs.write" && g.Target == "$APP" { + stagingGrant = true + break + } + } + if !stagingGrant { + return "install.json present but manifest lacks the fs.write $APP grant the staging adapter needs", false + } + return fmt.Sprintf("install.json + staging grant present (%d asset(s))", len(is.Assets)), true +} + +// tarballFiles reads a gzipped tar bundle into a map of header-name → contents. +func tarballFiles(tarball []byte) (map[string][]byte, error) { + gz, err := gzip.NewReader(bytes.NewReader(tarball)) + if err != nil { + return nil, err + } + defer gz.Close() + out := map[string][]byte{} + tr := tar.NewReader(gz) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + if hdr.Typeflag != tar.TypeReg { + continue + } + b, err := io.ReadAll(tr) + if err != nil { + return nil, err + } + out[hdr.Name] = b + } + return out, nil +} + func cmdSubmit(args []string) { fs := flag.NewFlagSet("submit", flag.ExitOnError) dir := fs.String("C", ".", "project dir (holds manifest.json + the built tarball)") diff --git a/cmd/pilot-app/verify_staging_test.go b/cmd/pilot-app/verify_staging_test.go new file mode 100644 index 0000000..3e464e2 --- /dev/null +++ b/cmd/pilot-app/verify_staging_test.go @@ -0,0 +1,153 @@ +package main + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "crypto/ed25519" + "io" + "testing" + + "github.com/pilot-protocol/app-template/internal/publish" +) + +// cliArtifactsSubmission is the miren-shaped 
fixture: a cli backend (id +// io.pilot.) whose binary is DELIVERED from the R2 artifact registry — the +// artifacts[] step with per-platform os/arch/url/sha256/unpack/exec_path. This +// is exactly the shape whose absence on main produced broken published bundles. +func cliArtifactsSubmission() publish.Submission { + return publish.Submission{ + ID: "io.pilot.miren", + Version: "0.1.0", + Description: "Delivers and fronts the miren CLI from the registry.", + Email: "ops@pilotprotocol.network", + Backend: publish.SubBackend{ + Type: "cli", + Command: []string{"miren"}, + }, + Methods: []publish.SubMethod{ + {Name: "miren.version", Description: "Print the miren version.", Latency: "fast", + CLI: publish.SubCLIRoute{Args: []string{"version"}}}, + {Name: "miren.exec", Description: "Run any miren subcommand.", Latency: "med", + Params: []publish.SubParam{{Name: "args", Type: "array"}}, + CLI: publish.SubCLIRoute{Passthrough: true}}, + }, + Listing: publish.SubListing{DisplayName: "Miren", License: "MIT", Categories: []string{"dev"}, AppDescription: "Miren on Pilot."}, + Vendor: publish.SubVendor{Name: "Pilot", AgentUsage: "agents drive miren", Capabilities: "microvm"}, + Artifacts: []publish.SubArtifact{ + {OS: "darwin", Arch: "arm64", URL: "https://pub-x.r2.dev/io.pilot.miren/0.1.0/darwin-arm64/miren.tar.gz", + SHA256: "1111111111111111111111111111111111111111111111111111111111111111", + Unpack: "tar.gz", ExecPath: "miren-0.1.0-darwin-arm64/miren", Order: 1}, + {OS: "linux", Arch: "amd64", URL: "https://pub-x.r2.dev/io.pilot.miren/0.1.0/linux-amd64/miren", + SHA256: "2222222222222222222222222222222222222222222222222222222222222222", + ExecPath: "bin/miren", Order: 1}, + }, + } +} + +// TestVerifyStagingGate_BuildsWiredBundle proves the positive path: a cli +// submission WITH artifacts builds a bundle that actually contains install.json +// and the StageAssets-wired adapter (manifest fs.write $APP grant), so +// checkStaging passes on every platform. 
+func TestVerifyStagingGate_BuildsWiredBundle(t *testing.T) { + if testing.Short() { + t.Skip("cross-compiles the adapter for all platforms; skipped under -short") + } + sub := cliArtifactsSubmission() + if errs := sub.Validate(); len(errs) != 0 { + t.Fatalf("fixture must validate, got: %v", errs) + } + _, priv, err := ed25519.GenerateKey(nil) + if err != nil { + t.Fatal(err) + } + b, err := publish.BuildBundle(sub.ToConfig(), priv) + if err != nil { + t.Fatalf("BuildBundle: %v", err) + } + if len(b.Platforms) == 0 { + t.Fatal("no platforms built") + } + for _, p := range b.Platforms { + msg, ok := checkStaging(p.Tarball) + if !ok { + t.Errorf("platform %s: staging check must pass for an artifact-delivering app, got: %s", p.Platform, msg) + } + } +} + +// TestVerifyStagingGate_FailsWhenStagingStripped is the regression guard: if a +// build produced platform bundles WITHOUT install.json (the exact silent +// breakage that let broken bundles publish), checkStaging — and therefore +// verify-submission — must FAIL. We simulate a stripped bundle by rebuilding the +// tarball without install.json and without the fs.write $APP grant. +func TestVerifyStagingGate_FailsWhenStagingStripped(t *testing.T) { + if testing.Short() { + t.Skip("cross-compiles the adapter; skipped under -short") + } + sub := cliArtifactsSubmission() + _, priv, err := ed25519.GenerateKey(nil) + if err != nil { + t.Fatal(err) + } + b, err := publish.BuildBundle(sub.ToConfig(), priv) + if err != nil { + t.Fatalf("BuildBundle: %v", err) + } + stripped := stripStaging(t, b.Primary().Tarball) + if msg, ok := checkStaging(stripped); ok { + t.Fatalf("staging check MUST fail for a bundle missing install.json, but it passed: %s", msg) + } +} + +// stripStaging rewrites a bundle tarball dropping install.json/install.sh and +// the manifest's fs.write $APP grant — modelling a build that declared artifacts +// but silently failed to wire native delivery. 
+func stripStaging(t *testing.T, tarball []byte) []byte { + t.Helper() + gz, err := gzip.NewReader(bytes.NewReader(tarball)) + if err != nil { + t.Fatal(err) + } + defer gz.Close() + var buf bytes.Buffer + outGz := gzip.NewWriter(&buf) + tw := tar.NewWriter(outGz) + tr := tar.NewReader(gz) + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + name := hdr.Name + if name == "./install.json" || name == "install.json" || + name == "./install.sh" || name == "install.sh" { + continue // drop the staging spec + } + body, err := io.ReadAll(tr) + if err != nil { + t.Fatal(err) + } + if name == "./manifest.json" || name == "manifest.json" { + body = bytes.ReplaceAll(body, + []byte(`{"cap": "fs.write", "target": "$APP"},`), nil) + } + hdr.Size = int64(len(body)) + if err := tw.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err := tw.Write(body); err != nil { + t.Fatal(err) + } + } + if err := tw.Close(); err != nil { + t.Fatal(err) + } + if err := outGz.Close(); err != nil { + t.Fatal(err) + } + return buf.Bytes() +} diff --git a/docs/NATIVE-APPS.md b/docs/NATIVE-APPS.md index e2f3b7c..558fa4e 100644 --- a/docs/NATIVE-APPS.md +++ b/docs/NATIVE-APPS.md @@ -1,6 +1,16 @@ # Native (binary-delivery) apps — design -> Status: DESIGN + TODO. Native/CLI apps are **Coming soon** — blocked at the +> **SUPERSEDED (2026-06-22) for the delivery model.** This doc proposed delivering +> native binaries *by reference* (customer-hosted URL, "we never store the bytes"). +> The shipped implementation instead **hosts the bytes in a Pilot-run Cloudflare +> R2 artifact registry**: the publisher uploads per-OS/arch binaries in the +> publish form's Artifacts step, and the generated cli adapter fetches + verifies +> + stages + execs them at install (with install order + optional args). See +> **`docs/R2-ARTIFACT-REGISTRY.md`** for the canonical, implemented design. 
The +> `assets[]` schema and the daemon-side staging notes below remain useful +> background, but where they disagree with R2-ARTIFACT-REGISTRY.md, that doc wins. + +> Status (original): DESIGN + TODO. Native/CLI apps are **Coming soon** — blocked at the > wizard's type step; only HTTP (translation-only) apps ship today. Decision > (2026-06-17): native apps deliver the real binary via a **customer-hosted URL + > per-OS/arch sha256**, pinned in the signed manifest and **fetched + verified + diff --git a/docs/R2-ARTIFACT-REGISTRY.md b/docs/R2-ARTIFACT-REGISTRY.md new file mode 100644 index 0000000..c4b33e7 --- /dev/null +++ b/docs/R2-ARTIFACT-REGISTRY.md @@ -0,0 +1,115 @@ +# R2 Artifact Registry — native binary delivery for cli apps + +> Status: IMPLEMENTED (RC). Lets the Pilot app store **host** publisher-supplied, +> platform-specific, versioned, signed binaries in Cloudflare R2 and install them +> — in a declared order, with optional install args — via the generated cli +> adapter. Builds on the cli-app support (proc.exec + CLI adapter) from +> app-store#24 / app-template#31. **Supersedes** the "deliver by reference, never +> store the bytes" stance in `NATIVE-APPS.md`: we now store the bytes in R2. + +## Why + +`NATIVE-APPS.md` / `CLI-ADAPTER.md` shipped the *translation* half (a cli adapter +that execs a local command under `proc.exec`) but assumed the binary was already +on the host. Delivering it is the point of a store. This adds the *delivery* +half: the publisher uploads per-OS/arch binaries to a Pilot-run R2 registry, and +the adapter fetches + verifies + stages them at install. + +## The flow + +``` +PUBLISH FORM (Artifacts step) BUILD (publish-api) INSTALL (host) +upload binaries → R2 ─────▶ generate adapter + install.json ─▶ adapter staging (stage.go) +set install order + args fold into the bundle tarball fetch R2 → verify sha → stage + (sha-pinned in the catalogue) → run install args (order) + → exec the staged command +``` + +1. 
**Artifacts step** (publish form, website). The publisher uploads each + platform binary (or `.tar.gz`) to the R2 registry and sets, per artifact: + target `os`/`arch`, `exec_path`, install `order`, optional install `args`, and + `unpack` for archives. The form submits a JSON `Submission` carrying + `artifacts[]` (R2 url + sha256 + order + args — never the bytes). +2. **Submit** (`POST /api/submit`). `Submission.Validate()` checks the artifacts + (cli-only, known os/arch, https URL, 64-hex sha, relative `exec_path` under + `$APP`, per-platform-unique order). The sha is the integrity anchor. +3. **Build** (`/admin/build` → `BuildBundle`). In addition to the signed adapter, + the build emits **`install.json`** (the staging spec, from `cfg.Assets`) into + every platform tarball, and the manifest gains the delivery grants + (`proc.exec`, `fs.write $APP`, `net.dial <registry-host>`). The whole tarball is + sha-pinned in the catalogue, so `install.json` (and the expected asset shas) + can't be altered undetected. +4. **Install + call** (host). The generated cli adapter calls `StageAssets($APP)` + on first spawn (`internal/backend/stage.go`): read `install.json` → select the + asset(s) for `runtime.GOOS/GOARCH` → in ascending `order`, fetch from R2, + verify sha256, stage under `$APP` (single file, or `tar.gz` extracted via the + host `tar`), run any install `args` — then exec the staged `exec_path` per call. + +## R2 layout + +``` +s3://pilot-artifacts-{dev,prod}/<app-id>/<version>/<os>-<arch>/<file> + io.pilot.smolvm/1.2.0/darwin-arm64/smolvm-1.2.0-darwin-arm64.tar.gz +``` +Write-once (a new app version = a new prefix). Buckets `pilot-artifacts-dev` and +`pilot-artifacts-prod` exist on the Pilot R2 account. **Public read** is served by +an r2.dev managed URL (dev: `https://pub-2328865fa11041b8a5efba00b940ec14.r2.dev`); +production should attach a custom domain (e.g. `artifacts.pilotprotocol.network`). +Generated install scripts reference the public base URL. 
+ +## Schema + +`pilot.app.yaml` / `scaffold.Config` gains `assets[]` (see `example.pilot.app.yaml`); +the publish `Submission` gains `artifacts[]`. Both map to: + +| field | meaning | +|---|---| +| `role` | `binary` (chmod +x, default) \| `data` | +| `os` / `arch` | host match: `linux`/`darwin`, `amd64`/`arm64` | +| `url` | https R2 public URL of the artifact | +| `sha256` | 64-hex of the uploaded object; verified after download | +| `unpack` | `""` (single file) \| `tar.gz` (extract under `$APP`) | +| `exec_path` | dest under `$APP`, or the path inside the extracted tree | +| `order` | ascending install sequence (unique per platform) | +| `args` | optional post-stage invocation (e.g. a one-time setup) | + +## Integrity & security + +- **sha256** on every asset, checked after download; mismatch refuses to install. +- The **bundle tarball is sha-pinned** in the catalogue, so `install.json` is + tamper-evident transitively (no app-store manifest-schema change needed). +- **`proc.exec`** (app-store#24) authorizes the exec; **`fs.write $APP`** and + **`net.dial <registry-host>`** authorize staging. cli apps ship `protection: guarded`. +- Archive extraction uses the host `tar` (handles GNU/sparse artifacts Go's + `archive/tar` rejects) **after** a name-scan that rejects absolute paths and + `..` traversal (zip-slip defence). + +## E2E + +`scripts/e2e-smolvm.sh` + `internal/scaffold/r2_e2e_test.go` (`TestR2AssetDeliveryE2E`): +download smolvm (`smol-machines/smolvm`, a real multi-file microVM CLI: wrapper + +binary + libs + sparse disk images) for the host, upload it to `pilot-artifacts-dev`, +then build the generated adapter and let it fetch+verify+extract from R2 and exec +it — asserting `smolvm --version → "smolvm 1.2.0"`. The Go test is env-gated +(`PILOT_E2E_ASSET_URL/_SHA256/_EXECPATH/...`) so CI needs no live bucket; the +script wires it up against the real registry. 
+ +## Build / repo coordination + +| Repo | Role | +|---|---| +| **app-template** (this) | schema, build-time `install.json` gen, staging runtime, manifest grants, e2e — the bulk | +| **app-store** #24 | `proc.exec` capability (reused as the exec permission) | +| **pilotprotocol** #317 | daemon dep bump so it accepts `proc.exec` | +| website #44 | publish wizard cli path; **TODO**: add the Artifacts step (uploads + order/args) as a thin client over a presign endpoint | + +## Follow-ups + +- **Presign upload endpoint** (`POST /api/artifact/presign`) + a signing-proxy + `GET /artifact/...` so the form uploads straight to R2 and installs can run off + a stable proxy URL where a public domain isn't configured. (The e2e uploads via + the S3 API directly.) +- **Server-side re-verify** of each artifact sha against the stored R2 object at + submit time. +- Production **custom domain** for `pilot-artifacts-prod` (needs a Cloudflare API + token with R2 + DNS scope; the S3 keys can't enable public access). diff --git a/internal/publish/build.go b/internal/publish/build.go index 2835d03..e9b2227 100644 --- a/internal/publish/build.go +++ b/internal/publish/build.go @@ -82,6 +82,24 @@ func BuildBundle(cfg *scaffold.Config, priv ed25519.PrivateKey) (*Bundle, error) if err != nil { return nil, err } + // install.json (the registry staging spec, generated for asset-delivering + // apps) ships in every platform tarball alongside the manifest. It is the + // same across platforms — it carries each platform's asset — so it is staged + // into the shared bundle dir once, before the per-platform loop tars it. 
+ if spec, err := os.ReadFile(filepath.Join(tmp, "install.json")); err == nil { + if err := os.MkdirAll(filepath.Join(tmp, "bundle"), 0o755); err != nil { + return nil, err + } + if err := os.WriteFile(filepath.Join(tmp, "bundle", "install.json"), spec, 0o644); err != nil { + return nil, fmt.Errorf("stage install.json into bundle: %w", err) + } + // install.sh ships beside it (transparency / direct-install path). + if script, err := os.ReadFile(filepath.Join(tmp, "install.sh")); err == nil { + if err := os.WriteFile(filepath.Join(tmp, "bundle", "install.sh"), script, 0o755); err != nil { + return nil, fmt.Errorf("stage install.sh into bundle: %w", err) + } + } + } var ( platforms []PlatformBundle diff --git a/internal/publish/cli_e2e_test.go b/internal/publish/cli_e2e_test.go index 68b30ef..db03cbf 100644 --- a/internal/publish/cli_e2e_test.go +++ b/internal/publish/cli_e2e_test.go @@ -150,6 +150,87 @@ func TestCLISubmissionBuildsAndVerifies(t *testing.T) { } } +// sampleCLIAssetsSubmission fronts a CLI that is DELIVERED from the R2 artifact +// registry rather than assumed-installed: it carries per-platform artifacts with +// install order + (here) a post-stage install arg. +func sampleCLIAssetsSubmission() Submission { + s := sampleCLISubmission() + s.ID = "io.pilot.toolx" + s.Description = "Delivers and fronts the toolx CLI." 
+ s.Backend.Command = []string{"toolx"} + s.Methods = []SubMethod{ + {Name: "toolx.version", Description: "Print toolx version.", Latency: "fast", CLI: SubCLIRoute{Args: []string{"version"}}}, + {Name: "toolx.exec", Description: "Run any toolx subcommand.", Latency: "med", + Params: []SubParam{{Name: "args", Type: "array"}}, CLI: SubCLIRoute{Passthrough: true}}, + } + s.Artifacts = []SubArtifact{ + {OS: "darwin", Arch: "arm64", URL: "https://pub-x.r2.dev/io.pilot.toolx/0.1.0/darwin-arm64/toolx", + SHA256: "1111111111111111111111111111111111111111111111111111111111111111", ExecPath: "bin/toolx", Order: 1}, + {OS: "linux", Arch: "amd64", URL: "https://pub-x.r2.dev/io.pilot.toolx/0.1.0/linux-amd64/toolx", + SHA256: "2222222222222222222222222222222222222222222222222222222222222222", ExecPath: "bin/toolx", Order: 1, + Args: []string{"--version"}}, + } + return s +} + +// TestCLIAssetsSubmissionBuildsAndVerifies proves an artifact-delivering cli app +// builds through the real pipeline (scaffold → cross-compile → sign → catalogue +// self-verify) for every platform, ships install.json in the bundle, and the +// shipped manifest carries the delivery grants (proc.exec + fs.write $APP + +// net.dial to the registry host). +func TestCLIAssetsSubmissionBuildsAndVerifies(t *testing.T) { + if testing.Short() { + t.Skip("cross-compiles the cli adapter for all platforms; skipped under -short") + } + if errs := sampleCLIAssetsSubmission().Validate(); len(errs) != 0 { + t.Fatalf("a well-formed cli+assets submission must validate, got: %v", errs) + } + priv, err := LoadOrCreateKey(t.TempDir() + "/k.key") + if err != nil { + t.Fatal(err) + } + b, err := BuildBundle(sampleCLIAssetsSubmission().ToConfig(), priv) + if err != nil { + t.Fatalf("BuildBundle for an asset-delivering app failed: %v", err) + } + + // install.json must ship in the bundle and list both platforms. 
+ spec := fileFromTarball(t, b.Primary().Tarball, "./install.json") + var got struct { + Command string `json:"command"` + Assets []struct { + OS, Arch, URL, SHA256, ExecPath string + Order int + Args []string + } `json:"assets"` + } + if err := json.Unmarshal(spec, &got); err != nil { + t.Fatalf("parse shipped install.json: %v", err) + } + if got.Command != "toolx" || len(got.Assets) != 2 { + t.Fatalf("install.json: want command=toolx + 2 assets, got %+v", got) + } + + // Manifest must carry the delivery grants on top of proc.exec. + mfRaw := fileFromTarball(t, b.Primary().Tarball, "./manifest.json") + var mf struct { + Grants []struct{ Cap, Target string } `json:"grants"` + } + if err := json.Unmarshal(mfRaw, &mf); err != nil { + t.Fatalf("parse shipped manifest: %v", err) + } + want := map[string]string{"proc.exec": "toolx", "fs.write": "$APP", "net.dial": "pub-x.r2.dev"} + have := map[string]string{} + for _, g := range mf.Grants { + have[g.Cap] = g.Target + } + for cap, target := range want { + if have[cap] != target { + t.Errorf("manifest missing grant %s=%q (got %q)", cap, target, have[cap]) + } + } +} + func hasSubErr(errs []string, substr string) bool { for _, e := range errs { if strings.Contains(e, substr) { diff --git a/internal/publish/submission.go b/internal/publish/submission.go index 0e86ec8..079192f 100644 --- a/internal/publish/submission.go +++ b/internal/publish/submission.go @@ -2,6 +2,7 @@ package publish import ( "fmt" + "net/url" "regexp" "sort" "strings" @@ -26,6 +27,31 @@ type Submission struct { Methods []SubMethod `json:"methods"` Listing SubListing `json:"listing"` Vendor SubVendor `json:"vendor"` + + // Artifacts is the native-binary delivery set for a cli app: the + // platform-specific binaries the publisher uploaded to the Pilot R2 artifact + // registry in the form's Artifacts step, with the install order and any + // optional install args. Empty for http apps and for cli apps whose command + // is already present on the host. 
ToConfig maps these to scaffold.Asset. + Artifacts []SubArtifact `json:"artifacts"` +} + +// SubArtifact is one uploaded, platform-specific, signed binary in the publish +// form's Artifacts step. URL is the R2 location returned by the presign upload; +// SHA256 is verified server-side against the stored object before the case is +// accepted, and again on the host at install. Mirrors scaffold.Asset. +type SubArtifact struct { + Role string `json:"role"` // "binary" (default) | "data" + Name string `json:"name"` // per-platform id (default: exec_path basename); referenced by deps + OS string `json:"os"` // linux | darwin + Arch string `json:"arch"` // amd64 | arm64 + URL string `json:"url"` // R2 public URL + SHA256 string `json:"sha256"` // 64-hex of the uploaded object + Unpack string `json:"unpack"` // "" (single file) | "tar.gz" (extract under $APP) + ExecPath string `json:"exec_path"` // dest under $APP, or path inside the extracted tree + Deps []string `json:"deps"` // names of same-platform artifacts installed first + Order int `json:"order"` // tiebreaker among independent artifacts (per platform) + Args []string `json:"args"` // optional post-stage install args } // SubBackend selects and configures the data plane the adapter forwards to: @@ -175,6 +201,7 @@ func (s Submission) Validate() []string { } else if !reURL.MatchString(strings.TrimSpace(s.Backend.BaseURL)) { e = append(e, "Backend base URL must be an absolute http(s) URL") } + e = append(e, s.validateArtifacts()...) 
if len(s.Methods) == 0 { e = append(e, "Add at least one method") } @@ -215,6 +242,62 @@ func (s Submission) Validate() []string { return e } +var ( + subSHA256 = regexp.MustCompile(`^[0-9a-f]{64}$`) + subOSOK = map[string]bool{"linux": true, "darwin": true} + subArchOK = map[string]bool{"amd64": true, "arm64": true} +) + +// validateArtifacts mirrors the scaffold asset rules at the submission boundary +// so a publisher gets clear, server-authoritative errors before any build: +// artifacts are cli-only, each names a known os/arch, an https R2 URL, a 64-hex +// sha256, and a relative exec_path under $APP; install order is unique per +// platform. (The sha is additionally re-verified against the stored R2 object on +// submit, and on the host at install.) +func (s Submission) validateArtifacts() []string { + if len(s.Artifacts) == 0 { + return nil + } + var e []string + if !s.Backend.IsCLI() { + e = append(e, "Artifacts (binary delivery) are only valid for a cli backend") + } + orders := map[string]bool{} + for i, a := range s.Artifacts { + role := a.Role + if role == "" { + role = "binary" + } + if role != "binary" && role != "data" { + e = append(e, fmt.Sprintf("Artifact %d: role %q must be binary or data", i+1, a.Role)) + } + if a.Unpack != "" && a.Unpack != "tar.gz" { + e = append(e, fmt.Sprintf("Artifact %d: unpack %q must be empty or \"tar.gz\"", i+1, a.Unpack)) + } + if !subOSOK[a.OS] { + e = append(e, fmt.Sprintf("Artifact %d: os %q must be linux or darwin", i+1, a.OS)) + } + if !subArchOK[a.Arch] { + e = append(e, fmt.Sprintf("Artifact %d: arch %q must be amd64 or arm64", i+1, a.Arch)) + } + if u, err := url.Parse(strings.TrimSpace(a.URL)); err != nil || u.Scheme != "https" || u.Host == "" { + e = append(e, fmt.Sprintf("Artifact %d: url must be an absolute https URL (the R2 upload location)", i+1)) + } + if !subSHA256.MatchString(a.SHA256) { + e = append(e, fmt.Sprintf("Artifact %d: sha256 must be 64 lowercase hex chars", i+1)) + } + if a.ExecPath == "" || 
strings.HasPrefix(a.ExecPath, "/") || strings.Contains(a.ExecPath, "..") { + e = append(e, fmt.Sprintf("Artifact %d: exec_path must be a relative path under $APP (no leading / or \"..\")", i+1)) + } + key := fmt.Sprintf("%s/%s#%d", a.OS, a.Arch, a.Order) + if orders[key] { + e = append(e, fmt.Sprintf("Artifact %d: duplicate install order %d for %s/%s", i+1, a.Order, a.OS, a.Arch)) + } + orders[key] = true + } + return e +} + // ToConfig derives the buildable adapter spec from the submission (the fields // the generator needs). Review-only fields (vendor free-text, agent-usage, // capabilities, binary URL) are intentionally not part of it. @@ -286,6 +369,12 @@ func (s Submission) ToConfig() *scaffold.Config { } cfg.Methods = append(cfg.Methods, method) } + for _, a := range s.Artifacts { + cfg.Assets = append(cfg.Assets, scaffold.Asset{ + Role: a.Role, Name: a.Name, OS: a.OS, Arch: a.Arch, URL: a.URL, SHA256: a.SHA256, + Unpack: a.Unpack, ExecPath: a.ExecPath, Deps: a.Deps, Order: a.Order, Args: a.Args, + }) + } cfg.Resolve() return cfg } diff --git a/internal/scaffold/compile_test.go b/internal/scaffold/compile_test.go index 199a071..1e36ea9 100644 --- a/internal/scaffold/compile_test.go +++ b/internal/scaffold/compile_test.go @@ -63,6 +63,69 @@ func TestGeneratedCLIProjectCompiles(t *testing.T) { } } +// cliAssetsSpec is a cli app that delivers its binary from the R2 artifact +// registry: an asset per host plus an enumerated + passthrough method. It +// exercises the generated staging runtime (backend/stage.go) and the asset-aware +// main, both of which only render when assets are present. +const cliAssetsSpec = ` +id: io.pilot.toolx +app_version: 0.2.0 +description: "Delivers and wraps the toolx CLI." 
+backend: + type: cli + command: ["toolx"] +assets: + - {os: darwin, arch: arm64, url: "https://pub-x.r2.dev/io.pilot.toolx/0.2.0/darwin-arm64/toolx", sha256: "1111111111111111111111111111111111111111111111111111111111111111", exec_path: bin/toolx, order: 1} + - {os: linux, arch: amd64, url: "https://pub-x.r2.dev/io.pilot.toolx/0.2.0/linux-amd64/toolx", sha256: "2222222222222222222222222222222222222222222222222222222222222222", exec_path: bin/toolx, order: 1} +methods: + - name: toolx.version + summary: "Print version." + duration: fast + cli: {args: ["version"]} + - name: toolx.exec + summary: "Passthrough." + duration: med + cli: {passthrough: true} +` + +// TestGeneratedCLIWithAssetsCompiles type-checks the asset-delivery code paths: +// the staging runtime and the asset-aware main are generated only when an app +// ships assets, so an unused import or a bad template there is invisible to the +// no-asset cli compile test. It also asserts install.json is emitted. +func TestGeneratedCLIWithAssetsCompiles(t *testing.T) { + if testing.Short() { + t.Skip("skipping compile test in -short mode") + } + goBin, err := exec.LookPath("go") + if err != nil { + t.Skip("go toolchain not available") + } + + cfg := parseSpec(t, cliAssetsSpec) + dir := t.TempDir() + if _, err := Generate(cfg, dir); err != nil { + t.Fatalf("generate: %v", err) + } + if _, err := os.Stat(filepath.Join(dir, "install.json")); err != nil { + t.Fatalf("install.json must be emitted for an asset-delivering app: %v", err) + } + if _, err := os.Stat(filepath.Join(dir, "internal", "backend", "stage.go")); err != nil { + t.Fatalf("stage.go must be generated for an asset-delivering app: %v", err) + } + if sum, err := os.ReadFile(filepath.Join("..", "..", "go.sum")); err == nil { + if err := os.WriteFile(filepath.Join(dir, "go.sum"), sum, 0o644); err != nil { + t.Fatalf("seed go.sum: %v", err) + } + } + + cmd := exec.Command(goBin, "build", "./...") + cmd.Dir = dir + cmd.Env = append(os.Environ(), 
"GOFLAGS=-mod=mod") + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("generated cli+assets project failed to compile: %v\n%s", err, out) + } +} + // TestCLIRouteValidation pins the cli route rules: passthrough is mutually // exclusive with baked args/flags, and an empty route is rejected. func TestCLIRouteValidation(t *testing.T) { diff --git a/internal/scaffold/config.go b/internal/scaffold/config.go index 07cc0c8..1e50d60 100644 --- a/internal/scaffold/config.go +++ b/internal/scaffold/config.go @@ -44,10 +44,79 @@ type Config struct { Grants Grants `yaml:"grants"` Listing Listing `yaml:"listing"` // store-page metadata (catalogue v2) - // TODO(native-apps): add `Assets []Asset` (per-OS/arch download url + sha256 + - // exec_path) for native/CLI apps that deliver a real binary. The generator - // emits manifest `assets`; the daemon fetches/verifies/stages at install. - // See docs/NATIVE-APPS.md. Coming soon — http (translation-only) ships first. + // Assets is the native-binary delivery set for a cli backend: the + // platform-specific binaries the publisher uploaded to the Pilot R2 artifact + // registry. At install the generated adapter fetches the asset matching the + // host os/arch, verifies its sha256, stages it under $APP/, and (in + // `order`) runs any with install `args`. The fronted command then execs the + // staged path instead of an assumed-installed binary. Empty for http apps and + // for cli apps whose command is already present on the host. See + // docs/R2-ARTIFACT-REGISTRY.md. + Assets []Asset `yaml:"assets"` +} + +// Asset is one platform-specific file delivered from the R2 artifact registry. +// Integrity is the sha256 (verified at install); the whole bundle tarball is +// itself sha-pinned in the catalogue, so install.json (which carries these +// shas) cannot be tampered with undetected. 
// Asset describes one platform-specific file delivered from the artifact
// registry. The sha256 is the integrity anchor checked after download.
type Asset struct {
	Role     string   `yaml:"role" json:"role"`           // "binary" (the default; made executable) or "data"
	Name     string   `yaml:"name" json:"name"`           // stable id within a platform; falls back to the exec_path basename; referenced by other assets' deps
	OS       string   `yaml:"os" json:"os"`               // linux | darwin
	Arch     string   `yaml:"arch" json:"arch"`           // amd64 | arm64
	URL      string   `yaml:"url" json:"url"`             // absolute https download URL
	SHA256   string   `yaml:"sha256" json:"sha256"`       // 64 lowercase hex chars of the downloaded object
	Unpack   string   `yaml:"unpack" json:"unpack"`       // "" for a single file, "tar.gz" to extract an archive under $APP
	ExecPath string   `yaml:"exec_path" json:"exec_path"` // destination under $APP for a single file, or the path inside the extracted tree for an archive
	Deps     []string `yaml:"deps" json:"deps"`           // names of same-platform assets that must install first
	Order    int      `yaml:"order" json:"order"`         // ascending tiebreaker among assets with no dependency relation
	Args     []string `yaml:"args" json:"args"`           // optional post-stage invocation: the staged exec_path under $APP is run with these args
}

// AssetName returns the id used in dependency edges: the explicit Name when
// set, otherwise everything after the last '/' of ExecPath (the whole ExecPath
// when it contains no '/').
func (a Asset) AssetName() string {
	if a.Name != "" {
		return a.Name
	}
	if slash := strings.LastIndexByte(a.ExecPath, '/'); slash >= 0 {
		return a.ExecPath[slash+1:]
	}
	return a.ExecPath
}
+func (c *Config) PrimaryExecPath() string { + if len(c.Backend.Command) == 0 { + return "" + } + cmd := c.Backend.Command[0] + for _, a := range c.Assets { + if a.Role == "data" { + continue + } + if base := a.ExecPath[strings.LastIndexByte(a.ExecPath, '/')+1:]; base == cmd || a.ExecPath == cmd { + return a.ExecPath + } + } + return "" +} + +// AssetHosts returns the unique hostnames the adapter must dial to fetch assets, +// for the manifest net.dial grants. Sorted for deterministic generation. +func (c *Config) AssetHosts() []string { + seen := map[string]bool{} + var hosts []string + for _, a := range c.Assets { + if u, err := url.Parse(a.URL); err == nil && u.Hostname() != "" && !seen[u.Hostname()] { + seen[u.Hostname()] = true + hosts = append(hosts, u.Hostname()) + } + } + sort.Strings(hosts) + return hosts } // Listing is the store-page metadata that drives the catalogue v2 rich view @@ -250,6 +319,15 @@ type RawGrant struct { var ( idPattern = regexp.MustCompile(`^[a-z0-9]([a-z0-9_-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9_-]*[a-z0-9])?)+$`) semverPattern = regexp.MustCompile(`^\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?$`) + sha256Pattern = regexp.MustCompile(`^[0-9a-f]{64}$`) +) + +// knownOS / knownArch are the host targets the registry + staging understand. +// These match scaffold/build platform tuples (DefaultPlatforms) and the daemon's +// runtime.GOOS/GOARCH values. +var ( + knownOS = map[string]bool{"linux": true, "darwin": true} + knownArch = map[string]bool{"amd64": true, "arm64": true} ) // Parse decodes a pilot.app.yaml document (strict: unknown keys are errors, so @@ -373,6 +451,7 @@ func (c *Config) Validate() []error { } } } + errs = append(errs, c.validateAssets()...) 
if len(c.Methods) == 0 { errs = append(errs, fmt.Errorf("at least one method must be declared")) } @@ -437,6 +516,146 @@ func (c *Config) Validate() []error { return errs } +// validateAssets enforces the registry-delivery rules: assets are cli-only, +// each names a known os/arch, an https URL, a 64-hex sha256, and an exec_path +// that stays under $APP (no absolute path, no "..", no leading slash). Orders +// must be unique so the install sequence is deterministic, and (os,arch,role) +// must be unique so the host match is unambiguous. +func (c *Config) validateAssets() []error { + if len(c.Assets) == 0 { + return nil + } + var errs []error + if c.Backend.Type != "cli" { + errs = append(errs, fmt.Errorf("assets are only valid for a cli backend (an http app delivers no binary)")) + } + // Orders and binary roles are scoped per host platform: each host installs + // only its own (os,arch) assets, so two platforms may both use order 1, but + // within one platform the order must be unique (deterministic sequence) and a + // platform must not ship two binaries for the same exec_path. 
+ orders := map[string]bool{} + platforms := map[string]bool{} + for i, a := range c.Assets { + role := a.Role + if role == "" { + role = "binary" + } + if role != "binary" && role != "data" { + errs = append(errs, fmt.Errorf("assets[%d].role %q must be \"binary\" or \"data\"", i, a.Role)) + } + if a.Unpack != "" && a.Unpack != "tar.gz" { + errs = append(errs, fmt.Errorf("assets[%d].unpack %q must be \"\" or \"tar.gz\"", i, a.Unpack)) + } + if !knownOS[a.OS] { + errs = append(errs, fmt.Errorf("assets[%d].os %q must be linux or darwin", i, a.OS)) + } + if !knownArch[a.Arch] { + errs = append(errs, fmt.Errorf("assets[%d].arch %q must be amd64 or arm64", i, a.Arch)) + } + if u, err := url.Parse(a.URL); err != nil || u.Scheme != "https" || u.Host == "" { + errs = append(errs, fmt.Errorf("assets[%d].url %q must be an absolute https URL", i, a.URL)) + } + if !sha256Pattern.MatchString(a.SHA256) { + errs = append(errs, fmt.Errorf("assets[%d].sha256 %q must be 64 lowercase hex chars", i, a.SHA256)) + } + if a.ExecPath == "" || strings.HasPrefix(a.ExecPath, "/") || strings.Contains(a.ExecPath, "..") { + errs = append(errs, fmt.Errorf("assets[%d].exec_path %q must be a relative path under $APP (no leading / and no \"..\")", i, a.ExecPath)) + } + plat := a.OS + "/" + a.Arch + orderKey := fmt.Sprintf("%s#%d", plat, a.Order) + if orders[orderKey] { + errs = append(errs, fmt.Errorf("assets[%d]: duplicate install order %d for %s — orders must be unique within a platform", i, a.Order, plat)) + } + orders[orderKey] = true + key := plat + "/" + a.ExecPath + if platforms[key] { + errs = append(errs, fmt.Errorf("assets[%d]: duplicate asset for %s at %s", i, plat, a.ExecPath)) + } + platforms[key] = true + } + // Per-platform: dependency names must resolve to a sibling and form a DAG. 
+ for _, plat := range c.assetPlatforms() { + if _, err := c.ResolveAssets(plat[0], plat[1]); err != nil { + errs = append(errs, fmt.Errorf("assets for %s/%s: %w", plat[0], plat[1], err)) + } + } + return errs +} + +// assetPlatforms lists the distinct (os,arch) tuples present in Assets. +func (c *Config) assetPlatforms() [][2]string { + seen := map[string]bool{} + var out [][2]string + for _, a := range c.Assets { + k := a.OS + "/" + a.Arch + if !seen[k] { + seen[k] = true + out = append(out, [2]string{a.OS, a.Arch}) + } + } + return out +} + +// ResolveAssets returns the assets for one host platform in install order: a +// topological sort over `deps` (an asset installs after everything it depends +// on), with `order` then name as the deterministic tiebreaker among assets that +// have no dependency relation. Errors on an unknown dep name or a cycle. +func (c *Config) ResolveAssets(os, arch string) ([]Asset, error) { + var plat []Asset + for _, a := range c.Assets { + if a.OS == os && a.Arch == arch { + plat = append(plat, a) + } + } + byName := map[string]Asset{} + for _, a := range plat { + byName[a.AssetName()] = a + } + // Kahn's algorithm with a deterministic ready-set ordering. 
+ indeg := map[string]int{} + for _, a := range plat { + indeg[a.AssetName()] = 0 + } + for _, a := range plat { + for _, d := range a.Deps { + if _, ok := byName[d]; !ok { + return nil, fmt.Errorf("asset %q depends on unknown asset %q", a.AssetName(), d) + } + indeg[a.AssetName()]++ + } + } + less := func(x, y Asset) bool { + if x.Order != y.Order { + return x.Order < y.Order + } + return x.AssetName() < y.AssetName() + } + var out []Asset + for len(out) < len(plat) { + var ready []Asset + for _, a := range plat { + if indeg[a.AssetName()] == 0 { + ready = append(ready, a) + } + } + if len(ready) == 0 { + return nil, fmt.Errorf("dependency cycle among assets") + } + sort.Slice(ready, func(i, j int) bool { return less(ready[i], ready[j]) }) + next := ready[0] + indeg[next.AssetName()] = -1 // mark consumed + out = append(out, next) + for _, a := range plat { + for _, d := range a.Deps { + if d == next.AssetName() { + indeg[a.AssetName()]-- + } + } + } + } + return out, nil +} + // BackendHost returns the net.dial target for the grant block (http only). func (c *Config) BackendHost() string { u, err := url.Parse(c.Backend.BaseURL) diff --git a/internal/scaffold/install.go b/internal/scaffold/install.go new file mode 100644 index 0000000..55a6121 --- /dev/null +++ b/internal/scaffold/install.go @@ -0,0 +1,183 @@ +package scaffold + +import ( + "encoding/json" + "fmt" + "path" + "sort" + "strings" +) + +// InstallSpec is the staging contract shipped as install.json in the bundle. The +// generated adapter reads it at startup and, for the asset(s) matching the host +// os/arch, fetches → sha256-verifies → stages under $APP/ → runs any +// install args, in the resolved install order. It is the machine-readable form +// of the publisher's Artifacts step (R2 location + dependencies + order + args). 
// InstallSpec is the staging contract written to install.json next to the
// manifest. It is the machine-readable list of what to fetch, verify and stage
// for each host platform, in resolved install order.
//
// Each asset carries its own sha256. NOTE(review): the surrounding commentary
// says the bundle tarball is itself sha-pinned in the catalogue, which is what
// protects install.json from tampering — that pin is not visible here; confirm.
type InstallSpec struct {
	Schema  int            `json:"schema"`  // spec schema version; marshalInstallSpec writes 1
	App     string         `json:"app"`     // app id (Config.ID)
	Version string         `json:"version"` // app_version
	Command string         `json:"command"` // base command the adapter execs (Backend.Command[0])
	Assets  []InstallAsset `json:"assets"`  // every platform's assets, grouped by platform
}

// InstallAsset mirrors scaffold.Asset in the on-disk install spec. Order is the
// RESOLVED per-platform install sequence (0-based) after dependency sorting, so
// a consumer can simply install ascending-by-order within its platform.
type InstallAsset struct {
	Name     string   `json:"name"`      // Asset.AssetName(): explicit name, else exec_path basename
	Role     string   `json:"role"`      // binary | data (an empty source role is written as "binary")
	OS       string   `json:"os"`        // linux | darwin
	Arch     string   `json:"arch"`      // amd64 | arm64
	URL      string   `json:"url"`       // https download URL
	SHA256   string   `json:"sha256"`    // 64-hex of the downloaded object
	Unpack   string   `json:"unpack"`    // "" | "tar.gz"
	ExecPath string   `json:"exec_path"` // dest under $APP, or path inside the extracted tree
	Deps     []string `json:"deps"`      // names of same-platform assets installed first
	Order    int      `json:"order"`     // resolved install sequence within the platform
	Args     []string `json:"args"`      // optional post-stage invocation
}
+func (c *Config) resolvedAssets() ([]InstallAsset, error) { + var out []InstallAsset + plats := c.assetPlatforms() + sort.Slice(plats, func(i, j int) bool { + if plats[i][0] != plats[j][0] { + return plats[i][0] < plats[j][0] + } + return plats[i][1] < plats[j][1] + }) + for _, p := range plats { + seq, err := c.ResolveAssets(p[0], p[1]) + if err != nil { + return nil, err + } + for i, a := range seq { + role := a.Role + if role == "" { + role = "binary" + } + out = append(out, InstallAsset{ + Name: a.AssetName(), Role: role, OS: a.OS, Arch: a.Arch, URL: a.URL, + SHA256: a.SHA256, Unpack: a.Unpack, ExecPath: path.Clean(a.ExecPath), + Deps: a.Deps, Order: i, Args: a.Args, + }) + } + } + return out, nil +} + +// marshalInstallSpec builds install.json from cfg.Assets in resolved order. +func marshalInstallSpec(c *Config) ([]byte, error) { + var cmd string + if len(c.Backend.Command) > 0 { + cmd = c.Backend.Command[0] + } + assets, err := c.resolvedAssets() + if err != nil { + return nil, err + } + spec := InstallSpec{Schema: 1, App: c.ID, Version: c.AppVersion, Command: cmd, Assets: assets} + b, err := json.MarshalIndent(spec, "", " ") + if err != nil { + return nil, err + } + return append(b, '\n'), nil +} + +// renderInstallScript produces a standalone, dependency-free POSIX install.sh +// that performs the SAME install the adapter's stage.go does — fetch from R2, +// sha256-verify, stage under $APP (single file or tar.gz), run install args — in +// the resolved order, for the host's os/arch. It ships in the bundle for +// transparency and for hosts/operators that want to run the install directly +// (e.g. debugging, or a non-Go runtime). The adapter remains the default path. 
+func renderInstallScript(c *Config) ([]byte, error) { + assets, err := c.resolvedAssets() + if err != nil { + return nil, err + } + var b strings.Builder + w := func(format string, a ...any) { fmt.Fprintf(&b, format, a...); b.WriteByte('\n') } + + w("#!/usr/bin/env sh") + w("# install.sh — GENERATED by pilot-app for %s %s.", c.ID, c.AppVersion) + w("# Fetches this app's artifacts from the Pilot R2 registry, verifies each") + w("# sha256, stages them under $APP, and runs any install steps — in dependency") + w("# order. This mirrors the adapter's built-in staging (internal/backend/stage.go);") + w("# either is sufficient. Usage: APP=/path/to/app sh install.sh") + w("set -eu") + w("") + w(`APP="${APP:-$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)}"`) + w(`OS="$(uname -s | tr '[:upper:]' '[:lower:]')"`) + w(`ARCH="$(uname -m)"`) + w(`case "$ARCH" in aarch64) ARCH=arm64;; x86_64) ARCH=amd64;; esac`) + w(`echo "pilot-install: staging %s artifacts for ${OS}/${ARCH} into ${APP}"`, c.ID) + w("") + w("sha256_of() {") + w(` if command -v sha256sum >/dev/null 2>&1; then sha256sum "$1" | awk '{print $1}';`) + w(` else shasum -a 256 "$1" | awk '{print $1}'; fi`) + w("}") + w("") + w("stage_one() { # url sha unpack exec_path role") + w(` url="$1"; want="$2"; unpack="$3"; exec_path="$4"; role="$5"`) + w(` tmp="$(mktemp)"`) + w(` echo " fetch $url"`) + w(` curl -fSL "$url" -o "$tmp"`) + w(` got="$(sha256_of "$tmp")"`) + w(` if [ "$got" != "$want" ]; then echo " sha256 MISMATCH for $exec_path: want $want got $got" >&2; rm -f "$tmp"; exit 1; fi`) + w(` echo " verify ok ($want)"`) + w(` if [ "$unpack" = "tar.gz" ]; then`) + w(` tar -xzf "$tmp" -C "$APP"; rm -f "$tmp"`) + w(` else`) + w(` dest="$APP/$exec_path"; mkdir -p "$(dirname -- "$dest")"; mv "$tmp" "$dest"`) + w(` if [ "$role" != "data" ]; then chmod 0755 "$dest"; fi`) + w(` fi`) + w(` echo " staged $exec_path"`) + w("}") + w("") + // Emit a guarded block per platform so one script serves every host. 
+ plats := map[string]bool{} + var order []string + for _, a := range assets { + k := a.OS + "/" + a.Arch + if !plats[k] { + plats[k] = true + order = append(order, k) + } + } + for _, k := range order { + os, arch, _ := strings.Cut(k, "/") + w(`if [ "$OS" = "%s" ] && [ "$ARCH" = "%s" ]; then`, os, arch) + for _, a := range assets { + if a.OS != os || a.Arch != arch { + continue + } + w(` stage_one %q %q %q %q %q`, a.URL, a.SHA256, a.Unpack, a.ExecPath, a.Role) + if len(a.Args) > 0 { + w(` echo " run %s %s"`, a.ExecPath, shJoin(a.Args)) + w(` "$APP/%s" %s`, a.ExecPath, shJoin(a.Args)) + } + } + w(` echo "pilot-install: done"; exit 0`) + w(`fi`) + } + w(`echo "pilot-install: no artifact for ${OS}/${ARCH}" >&2; exit 1`) + return []byte(b.String()), nil +} + +// shJoin renders args as a single-quoted, space-joined POSIX argv fragment. +func shJoin(args []string) string { + parts := make([]string, len(args)) + for i, a := range args { + parts[i] = "'" + strings.ReplaceAll(a, "'", `'\''`) + "'" + } + return strings.Join(parts, " ") +} diff --git a/internal/scaffold/install_test.go b/internal/scaffold/install_test.go new file mode 100644 index 0000000..44914fc --- /dev/null +++ b/internal/scaffold/install_test.go @@ -0,0 +1,125 @@ +package scaffold + +import ( + "encoding/json" + "strings" + "testing" +) + +// depSpec: three assets on one platform where deps force an order that differs +// from the raw `order` field, proving the topological resolver (not just the +// integer order) drives the install sequence. +// +// runtime (order 9, no deps) +// plugin (order 1, deps: [runtime]) -> must come AFTER runtime despite lower order +// tool (order 5, deps: [plugin]) -> must come last +const depSpec = ` +id: io.pilot.toolx +app_version: 0.3.0 +description: "Multi-artifact app with dependencies." 
+backend: + type: cli + command: ["tool"] +assets: + - {name: plugin, os: darwin, arch: arm64, url: "https://r.example/plugin", sha256: "1111111111111111111111111111111111111111111111111111111111111111", exec_path: bin/plugin, order: 1, deps: [runtime]} + - {name: tool, os: darwin, arch: arm64, url: "https://r.example/tool", sha256: "2222222222222222222222222222222222222222222222222222222222222222", exec_path: bin/tool, order: 5, deps: [plugin], args: ["--init"]} + - {name: runtime, os: darwin, arch: arm64, url: "https://r.example/runtime", sha256: "3333333333333333333333333333333333333333333333333333333333333333", exec_path: bin/runtime, order: 9} +methods: + - {name: toolx.run, summary: "run", cli: {passthrough: true}} +` + +func TestDependencyInstallOrder(t *testing.T) { + cfg := parseSpec(t, depSpec) + if errs := cfg.Validate(); len(errs) != 0 { + t.Fatalf("valid dep spec must pass: %v", errs) + } + seq, err := cfg.ResolveAssets("darwin", "arm64") + if err != nil { + t.Fatalf("resolve: %v", err) + } + got := []string{seq[0].AssetName(), seq[1].AssetName(), seq[2].AssetName()} + want := []string{"runtime", "plugin", "tool"} + for i := range want { + if got[i] != want[i] { + t.Fatalf("install order = %v, want %v (deps must override raw order)", got, want) + } + } +} + +func TestInstallSpecAndScriptHonorDeps(t *testing.T) { + cfg := parseSpec(t, depSpec) + + // install.json: resolved Order is the topo index, not the raw order field. + raw, err := marshalInstallSpec(cfg) + if err != nil { + t.Fatal(err) + } + var spec InstallSpec + if err := json.Unmarshal(raw, &spec); err != nil { + t.Fatal(err) + } + pos := map[string]int{} + for _, a := range spec.Assets { + pos[a.Name] = a.Order + } + if !(pos["runtime"] < pos["plugin"] && pos["plugin"] < pos["tool"]) { + t.Fatalf("install.json order wrong: %+v", pos) + } + + // install.sh: the staged lines must appear in dependency order, and the + // tool's install arg must be emitted after it stages. 
// anyContains reports whether the message of at least one error in errs
// contains sub. It stops at the first match; an empty or nil slice yields
// false.
func anyContains(errs []error, sub string) bool {
	found := false
	for i := 0; i < len(errs) && !found; i++ {
		found = strings.Contains(errs[i].Error(), sub)
	}
	return found
}
import (
	"encoding/json"
	"fmt"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"testing"
	"time"

	"github.com/pilot-protocol/app-store/pkg/ipc"
)
1.2.0) +func TestR2AssetDeliveryE2E(t *testing.T) { + url := os.Getenv("PILOT_E2E_ASSET_URL") + sha := os.Getenv("PILOT_E2E_ASSET_SHA256") + execPath := os.Getenv("PILOT_E2E_ASSET_EXECPATH") + if url == "" || sha == "" || execPath == "" { + t.Skip("set PILOT_E2E_ASSET_URL/_SHA256/_EXECPATH to run the live R2 delivery e2e (see scripts/e2e-smolvm.sh)") + } + callArg := envOr("PILOT_E2E_ASSET_CALLARG", "--version") + expect := os.Getenv("PILOT_E2E_ASSET_EXPECT") + if _, err := exec.LookPath("go"); err != nil { + t.Skip("go toolchain not available") + } + + root := t.TempDir() + // command basename must match the staged exec_path basename so the adapter + // resolves the fronted command to the staged binary. + cmd := filepath.Base(execPath) + spec := fmt.Sprintf(` +id: io.pilot.smolvm +app_version: 1.2.0 +description: "Delivers and fronts the smolvm microVM CLI from the R2 registry." +namespace: smolvm +backend: + type: cli + command: ["%s"] +assets: + - os: %s + arch: %s + url: "%s" + sha256: "%s" + unpack: tar.gz + exec_path: "%s" + order: 1 +methods: + - name: smolvm.version + summary: "Print the smolvm version." + cli: {args: ["%s"]} + - name: smolvm.exec + summary: "Run any smolvm subcommand." + cli: {passthrough: true} +`, cmd, runtime.GOOS, runtime.GOARCH, url, sha, execPath, callArg) + + cfg := parseSpec(t, spec) + proj := filepath.Join(root, "proj") + if _, err := Generate(cfg, proj); err != nil { + t.Fatalf("generate: %v", err) + } + if sum, err := os.ReadFile(filepath.Join("..", "..", "go.sum")); err == nil { + _ = os.WriteFile(filepath.Join(proj, "go.sum"), sum, 0o644) + } + + bin := filepath.Join(root, "adapter") + build := exec.Command("go", "build", "-o", bin, "./cmd/"+cfg.BinaryName) + build.Dir = proj + build.Env = append(os.Environ(), "GOFLAGS=-mod=mod") + if out, err := build.CombinedOutput(); err != nil { + t.Fatalf("build adapter: %v\n%s", err, out) + } + + // Run the adapter as the daemon would. 
$APP is the manifest dir (proj), where + // install.json was generated — the adapter stages the asset there on startup. + sock := filepath.Join(root, "app.sock") + adapter := exec.Command(bin, "--socket", sock, "--manifest", filepath.Join(proj, "manifest.json")) + adapter.Stderr = os.Stderr + if err := adapter.Start(); err != nil { + t.Fatalf("start adapter: %v", err) + } + defer func() { _ = adapter.Process.Kill(); _, _ = adapter.Process.Wait() }() + + // Staging downloads + extracts the artifact BEFORE the socket appears, so + // allow generous time for the fetch from R2. + deadline := time.Now().Add(150 * time.Second) + for time.Now().Before(deadline) { + if _, err := os.Stat(sock); err == nil { + break + } + time.Sleep(100 * time.Millisecond) + } + if _, err := os.Stat(sock); err != nil { + t.Fatalf("adapter socket never appeared — staging from R2 likely failed (see adapter stderr above)") + } + + // The asset must actually be on disk under $APP, delivered from R2. + staged := filepath.Join(proj, filepath.FromSlash(execPath)) + if _, err := os.Stat(staged); err != nil { + t.Fatalf("staged command not found at %s: %v", staged, err) + } + + call := func(method, args string) json.RawMessage { + t.Helper() + conn, err := net.DialTimeout("unix", sock, 5*time.Second) + if err != nil { + t.Fatalf("dial: %v", err) + } + defer conn.Close() + var out json.RawMessage + if err := ipc.Call(conn, method, json.RawMessage(args), &out); err != nil { + t.Fatalf("call %s: %v", method, err) + } + return out + } + + // smolvm.version → the adapter execs the R2-delivered binary and returns its + // output. Version text isn't JSON, so it comes back wrapped as {stdout,...}. 
// envOr returns the value of environment variable k, or def when the variable
// is unset or empty.
func envOr(k, def string) string {
	if v := os.Getenv(k); v != "" {
		return v
	}
	return def
}

// contains reports whether sub occurs in s. An empty sub always matches.
func contains(s, sub string) bool {
	return indexOf(s, sub) >= 0
}

// indexOf returns the byte index of the first occurrence of sub in s, or -1
// when sub does not occur. An empty sub is found at index 0. It delegates to
// strings.Index, which has the same contract as the byte-wise scan it
// replaces.
func indexOf(s, sub string) int {
	return strings.Index(s, sub)
}
+ // Built from a Go model (not a text template) so the JSON is assembled safely. + if cfg.HasAssets() { + spec, err := marshalInstallSpec(cfg) + if err != nil { + return written, fmt.Errorf("build install.json: %w", err) + } + if err := os.WriteFile(filepath.Join(outDir, "install.json"), spec, 0o644); err != nil { + return written, fmt.Errorf("write install.json: %w", err) + } + written = append(written, "install.json") + + script, err := renderInstallScript(cfg) + if err != nil { + return written, fmt.Errorf("build install.sh: %w", err) + } + if err := os.WriteFile(filepath.Join(outDir, "install.sh"), script, 0o755); err != nil { + return written, fmt.Errorf("write install.sh: %w", err) + } + written = append(written, "install.sh") + } + for _, f := range files { rendered, err := render(f.tmpl, cfg) if err != nil { diff --git a/internal/scaffold/templates/client_cli.go.tmpl b/internal/scaffold/templates/client_cli.go.tmpl index 49eca28..9edcb5f 100644 --- a/internal/scaffold/templates/client_cli.go.tmpl +++ b/internal/scaffold/templates/client_cli.go.tmpl @@ -20,12 +20,14 @@ // - A non-zero exit is a normal result returned structurally ({stdout, stderr, // exit}); only spawn/timeout failures surface as IPC errors. // -// TODO(native-apps): COMING SOON. The real model delivers the binary via the -// app store (manifest `assets`: per-OS/arch url+sha256, staged at $APP/), -// and this runner execs that staged path — not an assumed-installed command. -// See docs/NATIVE-APPS.md. The command must currently already be installed on the -// operator's host. Exec is a declared `proc.exec` capability (user-consented at -// install); it is not yet brokered per-call. See docs/CLI-ADAPTER.md. 
+// Native delivery: when the app ships `assets`, the binary is delivered from the +// Pilot R2 artifact registry (per-OS/arch url+sha256 in install.json, fetched + +// verified + staged at $APP/ by backend/stage.go), and main rewrites +// this runner's base command to that staged path — so the host need not have the +// CLI pre-installed. With no assets, the base command is resolved from PATH as +// before. Exec is the declared `proc.exec` capability (user-consented at +// install); it is not brokered per-call. See docs/R2-ARTIFACT-REGISTRY.md and +// docs/CLI-ADAPTER.md. package backend import ( diff --git a/internal/scaffold/templates/example.pilot.app.yaml b/internal/scaffold/templates/example.pilot.app.yaml index 51d1f17..00a124b 100644 --- a/internal/scaffold/templates/example.pilot.app.yaml +++ b/internal/scaffold/templates/example.pilot.app.yaml @@ -88,7 +88,7 @@ listing: # date: "2026-06-16" # notes: ["Initial release"] -# --- a cli backend instead (COMING SOON; see docs/CLI-ADAPTER.md) --- +# --- a cli backend instead (see docs/CLI-ADAPTER.md) --- # backend: # type: cli # command: ["weathercli"] # base argv; method args appended @@ -108,3 +108,17 @@ listing: # params: {args: "verbatim argv forwarded to weathercli"} # cli: # passthrough: true + +# --- native delivery: ship the binary from the Pilot R2 artifact registry --- +# For a cli app whose binary is NOT already on the host, list the per-OS/arch +# artifacts you uploaded in the publish form's Artifacts step. At install the +# adapter fetches the asset matching the host, verifies its sha256, stages it +# under $APP, runs any install `args` (in `order`), and execs the staged path. +# See docs/R2-ARTIFACT-REGISTRY.md. 
+# assets: +# # a single self-contained binary: +# - {os: linux, arch: amd64, url: "https://artifacts.pilotprotocol.network/io.pilot.weather/0.1.0/linux-amd64/weathercli", +# sha256: "<64-hex>", exec_path: bin/weathercli, order: 1} +# # an archive that unpacks to a wrapper + libs, with a one-time setup step: +# - {os: darwin, arch: arm64, url: "https://artifacts.pilotprotocol.network/io.pilot.weather/0.1.0/darwin-arm64/weathercli.tar.gz", +# sha256: "<64-hex>", unpack: tar.gz, exec_path: weathercli-0.1.0-darwin-arm64/weathercli, order: 1, args: ["--accept-license"]} diff --git a/internal/scaffold/templates/main.go.tmpl b/internal/scaffold/templates/main.go.tmpl index 2ac0834..ec8306c 100644 --- a/internal/scaffold/templates/main.go.tmpl +++ b/internal/scaffold/templates/main.go.tmpl @@ -69,8 +69,27 @@ func main() { if err != nil { log.Fatalf("{{.BinaryName}}: backend config: %v", err) } +{{- else}} +{{- if .HasAssets}} + // Native delivery: fetch this host's binaries from the Pilot R2 artifact + // registry (verify sha → stage under $APP → run ordered install args) and + // exec the staged path, not an assumed-installed command. See backend/stage.go. 
+ appDir := os.Getenv("APP") + if *manifestPath != "" { + appDir = filepath.Dir(*manifestPath) + } + stagedCmd, err := backend.StageAssets(appDir) + if err != nil { + log.Fatalf("{{.BinaryName}}: install assets: %v", err) + } + base := {{printf "%#v" .Backend.Command}} + if stagedCmd != "" { + base[0] = stagedCmd + } + runner := backend.NewRunner(base{{range .Backend.EnvPassthrough}}, {{printf "%q" .}}{{end}}) {{- else}} runner := backend.NewRunner({{printf "%#v" .Backend.Command}}{{range .Backend.EnvPassthrough}}, {{printf "%q" .}}{{end}}) +{{- end}} {{- end}} d := ipc.NewDispatcher() diff --git a/internal/scaffold/templates/manifest.json.tmpl b/internal/scaffold/templates/manifest.json.tmpl index 860a14c..0a1539d 100644 --- a/internal/scaffold/templates/manifest.json.tmpl +++ b/internal/scaffold/templates/manifest.json.tmpl @@ -13,9 +13,6 @@ {{- end}} "{{.Namespace}}.help" ], -{{/* TODO(native-apps): emit an "assets" array (per-OS/arch download url + sha256 - + exec_path) for native/CLI apps; fold asset shas into the signed payload. - See docs/NATIVE-APPS.md. http (translation-only) apps need no assets. 
*/}} "grants": [ {"cap": "fs.read", "target": "$APP/config.json"}, {{- if .Backend.NeedsSecrets}} @@ -30,6 +27,14 @@ {{- end}} {{- if eq .Backend.Type "cli"}} {"cap": "proc.exec", "target": "{{index .Backend.Command 0}}"}, +{{- end}} +{{- if .HasAssets}} + {"cap": "fs.read", "target": "$APP/install.json"}, + {"cap": "fs.write", "target": "$APP"}, +{{- range .AssetHosts}} + {"cap": "net.dial", "target": "{{.}}", + "if": {"kind": "rate", "params": {"per": "min", "limit": {{$.Grants.RatePerMin}}}}}, +{{- end}} {{- end}} {"cap": "audit.log", "target": "*"} {{- if .Backend.X402}}, diff --git a/internal/scaffold/templates/stage.go.tmpl b/internal/scaffold/templates/stage.go.tmpl new file mode 100644 index 0000000..4f0451b --- /dev/null +++ b/internal/scaffold/templates/stage.go.tmpl @@ -0,0 +1,314 @@ +// Asset staging for {{.ID}} — native binary delivery from the Pilot R2 artifact +// registry. GENERATED by pilot-app (only for cli apps that ship assets); edit +// pilot.app.yaml and re-generate. +// +// At startup the adapter calls StageAssets($APP). It reads $APP/install.json +// (shipped in the bundle), selects the asset(s) matching this host's os/arch, +// and for each — in ascending install order — fetches it from its R2 URL, +// verifies its sha256 against the (tamper-pinned) install spec, stages it under +// $APP (a single file at exec_path, or a tar.gz extracted in place), and runs +// any install args. The fronted command then execs the staged exec_path instead +// of an assumed-installed binary. +// +// Integrity: each asset's sha256 is checked after download; the whole bundle +// tarball is itself sha-pinned in the catalogue, so install.json (and thus the +// expected shas) cannot be altered without failing the install-time check. 
package backend

import (
	"bufio"
	"context"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"runtime"
	"sort"
	"strings"
	"time"
)

// fetchTimeout bounds a single asset download so a stuck registry can't hang the
// adapter's startup indefinitely.
const fetchTimeout = 10 * time.Minute

// installStepTimeout bounds one post-stage install command (e.g. running the
// staged binary once with "init"). The tar(1) listing and extraction calls in
// this file are bounded by the same value.
const installStepTimeout = 2 * time.Minute

// maxAssetBytes caps how many bytes of a single download are read, so a
// malicious or corrupt artifact can't fill the disk with one response.
// NOTE(review): extraction is delegated to tar(1); nothing in this file applies
// the cap to unpacked files.
const maxAssetBytes = 2 << 30 // 2 GiB

// installSpec is the decoded form of $APP/install.json.
type installSpec struct {
	// Schema, App and Version are decoded but not consulted by the staging
	// code in this file.
	Schema  int    `json:"schema"`
	App     string `json:"app"`
	Version string `json:"version"`
	// Command is compared against each host asset's exec_path (in full, or by
	// its final path element) to pick the path StageAssets returns.
	Command string `json:"command"`
	// Assets lists every downloadable artifact, for all platforms.
	Assets []installAsset `json:"assets"`
}

// installAsset describes one downloadable artifact for a single os/arch pair.
type installAsset struct {
	// Role selects the file mode of a single-file asset: "data" is installed
	// 0644, anything else 0755.
	Role string `json:"role"`
	// OS and Arch are matched against runtime.GOOS and runtime.GOARCH.
	OS   string `json:"os"`
	Arch string `json:"arch"`
	// URL is fetched with an HTTP GET.
	URL string `json:"url"`
	// SHA256 is the expected hex digest of the downloaded bytes; it also names
	// the ".staged" marker file.
	SHA256 string `json:"sha256"`
	// Unpack set to "tar.gz" extracts the download under $APP; any other value
	// installs the download as a single file at ExecPath.
	Unpack string `json:"unpack"`
	// ExecPath is a slash-separated path relative to $APP.
	ExecPath string `json:"exec_path"`
	// Order sorts the host's assets ascending before staging.
	Order int `json:"order"`
	// Args, when non-empty, are passed to the staged ExecPath as an install step.
	Args []string `json:"args"`
}

// StageAssets materializes the registry assets for this host and returns the
// absolute path of the staged command binary (the asset whose exec_path matches
// install.json's "command"). When there is no install.json the app ships no
// assets, so it returns ("", nil) and the caller keeps the command as-is. It
// returns an error when install.json lists no asset for this os/arch.
+func StageAssets(appDir string) (string, error) { + raw, err := os.ReadFile(filepath.Join(appDir, "install.json")) + if errors.Is(err, os.ErrNotExist) { + return "", nil + } + if err != nil { + return "", fmt.Errorf("stage: read install.json: %w", err) + } + var spec installSpec + if err := json.Unmarshal(raw, &spec); err != nil { + return "", fmt.Errorf("stage: parse install.json: %w", err) + } + + var host []installAsset + for _, a := range spec.Assets { + if a.OS == runtime.GOOS && a.Arch == runtime.GOARCH { + host = append(host, a) + } + } + if len(host) == 0 { + return "", fmt.Errorf("stage: no asset for %s/%s; available: %s", runtime.GOOS, runtime.GOARCH, availablePlatforms(spec.Assets)) + } + sort.SliceStable(host, func(i, j int) bool { return host[i].Order < host[j].Order }) + + var cmdPath string + for _, a := range host { + execPath, err := stageOne(appDir, a) + if err != nil { + return "", err + } + if base := path.Base(a.ExecPath); base == spec.Command || a.ExecPath == spec.Command { + cmdPath = execPath + } + if len(a.Args) > 0 { + if err := runInstallStep(execPath, a.Args); err != nil { + return "", fmt.Errorf("stage: install step for %q failed: %w", a.ExecPath, err) + } + } + } + return cmdPath, nil +} + +// stageOne ensures the asset is materialized under appDir and returns the +// absolute exec path. A single-file asset is written to $APP/exec_path; a +// tar.gz asset is extracted in place and exec_path names a file inside the +// extracted tree. Staging is idempotent: a sha-stamped marker skips re-work on +// re-spawn. 
+func stageOne(appDir string, a installAsset) (string, error) { + execAbs := filepath.Join(appDir, filepath.FromSlash(a.ExecPath)) + marker := filepath.Join(appDir, ".staged", a.SHA256) + if _, err := os.Stat(marker); err == nil { + if _, err := os.Stat(execAbs); err == nil { + return execAbs, nil // already staged + verified + } + } + + body, err := download(a.URL, a.SHA256) + if err != nil { + return "", err + } + defer os.Remove(body) + + switch a.Unpack { + case "tar.gz": + if err := extractTarGz(body, appDir); err != nil { + return "", fmt.Errorf("stage: extract %q: %w", a.ExecPath, err) + } + default: + if err := installFile(body, execAbs, a.Role); err != nil { + return "", err + } + } + if _, err := os.Stat(execAbs); err != nil { + return "", fmt.Errorf("stage: exec_path %q missing after staging %s: %w", a.ExecPath, a.URL, err) + } + if err := os.MkdirAll(filepath.Dir(marker), 0o755); err == nil { + _ = os.WriteFile(marker, nil, 0o644) + } + return execAbs, nil +} + +// download fetches url to a temp file, verifying its sha256 streams-as-it-goes. +// Returns the temp file path (caller removes it). A mismatch is fatal so a +// tampered or wrong artifact is never installed. 
func download(url, wantSHA string) (string, error) {
	// One deadline covers the whole exchange, including reading the body.
	ctx, cancel := context.WithTimeout(context.Background(), fetchTimeout)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", fmt.Errorf("stage: request %s: %w", url, err)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("stage: fetch %s: %w", url, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("stage: fetch %s: HTTP %d", url, resp.StatusCode)
	}

	tmp, err := os.CreateTemp("", "pilot-asset-*")
	if err != nil {
		return "", fmt.Errorf("stage: temp file: %w", err)
	}
	// Every byte goes to the temp file and the hash in the same pass.
	// NOTE(review): a body longer than maxAssetBytes is cut off by LimitReader
	// without an error here; it then fails below as a sha256 mismatch rather
	// than as a size error.
	h := sha256.New()
	if _, err := io.Copy(io.MultiWriter(tmp, h), io.LimitReader(resp.Body, maxAssetBytes)); err != nil {
		tmp.Close()
		os.Remove(tmp.Name())
		return "", fmt.Errorf("stage: download %s: %w", url, err)
	}
	tmp.Close()
	if got := hex.EncodeToString(h.Sum(nil)); got != wantSHA {
		os.Remove(tmp.Name())
		return "", fmt.Errorf("stage: sha256 mismatch for %s: want %s, got %s (refusing to install tampered or wrong artifact)", url, wantSHA, got)
	}
	return tmp.Name(), nil
}

// installFile moves a downloaded single-file asset into place under $APP,
// chmod 0755 for a binary (default) or 0644 when role is "data". The parent
// directory of dest is created if needed.
// NOTE(review): the move is a rename when possible; the copy fallback writes
// dest in place and leaves src for the caller to remove.
func installFile(src, dest, role string) error {
	if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
		return fmt.Errorf("stage: mkdir for %q: %w", dest, err)
	}
	mode := os.FileMode(0o755)
	if role == "data" {
		mode = 0o644
	}
	// The mode is set on the temp file first so a successful rename carries it.
	if err := os.Chmod(src, mode); err != nil {
		return fmt.Errorf("stage: chmod %q: %w", dest, err)
	}
	if err := os.Rename(src, dest); err != nil {
		// Rename can fail across filesystems (temp dir vs $APP); fall back to copy.
		if err := copyFile(src, dest, mode); err != nil {
			return fmt.Errorf("stage: install %q: %w", dest, err)
		}
	}
	return nil
}

// extractTarGz unpacks a gzipped tar under dir using the host's tar(1). The
// system tar is used deliberately: real-world CLI bundles use GNU/sparse tar
// features (e.g. sparse disk images) that Go's archive/tar rejects, while tar(1)
// is present on every linux/darwin host and handles them. Before extracting,
// every entry name is scanned and any absolute path or "../" traversal is
// rejected (zip-slip defence), since tar's own stripping is not relied upon.
// The extraction itself is bounded by installStepTimeout.
func extractTarGz(archive, dir string) error {
	if err := assertSafeArchive(archive); err != nil {
		return err
	}
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return err
	}
	ctx, cancel := context.WithTimeout(context.Background(), installStepTimeout)
	defer cancel()
	cmd := exec.CommandContext(ctx, "tar", "-xzf", archive, "-C", dir)
	if out, err := cmd.CombinedOutput(); err != nil {
		// tar's combined output is included so the failure is diagnosable.
		return fmt.Errorf("tar -xzf: %w: %s", err, strings.TrimSpace(string(out)))
	}
	return nil
}

// assertSafeArchive lists the archive (tar -tzf) and rejects any member that is
// an absolute path or escapes the extraction root via "..".
// NOTE(review): only member names are examined; this listing does not show
// symlink or hardlink targets, so they are not checked here.
func assertSafeArchive(archive string) error {
	ctx, cancel := context.WithTimeout(context.Background(), installStepTimeout)
	defer cancel()
	cmd := exec.CommandContext(ctx, "tar", "-tzf", archive)
	out, err := cmd.Output()
	if err != nil {
		return fmt.Errorf("tar -tzf (list): %w", err)
	}
	sc := bufio.NewScanner(strings.NewReader(string(out)))
	// Allow member names up to 1 MiB; a longer line surfaces through sc.Err().
	sc.Buffer(make([]byte, 0, 64*1024), 1<<20)
	for sc.Scan() {
		name := strings.TrimSpace(sc.Text())
		if name == "" {
			continue
		}
		if strings.HasPrefix(name, "/") {
			return fmt.Errorf("unsafe absolute path %q in archive", name)
		}
		// Any ".." path segment is refused, wherever it appears in the name.
		for _, seg := range strings.Split(name, "/") {
			if seg == ".." {
				return fmt.Errorf("unsafe traversal path %q in archive", name)
			}
		}
	}
	return sc.Err()
}

// copyFile copies src to dest, creating or truncating dest. mode is the
// permission passed to os.OpenFile, which applies it only when dest is created.
func copyFile(src, dest string, mode os.FileMode) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()
	out, err := os.OpenFile(dest, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, mode)
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, in); err != nil {
		out.Close()
		return err
	}
	// Return the Close error so a failed final write is not lost.
	return out.Close()
}

// runInstallStep runs a post-stage command (the publisher's optional install
// args) by invoking the staged binary with those args, e.g. with "init". The
// command inherits the adapter's environment and is bounded by
// installStepTimeout. A non-zero exit fails the install so a broken setup
// never silently serves.
func runInstallStep(execPath string, args []string) error {
	ctx, cancel := context.WithTimeout(context.Background(), installStepTimeout)
	defer cancel()
	cmd := exec.CommandContext(ctx, execPath, args...)
	cmd.Env = os.Environ()
	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("%s %s: %w: %s", execPath, strings.Join(args, " "), err, strings.TrimSpace(string(out)))
	}
	return nil
}

// availablePlatforms renders the os/arch tuples an app DOES ship, for a clear
// "no binary for your platform" error an agent can act on. The tuples are
// de-duplicated, sorted and comma-separated; with no assets it returns "none".
func availablePlatforms(assets []installAsset) string {
	seen := map[string]bool{}
	var out []string
	for _, a := range assets {
		k := a.OS + "/" + a.Arch
		if !seen[k] {
			seen[k] = true
			out = append(out, k)
		}
	}
	sort.Strings(out)
	if len(out) == 0 {
		return "none"
	}
	return strings.Join(out, ", ")
}
diff --git a/scripts/e2e-smolvm.sh b/scripts/e2e-smolvm.sh
new file mode 100755
index 0000000..39449f5
--- /dev/null
+++ b/scripts/e2e-smolvm.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+# e2e-smolvm.sh — end-to-end proof of native-app delivery from the Pilot R2
+# artifact registry, using a real, complex CLI: smolvm (smol-machines/smolvm), a
+# microVM runtime shipped as a tar.gz (wrapper script + binary + libs + images).
#
# Flow (mirrors what a publisher + a host actually do):
#   1. map THIS host to smolvm's asset name and the pilot os/arch tuple
#   2. download smolvm's release tarball                  (publisher has the artifact)
#   3. sha256 it and upload it to the R2 artifact         (the publish form's Artifacts step)
#      registry (dev bucket), then re-check the public URL
#   4. run the scaffold runtime e2e: build the generated  (pilotctl appstore install + call)
#      cli adapter, let it fetch+verify+extract the
#      artifact from R2 and exec it
#
# Requirements: bash, curl/tar, shasum or sha256sum, aws CLI, go (gh is used for
# the download when present), and R2 S3 credentials in the env:
#   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, R2_ENDPOINT, R2_BUCKET, R2_PUBLIC_BASE
# Sensible defaults target the pilot-artifacts-dev bucket.
set -euo pipefail

SMOLVM_VERSION="${SMOLVM_VERSION:-1.2.0}"
R2_ENDPOINT="${R2_ENDPOINT:?set R2_ENDPOINT to your account S3 endpoint}"
R2_BUCKET="${R2_BUCKET:-pilot-artifacts-dev}"
R2_PUBLIC_BASE="${R2_PUBLIC_BASE:-https://pub-2328865fa11041b8a5efba00b940ec14.r2.dev}"
export AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-auto}"

if [[ -z "${AWS_ACCESS_KEY_ID:-}" || -z "${AWS_SECRET_ACCESS_KEY:-}" ]]; then
  echo "error: set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY (R2 S3 keys) in the env" >&2
  exit 2
fi

# sha256_of prints the hex sha256 of the named file, or of stdin when called
# without arguments. shasum is preferred; sha256sum covers hosts that lack it.
sha256_of() {
  if command -v shasum >/dev/null 2>&1; then
    shasum -a 256 "$@" | awk '{print $1}'
  elif command -v sha256sum >/dev/null 2>&1; then
    sha256sum "$@" | awk '{print $1}'
  else
    echo "error: need shasum or sha256sum on PATH" >&2
    return 2
  fi
}

repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
work="$(mktemp -d)"
trap 'rm -rf "$work"' EXIT

# --- 1. host platform → smolvm asset name + pilot os/arch tuple ---------------
os="$(uname -s | tr '[:upper:]' '[:lower:]')"   # darwin | linux
machine="$(uname -m)"
case "$machine" in
  arm64|aarch64) smol_arch=arm64;  pilot_arch=arm64 ;;
  x86_64|amd64)  smol_arch=x86_64; pilot_arch=amd64 ;;
  *) echo "unsupported arch: $machine" >&2; exit 2 ;;
esac
# asset_dir is the top-level directory inside the release tarball.
asset_dir="smolvm-${SMOLVM_VERSION}-${os}-${smol_arch}"
tarball="${asset_dir}.tar.gz"
echo "==> host ${os}/${pilot_arch}; smolvm asset ${tarball}"

# --- 2. fetch the release tarball ---------------------------------------------
echo "==> downloading smolvm ${SMOLVM_VERSION}"
if command -v gh >/dev/null 2>&1; then
  gh release download "v${SMOLVM_VERSION}" --repo smol-machines/smolvm --pattern "$tarball" --dir "$work" --clobber
else
  curl -fsSL "https://github.com/smol-machines/smolvm/releases/download/v${SMOLVM_VERSION}/${tarball}" -o "$work/$tarball"
fi

sha="$(sha256_of "$work/$tarball")"
echo "==> sha256=${sha}"

# --- 3. upload to the R2 artifact registry (the Artifacts step) ---------------
key="io.pilot.smolvm/${SMOLVM_VERSION}/${os}-${pilot_arch}/${tarball}"
echo "==> uploading to s3://${R2_BUCKET}/${key}"
aws s3 cp "$work/$tarball" "s3://${R2_BUCKET}/${key}" --endpoint-url="$R2_ENDPOINT" >/dev/null
public_url="${R2_PUBLIC_BASE}/${key}"

# verify the public URL serves the exact bytes we uploaded
echo "==> verifying public URL integrity"
got="$(curl -fsSL "$public_url" | sha256_of)"
[[ "$got" == "$sha" ]] || { echo "public URL sha mismatch: $got != $sha" >&2; exit 1; }
echo "    ok: ${public_url}"

# --- 4. run the install+call e2e against the live R2 object -------------------
echo "==> running adapter delivery e2e (build → fetch from R2 → verify → extract → exec)"
cd "$repo_root"
PILOT_E2E_ASSET_URL="$public_url" \
PILOT_E2E_ASSET_SHA256="$sha" \
PILOT_E2E_ASSET_EXECPATH="${asset_dir}/smolvm" \
PILOT_E2E_ASSET_CALLARG="--version" \
PILOT_E2E_ASSET_EXPECT="$SMOLVM_VERSION" \
  go test ./internal/scaffold/ -run TestR2AssetDeliveryE2E -v -count=1

echo "==> e2e OK: smolvm ${SMOLVM_VERSION} delivered from R2 and executed via the pilot cli adapter"