From 123d8db819c79b960158397d36f48c9509961bc4 Mon Sep 17 00:00:00 2001
From: Gudge <gudge@microsoft.com>
Date: Fri, 19 Jun 2026 12:53:29 -0700
Subject: [PATCH] test(e2e): characterize executor run-to-completion behavior

This PR adds cross-platform executor characterization tests that lock in
the current run-to-completion behavior of the native one-shot executors
(mxc-exec-mac / lxc-exec / wxc-exec.exe) and wires them into CI, so the
upcoming unified SandboxBackend/Runner refactor can't silently change
existing dev/user flows.

Details

* New characterization suites in wxc_e2e_tests: Seatbelt (macOS) and
  Bubblewrap (Linux) cover exit code, stdout, env and working-directory
  handling, and timeout; ProcessContainer (Windows) covers exit
  code/stdout/timeout and is opt-in via MXC_E2E_HOST_PREPPED=1. New
  harness helpers locate/run the native executor with per-run env/cwd,
  gate on bwrap, and fix macOS target-triple discovery.
* CI wiring: the macOS job now builds/tests -p wxc_e2e_tests; the Linux
  job installs bubblewrap (and relaxes the Ubuntu 24.04 unprivileged
  user-namespace AppArmor clamp) and runs the suite. wxc_e2e_tests
  previously ran on neither platform.
* The Seatbelt env-inheritance (empty process.env) and launcher-cwd
  (empty process.cwd) tests are deliberate regression guards: the
  refactor's always-clear-env / always-rewrite-cwd behavior will turn
  them red.
* Not covered: stdin/TTY (the run-to-completion harness closes stdin, so
  the SIGTTIN regression needs a PTY harness) and Windows ProcessContainer
  on CI (needs a host-prepped lane). Both are follow-ups.

Tests

* CI green on macOS, Linux, and Windows: the macOS and Linux cargo-test
  jobs now execute and pass the new suites; clippy (-D warnings) and
  rustfmt --check are clean on aarch64-apple-darwin and
  x86_64-unknown-linux-gnu.
* First CI execution empirically confirmed the Bubblewrap
  run-to-completion tree-kill gap (a forked process survives the timeout)
  and that Seatbelt tree-kills correctly; assertions were tuned to current
  behavior accordingly.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .github/workflows/Build.Linux.Job.yml         |  21 ++
 .github/workflows/Build.MacOS.Job.yml         |   8 +-
 src/testing/wxc_e2e_tests/src/lib.rs          | 111 ++++++++
 .../tests/e2e_bubblewrap_characterization.rs  | 170 ++++++++++++
 .../e2e_processcontainer_characterization.rs  | 136 ++++++++++
 .../tests/e2e_seatbelt_characterization.rs    | 242 ++++++++++++++++++
 6 files changed, 686 insertions(+), 2 deletions(-)
 create mode 100644 src/testing/wxc_e2e_tests/tests/e2e_bubblewrap_characterization.rs
 create mode 100644 src/testing/wxc_e2e_tests/tests/e2e_processcontainer_characterization.rs
 create mode 100644 src/testing/wxc_e2e_tests/tests/e2e_seatbelt_characterization.rs

diff --git a/.github/workflows/Build.Linux.Job.yml b/.github/workflows/Build.Linux.Job.yml
index 422974c71..2c9a55b8c 100644
--- a/.github/workflows/Build.Linux.Job.yml
+++ b/.github/workflows/Build.Linux.Job.yml
@@ -54,6 +54,27 @@ jobs:
         run: cargo test --locked --release --target ${{ matrix.target }}
              --no-default-features --features hyperlight
 
+      # Bubblewrap is required to run the executor characterization tests in
+      # wxc_e2e_tests (they skip via has_bwrap() when it is absent). lxc-exec
+      # always includes the Bubblewrap backend (bwrap_common is a non-optional
+      # dependency), so the binary built above can drive it.
+      - name: Install Bubblewrap
+        working-directory: ${{ github.workspace }}
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y bubblewrap
+          # Ubuntu 24.04 runners restrict unprivileged user namespaces via
+          # AppArmor, which blocks `bwrap --unshare-user`. Relax it so the
+          # sandbox can start (no-op on kernels without this knob).
+          sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0 || true
+
+      # Runs the Bubblewrap executor characterization tests. lxc-exec was built
+      # into src/target/<triple>/release above, where find_binary() locates it.
+      - name: Test executor characterization (wxc_e2e_tests)
+        working-directory: src
+        run: cargo test --locked --release --target ${{ matrix.target }}
+             -p wxc_e2e_tests
+
       # linux_test_proxy is a separate workspace member, not a dep of lxc.
       - name: Build linux-test-proxy
         working-directory: src
diff --git a/.github/workflows/Build.MacOS.Job.yml b/.github/workflows/Build.MacOS.Job.yml
index d5e510a5c..d7309e2f9 100644
--- a/.github/workflows/Build.MacOS.Job.yml
+++ b/.github/workflows/Build.MacOS.Job.yml
@@ -39,11 +39,15 @@ jobs:
       # workspace has Windows-only crates that won't compile here.
       - name: Build
         run: cargo build --locked --release --target aarch64-apple-darwin
-             -p mxc_darwin -p seatbelt_common -p wxc_common
+             -p mxc_darwin -p seatbelt_common -p wxc_common -p wxc_e2e_tests
 
+      # wxc_e2e_tests includes the Seatbelt executor characterization tests.
+      # mxc-exec-mac (built above via -p mxc_darwin) is what they drive; the
+      # tests skip via has_platform_exec() if it is missing. sandbox-exec needs
+      # no elevation, so they run in this standard macOS job.
       - name: Test
         run: cargo test --locked --release --target aarch64-apple-darwin
-             -p mxc_darwin -p seatbelt_common -p wxc_common
+             -p mxc_darwin -p seatbelt_common -p wxc_common -p wxc_e2e_tests
 
       - name: Upload binaries
         uses: actions/upload-artifact@v4
diff --git a/src/testing/wxc_e2e_tests/src/lib.rs b/src/testing/wxc_e2e_tests/src/lib.rs
index fbd8afff7..0f390b247 100644
--- a/src/testing/wxc_e2e_tests/src/lib.rs
+++ b/src/testing/wxc_e2e_tests/src/lib.rs
@@ -66,6 +66,10 @@ fn current_triple() -> &'static str {
         "x86_64-unknown-linux-gnu"
     } else if cfg!(all(target_os = "linux", target_arch = "aarch64")) {
         "aarch64-unknown-linux-gnu"
+    } else if cfg!(all(target_os = "macos", target_arch = "aarch64")) {
+        "aarch64-apple-darwin"
+    } else if cfg!(all(target_os = "macos", target_arch = "x86_64")) {
+        "x86_64-apple-darwin"
     } else {
         ""
     }
@@ -482,6 +486,113 @@ pub fn run_wxc_config_value(
     run_executable(label, &exe, args)
 }
 
+// ---------------------------------------------------------------------------
+// Cross-platform executor characterization helpers
+//
+// These drive the *native* one-shot executor binary for the current OS
+// (`mxc-exec-mac` on macOS, `lxc-exec` on Linux, `wxc-exec.exe` on Windows)
+// with an in-memory config, optionally setting the child process's environment
+// and working directory. They exist to lock in the current run-to-completion
+// behavior (exit code, stdout, env/cwd inheritance, timeout) before the
+// unified `SandboxBackend`/`Runner` refactor lands.
+// ---------------------------------------------------------------------------
+
+/// The native one-shot executor binary name for the current platform.
+pub fn platform_exec_binary_name() -> &'static str {
+    if cfg!(target_os = "windows") {
+        "wxc-exec.exe"
+    } else if cfg!(target_os = "macos") {
+        "mxc-exec-mac"
+    } else {
+        "lxc-exec"
+    }
+}
+
+/// Locate the native one-shot executor binary for the current platform.
+pub fn find_platform_exec() -> Option<PathBuf> {
+    find_binary(platform_exec_binary_name())
+}
+
+/// Whether the native executor binary for this platform is available.
+pub fn has_platform_exec() -> bool {
+    match find_platform_exec() {
+        Some(p) => {
+            println!("Using {} at {}", platform_exec_binary_name(), p.display());
+            true
+        }
+        None => {
+            println!(
+                "SKIPPED: {} not found — build the native executor first",
+                platform_exec_binary_name()
+            );
+            false
+        }
+    }
+}
+
+/// Whether `bwrap` (Bubblewrap) is installed and runnable on this Linux host.
+/// Bubblewrap characterization tests skip cleanly when it is absent (e.g. a CI
+/// runner without `bubblewrap` installed).
+pub fn has_bwrap() -> bool {
+    let available = Command::new("bwrap")
+        .arg("--version")
+        .output()
+        .map(|o| o.status.success())
+        .unwrap_or(false);
+    if !available {
+        println!("SKIPPED: bwrap not found on PATH — install `bubblewrap` to run these tests");
+    }
+    available
+}
+
+/// Opt-in switch for the Windows ProcessContainer characterization tests.
+///
+/// AppContainer/BaseContainer execution requires an elevated, host-prepped
+/// Windows host (see `docs/host-prep.md`). Standard CI runners are NOT capable,
+/// so these tests are skipped unless a host-prepped lane explicitly sets
+/// `MXC_E2E_HOST_PREPPED=1`. This keeps them from ever red-failing on incapable
+/// CI while still being runnable on a prepared box.
+pub fn host_prepped_optin() -> bool {
+    let enabled = std::env::var("MXC_E2E_HOST_PREPPED").as_deref() == Ok("1");
+    if !enabled {
+        println!(
+            "SKIPPED: ProcessContainer characterization requires a host-prepped Windows host; \
+             set MXC_E2E_HOST_PREPPED=1 on a prepared lane to enable"
+        );
+    }
+    enabled
+}
+
+/// Run the current platform's native executor binary with an in-memory config
+/// value (serialised + base64-encoded via `--config-base64`), optionally
+/// setting environment variables and a working directory on the *executor*
+/// process. `extra_env`/`cwd` are how the inheritance characterization tests
+/// observe whether the sandboxed child picks up the launcher's env/cwd.
+pub fn run_platform_config_value(
+    label: &str,
+    config: &serde_json::Value,
+    extra_env: &[(&str, &str)],
+    cwd: Option<&Path>,
+) -> CommandResult {
+    let exe = find_platform_exec().expect("native executor binary should be available");
+    let encoded = STANDARD.encode(config.to_string().as_bytes());
+
+    let start = Instant::now();
+    let mut cmd = Command::new(&exe);
+    cmd.arg("--config-base64").arg(encoded);
+    for (key, value) in extra_env {
+        cmd.env(key, value);
+    }
+    if let Some(dir) = cwd {
+        cmd.current_dir(dir);
+    }
+    let output = cmd
+        .output()
+        .unwrap_or_else(|error| panic!("failed to execute {label}: {error}"));
+
+    command_result(label, output, start.elapsed().as_millis())
+}
+
 /// Run `wxc-test-driver.exe` against a directory or a single config file.
 pub fn run_test_driver(target: &Path, extra_args: &[&str]) -> CommandResult {
     let exe = find_binary("wxc-test-driver.exe").expect("wxc-test-driver.exe should be available");
diff --git a/src/testing/wxc_e2e_tests/tests/e2e_bubblewrap_characterization.rs b/src/testing/wxc_e2e_tests/tests/e2e_bubblewrap_characterization.rs
new file mode 100644
index 000000000..c76ba2d23
--- /dev/null
+++ b/src/testing/wxc_e2e_tests/tests/e2e_bubblewrap_characterization.rs
@@ -0,0 +1,170 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Bubblewrap (Linux) executor **characterization** tests.
+//!
+//! These lock in the *current* run-to-completion behavior of the `lxc-exec`
+//! Bubblewrap path before the unified `SandboxBackend`/`Runner` refactor lands.
+//! They assert what the code does **today**.
+//!
+//! Unlike Seatbelt, Bubblewrap already `--clearenv`s unconditionally and runs
+//! the child with `stdin` closed, so the env/stdin contracts pinned here are
+//! ones the refactor should *preserve*. (The stdin/`SIGTTIN` regression that the
+//! refactor introduces is only observable under a real PTY, which the
+//! `.output()`-based harness cannot provide — that needs a separate PTY harness
+//! and is tracked as a follow-up.)
+//!
+//! They run in the existing Linux CI job (`cargo test`) **only when `bwrap` is
+//! installed** — `has_bwrap()` skips them cleanly otherwise. Each test also
+//! skips if `lxc-exec` has not been built.
+#![cfg(target_os = "linux")]
+
+use serde_json::json;
+use wxc_e2e_tests::{has_bwrap, has_platform_exec, run_platform_config_value};
+
+const SCHEMA_VERSION: &str = "0.7.0-alpha";
+
+/// Whether the Bubblewrap characterization prerequisites are present.
+fn ready() -> bool {
+    has_platform_exec() && has_bwrap()
+}
+
+/// Build a one-shot config that omits `containment` so the binary selects its
+/// OS-native backend (Bubblewrap on Linux).
+fn config(label: &str, command_line: &str) -> serde_json::Value {
+    json!({
+        "version": SCHEMA_VERSION,
+        "containerId": format!("char-bwrap-{label}"),
+        "process": { "commandLine": command_line }
+    })
+}
+
+#[test]
+fn bubblewrap_propagates_exit_code() {
+    if !ready() {
+        return;
+    }
+    let result =
+        run_platform_config_value("bwrap exit code", &config("exit-code", "exit 7"), &[], None);
+    assert_eq!(
+        result.code,
+        Some(7),
+        "expected exit 7, got {:?}\n--- stderr ---\n{}",
+        result.code,
+        result.stderr
+    );
+}
+
+#[test]
+fn bubblewrap_streams_stdout() {
+    if !ready() {
+        return;
+    }
+    let result = run_platform_config_value(
+        "bwrap stdout",
+        &config("stdout", "echo CHAR_BWRAP_STDOUT_71c4d"),
+        &[],
+        None,
+    );
+    assert_eq!(result.code, Some(0), "stderr: {}", result.stderr);
+    assert!(
+        result.combined_output().contains("CHAR_BWRAP_STDOUT_71c4d"),
+        "stdout missing sentinel:\n{}",
+        result.combined_output()
+    );
+}
+
+/// CHARACTERIZES CURRENT BEHAVIOR.
+///
+/// Bubblewrap runs with `--clearenv`, so the sandboxed child does *not* inherit
+/// the launcher's environment even when `process.env` is empty. The refactor
+/// should preserve this; if it ever turns RED the env model has drifted.
+#[test]
+fn bubblewrap_clears_host_env_by_default() {
+    if !ready() {
+        return;
+    }
+    let marker = "CHAR_BWRAP_SHOULD_NOT_APPEAR_8a02f";
+    let result = run_platform_config_value(
+        "bwrap env clear",
+        &config("env-clear", "printf 'MARKER=[%s]\\n' \"$MXC_CHAR_MARKER\""),
+        &[("MXC_CHAR_MARKER", marker)],
+        None,
+    );
+    assert_eq!(result.code, Some(0), "stderr: {}", result.stderr);
+    let out = result.combined_output();
+    assert!(
+        out.contains("MARKER=[]"),
+        "expected cleared env (MARKER=[]); current Bubblewrap --clearenv behavior. Output:\n{out}"
+    );
+    assert!(
+        !out.contains(marker),
+        "host env marker leaked into the sandbox. Output:\n{out}"
+    );
+}
+
+/// Locks in that an explicitly requested `process.env` reaches the child.
+#[test]
+fn bubblewrap_applies_requested_env() {
+    if !ready() {
+        return;
+    }
+    let mut cfg = config("env-set", "printf 'SET=[%s]\\n' \"$MXC_CHAR_SET\"");
+    cfg["process"]["env"] = json!(["MXC_CHAR_SET=from_config_c93b"]);
+    let result = run_platform_config_value("bwrap env set", &cfg, &[], None);
+    assert_eq!(result.code, Some(0), "stderr: {}", result.stderr);
+    assert!(
+        result.combined_output().contains("SET=[from_config_c93b]"),
+        "expected requested env var to reach the child. Output:\n{}",
+        result.combined_output()
+    );
+}
+
+/// Locks in that an explicit `process.cwd` is honored (Bubblewrap emits
+/// `--chdir` for a non-empty working directory). `/` always exists inside the
+/// sandbox, so it is a stable target.
+#[test]
+fn bubblewrap_honors_explicit_process_cwd() {
+    if !ready() {
+        return;
+    }
+    let mut cfg = config("cwd-explicit", "pwd -P");
+    cfg["process"]["cwd"] = json!("/");
+    let result = run_platform_config_value("bwrap cwd explicit", &cfg, &[], None);
+    assert_eq!(result.code, Some(0), "stderr: {}", result.stderr);
+    assert_eq!(
+        result.stdout.trim(),
+        "/",
+        "expected child cwd to honor explicit process.cwd=/"
+    );
+}
+
+/// Characterizes that a `process.timeout` shorter than the workload is
+/// enforced and surfaces as a non-zero exit.
+///
+/// NOTE: on current `main`, the Bubblewrap run-to-completion timeout kills only
+/// the `bwrap` parent (`child.kill()`), so a forked descendant can survive,
+/// keep the stdout pipe open, and have its post-timeout output captured (the
+/// call can even block until the descendant exits). That tree-kill behavior is
+/// something the unified `Runner` refactor changes, so this test deliberately
+/// does NOT assert the absence of post-timeout output or a wall-clock bound —
+/// only that the timeout fires and fails the run.
+#[test]
+fn bubblewrap_timeout_is_enforced() {
+    if !ready() {
+        return;
+    }
+    let mut cfg = config("timeout", "echo CHAR_BEFORE; /bin/sleep 5; echo CHAR_AFTER");
+    cfg["process"]["timeout"] = json!(1500);
+    let result = run_platform_config_value("bwrap timeout", &cfg, &[], None);
+    let out = result.combined_output();
+    assert!(
+        out.contains("CHAR_BEFORE"),
+        "expected pre-timeout output. Output:\n{out}"
+    );
+    assert_ne!(
+        result.code,
+        Some(0),
+        "a timed-out run should exit non-zero. Output:\n{out}"
+    );
+}
diff --git a/src/testing/wxc_e2e_tests/tests/e2e_processcontainer_characterization.rs b/src/testing/wxc_e2e_tests/tests/e2e_processcontainer_characterization.rs
new file mode 100644
index 000000000..d59d7fded
--- /dev/null
+++ b/src/testing/wxc_e2e_tests/tests/e2e_processcontainer_characterization.rs
@@ -0,0 +1,136 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Windows ProcessContainer (AppContainer / BaseContainer) executor
+//! **characterization** tests.
+//!
+//! These lock in the *current* run-to-completion behavior of the `wxc-exec.exe`
+//! ProcessContainer path before the unified `SandboxBackend`/`Runner` refactor
+//! lands. They assert what the code does **today**.
+//!
+//! ProcessContainer execution requires an elevated, host-prepped Windows host
+//! (see `docs/host-prep.md`). Standard CI runners are **not** capable, so these
+//! tests skip unless a prepared lane sets `MXC_E2E_HOST_PREPPED=1`
+//! (`host_prepped_optin()`), and additionally skip if `wxc-exec.exe` has not
+//! been built or the host is missing process prerequisites. They therefore
+//! never red-fail on incapable CI, but lock in behavior on a prepared box.
+//!
+//! Scope note: env/cwd inheritance is intentionally not characterized here —
+//! the AppContainer "clean environment" model differs from the Unix backends,
+//! and the refactor's env/cwd regressions were Seatbelt-specific. These tests cover
+//! the universally-meaningful contracts: exit-code propagation, stdout capture,
+//! and timeout enforcement.
+#![cfg(target_os = "windows")]
+
+use serde_json::json;
+use wxc_e2e_tests::{
+    has_platform_exec, host_prepped_optin, run_platform_config_value, CommandResult,
+};
+
+const SCHEMA_VERSION: &str = "0.7.0-alpha";
+
+/// Whether the ProcessContainer characterization prerequisites are present.
+fn ready() -> bool {
+    has_platform_exec() && host_prepped_optin()
+}
+
+/// Build a one-shot config that omits `containment` so the binary selects its
+/// OS-native backend (ProcessContainer on Windows).
+fn config(label: &str, command_line: &str) -> serde_json::Value {
+    json!({
+        "version": SCHEMA_VERSION,
+        "containerId": format!("char-pc-{label}"),
+        "process": { "commandLine": command_line }
+    })
+}
+
+/// Skip (rather than fail) when the local host cannot launch a sandboxed
+/// process despite the opt-in being set (e.g. missing runtime prerequisites).
+fn skip_if_missing_prereq(result: &CommandResult) -> bool {
+    if result.is_missing_process_prerequisite() {
+        println!(
+            "SKIPPED: {} — host missing process prerequisites",
+            result.label
+        );
+        return true;
+    }
+    false
+}
+
+#[test]
+fn processcontainer_propagates_exit_code() {
+    if !ready() {
+        return;
+    }
+    let result = run_platform_config_value(
+        "processcontainer exit code",
+        &config("exit-code", "cmd /c exit 7"),
+        &[],
+        None,
+    );
+    if skip_if_missing_prereq(&result) {
+        return;
+    }
+    assert_eq!(
+        result.code,
+        Some(7),
+        "expected exit 7, got {:?}\n--- stderr ---\n{}",
+        result.code,
+        result.stderr
+    );
+}
+
+#[test]
+fn processcontainer_streams_stdout() {
+    if !ready() {
+        return;
+    }
+    let result = run_platform_config_value(
+        "processcontainer stdout",
+        &config("stdout", "cmd /c echo CHAR_PC_STDOUT_5d72e"),
+        &[],
+        None,
+    );
+    if skip_if_missing_prereq(&result) {
+        return;
+    }
+    assert_eq!(result.code, Some(0), "stderr: {}", result.stderr);
+    assert!(
+        result.combined_output().contains("CHAR_PC_STDOUT_5d72e"),
+        "stdout missing sentinel:\n{}",
+        result.combined_output()
+    );
+}
+
+/// Characterizes that a `process.timeout` shorter than the workload kills the
+/// child mid-run.
+#[test]
+fn processcontainer_timeout_kills_before_completion() {
+    if !ready() {
+        return;
+    }
+    let mut cfg = config(
+        "timeout",
+        "cmd /c \"echo CHAR_BEFORE & ping -n 8 127.0.0.1 >nul & echo CHAR_AFTER\"",
+    );
+    cfg["process"]["timeout"] = json!(1500);
+    let result = run_platform_config_value("processcontainer timeout", &cfg, &[], None);
+    if skip_if_missing_prereq(&result) {
+        return;
+    }
+    let out = result.combined_output();
+    assert!(
+        out.contains("CHAR_BEFORE"),
+        "expected pre-timeout output. Output:\n{out}"
+    );
+    assert!(
+        !out.contains("CHAR_AFTER"),
+        "workload should have been killed before completing. Output:\n{out}"
+    );
+    assert_ne!(result.code, Some(0), "timed-out run should not exit 0");
+    assert!(
+        result.wall_time_ms < 6000,
+        "timeout should fire well before the workload finishes; took {}ms",
+        result.wall_time_ms
+    );
+}
diff --git a/src/testing/wxc_e2e_tests/tests/e2e_seatbelt_characterization.rs b/src/testing/wxc_e2e_tests/tests/e2e_seatbelt_characterization.rs
new file mode 100644
index 000000000..4ee0b5cff
--- /dev/null
+++ b/src/testing/wxc_e2e_tests/tests/e2e_seatbelt_characterization.rs
@@ -0,0 +1,242 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+//! Seatbelt (macOS) executor **characterization** tests.
+//!
+//! These lock in the *current* run-to-completion behavior of the `mxc-exec-mac`
+//! executor before the unified `SandboxBackend`/`Runner` refactor (the risky
+//! part of the `mxc` library work) lands. They assert what the code does
+//! **today**, not what it ideally should do.
+//!
+//! Two of them — `seatbelt_inherits_host_env_when_process_env_empty` and
+//! `seatbelt_runs_in_launcher_cwd_when_process_cwd_empty` — pin behaviors that the
+//! unification was observed to change (it makes Seatbelt unconditionally
+//! `env_clear()` and rewrite the working directory). If a future refactor turns
+//! these RED, that is the signal to confirm the change is intentional and
+//! documented as a breaking change — not an accident.
+//!
+//! They run in the existing macOS CI job (`cargo test --target
+//! aarch64-apple-darwin`) with no extra infrastructure: `sandbox-exec` needs no
+//! elevation. Each test skips cleanly if `mxc-exec-mac` has not been built.
+#![cfg(target_os = "macos")]
+
+use std::fs;
+use std::path::PathBuf;
+
+use serde_json::json;
+use wxc_e2e_tests::{has_platform_exec, run_platform_config_value};
+
+const SCHEMA_VERSION: &str = "0.7.0-alpha";
+
+/// Build a one-shot config that omits `containment` so the binary selects its
+/// OS-native backend (Seatbelt on macOS). Tests add `cwd`/`env`/`timeout` as needed.
+fn config(label: &str, command_line: &str) -> serde_json::Value {
+    json!({
+        "version": SCHEMA_VERSION,
+        "containerId": format!("char-seatbelt-{label}"),
+        "process": { "commandLine": command_line }
+    })
+}
+
+/// Create a per-call temp directory (`mxc-char-<tag>-<nanos>`) for cwd characterization.
+fn unique_tempdir(tag: &str) -> PathBuf {
+    let nanos = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .unwrap()
+        .as_nanos();
+    let dir = std::env::temp_dir().join(format!("mxc-char-{tag}-{nanos}"));
+    fs::create_dir_all(&dir).expect("create temp dir");
+    dir
+}
+
+#[test]
+fn seatbelt_propagates_exit_code() {
+    if !has_platform_exec() {
+        return;
+    }
+    let result = run_platform_config_value(
+        "seatbelt exit code",
+        &config("exit-code", "exit 7"),
+        &[],
+        None,
+    );
+    assert_eq!(
+        result.code,
+        Some(7),
+        "expected exit 7, got {:?}\n--- stderr ---\n{}",
+        result.code,
+        result.stderr
+    );
+}
+
+#[test]
+fn seatbelt_streams_stdout() {
+    if !has_platform_exec() {
+        return;
+    }
+    let result = run_platform_config_value(
+        "seatbelt stdout",
+        &config("stdout", "echo CHAR_SEATBELT_STDOUT_9f31a"),
+        &[],
+        None,
+    );
+    assert_eq!(result.code, Some(0), "stderr: {}", result.stderr);
+    assert!(
+        result
+            .combined_output()
+            .contains("CHAR_SEATBELT_STDOUT_9f31a"),
+        "stdout missing sentinel:\n{}",
+        result.combined_output()
+    );
+}
+
+/// CHARACTERIZES CURRENT BEHAVIOR (regression guard).
+///
+/// With an empty `process.env`, the Seatbelt exec path does *not* clear the
+/// environment today, so the sandboxed child inherits the launcher's env. The
+/// unification refactor makes Seatbelt always `env_clear()` — which will turn
+/// this test RED. That is the intended early-warning signal.
+#[test]
+fn seatbelt_inherits_host_env_when_process_env_empty() {
+    if !has_platform_exec() {
+        return;
+    }
+    let marker = "CHAR_SEATBELT_ENV_INHERIT_4b7c2";
+    let result = run_platform_config_value(
+        "seatbelt env inherit",
+        &config(
+            "env-inherit",
+            "printf 'MARKER=[%s]\\n' \"$MXC_CHAR_MARKER\"",
+        ),
+        &[("MXC_CHAR_MARKER", marker)],
+        None,
+    );
+    assert_eq!(result.code, Some(0), "stderr: {}", result.stderr);
+    assert!(
+        result
+            .combined_output()
+            .contains(&format!("MARKER=[{marker}]")),
+        "expected the child to inherit MXC_CHAR_MARKER from the launcher \
+         (current Seatbelt behavior with empty process.env). Output:\n{}",
+        result.combined_output()
+    );
+}
+
+/// Locks in that an explicitly requested `process.env` entry reaches the child.
+/// It does not check whether other, inherited variables are scrubbed when set.
+#[test]
+fn seatbelt_applies_requested_env() {
+    if !has_platform_exec() {
+        return;
+    }
+    let mut cfg = config("env-set", "printf 'SET=[%s]\\n' \"$MXC_CHAR_SET\"");
+    cfg["process"]["env"] = json!(["MXC_CHAR_SET=from_config_e21a"]);
+    let result = run_platform_config_value("seatbelt env set", &cfg, &[], None);
+    assert_eq!(result.code, Some(0), "stderr: {}", result.stderr);
+    assert!(
+        result.combined_output().contains("SET=[from_config_e21a]"),
+        "expected requested env var to reach the child. Output:\n{}",
+        result.combined_output()
+    );
+}
+
+/// CHARACTERIZES CURRENT BEHAVIOR (regression guard).
+///
+/// With an empty `process.cwd`, the Seatbelt exec path does *not* change
+/// directory today, so the sandboxed child runs in the launcher's working
+/// directory. The unification refactor rewrites cwd to a policy path or `/` —
+/// which will turn this test RED.
+///
+/// We observe the cwd by having the child create a file via a relative path
+/// (a shell redirection) and checking which directory it lands in — this
+/// avoids `pwd`/`realpath`, which the default Seatbelt profile denies for
+/// arbitrary temp paths. `write_dir` is a second writable policy path that is
+/// *not* the launcher cwd, so a refactor that rewrites cwd to a policy path
+/// would drop the probe there (or elsewhere) instead of in `launch_dir`.
+#[test]
+fn seatbelt_runs_in_launcher_cwd_when_process_cwd_empty() {
+    if !has_platform_exec() {
+        return;
+    }
+    let write_dir = fs::canonicalize(unique_tempdir("cwd-write")).expect("canonicalize");
+    let launch_dir = fs::canonicalize(unique_tempdir("cwd-launch")).expect("canonicalize");
+    let probe = "char_cwd_inherit_probe.txt";
+    let mut cfg = config("cwd-inherit", &format!("echo CHAR_OK > {probe}"));
+    cfg["filesystem"] = json!({
+        "readwritePaths": [write_dir.to_string_lossy(), launch_dir.to_string_lossy()]
+    });
+    let result = run_platform_config_value("seatbelt cwd inherit", &cfg, &[], Some(&launch_dir));
+    let in_launch = launch_dir.join(probe).exists();
+    let in_write = write_dir.join(probe).exists();
+    let _ = fs::remove_dir_all(&launch_dir);
+    let _ = fs::remove_dir_all(&write_dir);
+    assert_eq!(
+        result.code,
+        Some(0),
+        "run failed:\n{}",
+        result.combined_output()
+    );
+    assert!(
+        in_launch && !in_write,
+        "expected the probe in the launcher cwd {} (current behavior with empty \
+         process.cwd); in_launch={in_launch} in_write={in_write}\n{}",
+        launch_dir.display(),
+        result.combined_output()
+    );
+}
+
+/// Locks in that an explicit `process.cwd` is honored.
+#[test]
+fn seatbelt_honors_explicit_process_cwd() {
+    if !has_platform_exec() {
+        return;
+    }
+    let dir = fs::canonicalize(unique_tempdir("cwd-explicit")).expect("canonicalize");
+    let probe = "char_cwd_explicit_probe.txt";
+    let mut cfg = config("cwd-explicit", &format!("echo CHAR_OK > {probe}"));
+    cfg["process"]["cwd"] = json!(dir.to_string_lossy());
+    cfg["filesystem"] = json!({ "readwritePaths": [dir.to_string_lossy()] });
+    let result = run_platform_config_value("seatbelt cwd explicit", &cfg, &[], None);
+    let exists = dir.join(probe).exists();
+    let _ = fs::remove_dir_all(&dir);
+    assert_eq!(
+        result.code,
+        Some(0),
+        "run failed:\n{}",
+        result.combined_output()
+    );
+    assert!(
+        exists,
+        "expected the probe file in the explicit process.cwd {}\n{}",
+        dir.display(),
+        result.combined_output()
+    );
+}
+
+/// Characterizes that a `process.timeout` shorter than the workload kills the
+/// child mid-run: the pre-timeout marker is emitted, the post-timeout marker is
+/// not, and the process exits non-zero well before the workload would finish.
+#[test]
+fn seatbelt_timeout_kills_before_completion() {
+    if !has_platform_exec() {
+        return;
+    }
+    let mut cfg = config("timeout", "echo CHAR_BEFORE; /bin/sleep 5; echo CHAR_AFTER");
+    cfg["process"]["timeout"] = json!(1500);
+    let result = run_platform_config_value("seatbelt timeout", &cfg, &[], None);
+    let out = result.combined_output();
+    assert!(
+        out.contains("CHAR_BEFORE"),
+        "expected pre-timeout output. Output:\n{out}"
+    );
+    assert!(
+        !out.contains("CHAR_AFTER"),
+        "workload should have been killed before completing. Output:\n{out}"
+    );
+    assert_ne!(result.code, Some(0), "timed-out run should not exit 0");
+    assert!(
+        result.wall_time_ms < 4500,
+        "timeout should fire well before the 5s workload; took {}ms",
+        result.wall_time_ms
+    );
+}