diff --git a/.github/workflows/harness-ci.yml b/.github/workflows/harness-ci.yml
index 82606ac..5a07c1f 100644
--- a/.github/workflows/harness-ci.yml
+++ b/.github/workflows/harness-ci.yml
@@ -59,9 +59,33 @@ name: harness CI (no LLM)
 #   TEST_P256_VERIFIER_ADDRESS_HEIMA        per test-environment refresh.
 #   TEST_K11_VERIFIER_ADDRESS_HEIMA
 #
+# Additional secrets for the optional path-conditional auto-deploy of the
+# test broker EC2 (issue #101 — see docs/ci-setup.md §7):
+#
+#   OIDC_AWS_ROLE_ARN_DEPLOY  IAM role assumed by deploy-test-broker. Trust
+#                             policy: federated on GitHub Actions OIDC,
+#                             conditioned on repo:litentry/agentKeys:*.
+#                             Inline policy: ssm:SendCommand on document/AWS-RunShellScript +
+#                             one EC2 instance ARN (= TEST_BROKER_INSTANCE_ID). The job also calls
+#                             ssm:DescribeInstanceInformation and ssm:GetCommandInvocation.
+#                             Provisioned by scripts/provision-ci-deploy-role.sh.
+#                             SEPARATE from TEST_OIDC_AWS_ROLE_ARN by design:
+#                             e2e role exercises the workload (sts:AssumeRole
+#                             on data roles, S3 verify), deploy role drives
+#                             the broker re-deploy on EC2. Separation of
+#                             duties — a compromise of one doesn't grant
+#                             the other's capability.
+#   TEST_BROKER_INSTANCE_ID   EC2 instance ID (i-xxxxxxxxxxxxxxxxx) hosting
+#                             test-broker.${ZONE}. Pinned in the deploy role's
+#                             inline SSM policy so a leaked session cred
+#                             cannot SendCommand on any other EC2.
+#
 # Gating: until TEST_OIDC_AWS_ROLE_ARN is set, the workflow's preflight
 # job surfaces a ::warning:: skip and exits clean — safe to merge before
-# the operator activates the test infra.
+# the operator activates the test infra. The auto-deploy gate is a
+# distinct check (OIDC_AWS_ROLE_ARN_DEPLOY + TEST_BROKER_INSTANCE_ID
+# both present) so harness validation can be activated without
+# auto-deploy, and vice versa.
 #
 # WebAuthn: never invoked. harness/v2-stage1-demo.sh defaults to
 # WEBAUTHN_MODE=0 (line 131), v2-stage2-demo.sh accepts --stub, neither
@@ -90,14 +114,27 @@ on:
         default: "all"
         type: choice
         options: ["1", "2", "3", "all"]
+      force_deploy_broker:  # manual override for the path filter; read by deploy-test-broker's `if:`
+        description: "Force deploy-test-broker even if no broker paths changed (performs a REAL re-deploy of the test EC2 — not a dry run)"
+        required: false
+        default: "false"
+        type: choice  # string choice (not boolean) so the `== 'true'` comparison is unambiguous
+        options: ["false", "true"]
 
 concurrency:
   group: harness-ci-${{ github.ref }}
   cancel-in-progress: true
 
 permissions:
-  id-token: write   # GitHub Actions OIDC → assume TEST_OIDC_AWS_ROLE_ARN
+  id-token: write       # GitHub Actions OIDC → assume TEST_OIDC_AWS_ROLE_ARN
+                        # (and OIDC_AWS_ROLE_ARN_DEPLOY for deploy-test-broker)
   contents: read
+  pull-requests: read   # dorny/paths-filter@v3 on pull_request events queries
+                        # the GitHub REST API (/repos/.../pulls/N/files) to list
+                        # changed paths. Without this, the API returns
+                        # 'Bad credentials' and the detect-changes job fails.
+                        # Required only on PR triggers; workflow_dispatch +
+                        # push triggers don't need it (no PR to query).
 
 jobs:
   rust-checks:
@@ -126,6 +163,44 @@ jobs:
       # map — same convention as the existing @claude review workflow.
       - run: cargo test --workspace -- --test-threads=1
 
+  detect-changes:
+    # Issue #101: path-conditional trigger for auto-deploy of the test broker.
+    # Publishes `broker_changed` (the `broker` filter result below), which
+    # deploy-test-broker's `if:` reads so PRs touching only docs/harness/test
+    # infra skip the ~3 min cargo rebuild + ssm wait and leave the EC2 alone.
+    #
+    # The filter is deliberately conservative (see issue #101 "Trade-offs"):
+    #   - workspace-shared crates (agentkeys-types, agentkeys-signer-protocol)
+    #     ripple into the broker → listed to avoid false negatives.
+    #   - Cargo.toml / Cargo.lock → listed too (a transitive dep bump can
+    #     affect broker behavior at runtime).
+    name: detect changed paths (broker / contracts)  # NOTE(review): only a `broker` filter is defined below; no contracts filter
+    runs-on: ubuntu-latest
+    outputs:
+      broker_changed: ${{ steps.f.outputs.broker }}  # string 'true' / 'false' from the paths-filter step
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Full history so paths-filter has a base commit to diff against;
+          # the default checkout is shallow (cheap on a small repo).
+          fetch-depth: 0
+      - uses: dorny/paths-filter@v3
+        id: f  # referenced as steps.f in `outputs` above
+        with:
+          filters: |
+            broker:
+              - 'crates/agentkeys-broker-server/**'
+              - 'crates/agentkeys-worker-*/**'
+              - 'crates/agentkeys-signer-protocol/**'
+              - 'crates/agentkeys-types/**'
+              - 'crates/agentkeys-core/**'
+              - 'scripts/setup-broker-host.sh'
+              - 'scripts/setup-broker-host.sh.d/**'
+              - 'scripts/broker.env'
+              - 'scripts/broker.test.env'
+              - 'Cargo.toml'
+              - 'Cargo.lock'
+
   preflight:
     # Gate the harness jobs on the test infra credentials being present.
     # Until the operator sets TEST_OIDC_AWS_ROLE_ARN, the harness jobs
@@ -135,6 +210,7 @@ jobs:
     needs: rust-checks
     outputs:
       should_run: ${{ steps.gate.outputs.should_run }}
+      deploy_ready: ${{ steps.gate.outputs.deploy_ready }}
     steps:
       - id: gate
         run: |
@@ -145,11 +221,281 @@ jobs:
             echo "should_run=false" >> "$GITHUB_OUTPUT"
             echo "::warning::TEST_OIDC_AWS_ROLE_ARN unset — harness E2E skipped. See workflow header for operator setup."
           fi
+          # deploy_ready: both deploy-side secrets must be present. Independent
+          # of should_run so an operator can opt INTO harness validation
+          # without enabling auto-deploy (e.g. while still vetting the deploy
+          # role's blast radius).
+          if [ -n "${{ secrets.OIDC_AWS_ROLE_ARN_DEPLOY }}" ] && [ -n "${{ secrets.TEST_BROKER_INSTANCE_ID }}" ]; then
+            echo "deploy_ready=true" >> "$GITHUB_OUTPUT"
+            echo "deploy secrets present; auto-deploy eligible"
+          else
+            echo "deploy_ready=false" >> "$GITHUB_OUTPUT"
+            echo "::notice::OIDC_AWS_ROLE_ARN_DEPLOY or TEST_BROKER_INSTANCE_ID unset — auto-deploy skipped. See docs/ci-setup.md §7."
+          fi
+
+  deploy-test-broker:
+    # Issue #101: drives `setup-broker-host.sh --test --yes` on the test broker
+    # EC2 via AWS SSM whenever a PR/push changes broker-affecting paths.
+    #
+    # Why deploy BEFORE harness-e2e (vs the issue's `needs: harness-e2e`):
+    # the failure mode this fixes is "harness scripts at version B vs broker
+    # binary at version A → spurious pass or confusing failure". Deploying
+    # first means harness-e2e validates the SAME revision the PR proposes —
+    # so a broker bug introduced by the PR is caught in the same PR, not
+    # leaked to whoever pushes next. Trade-off: a broker bug that crashes on
+    # startup will fail the deploy and skip harness-e2e (which is also the
+    # right signal — there's nothing to test).
+    #
+    # Concurrency: cross-PR races on the test EC2 are possible (PR-A deploys
+    # version A, PR-B deploys version B mid-flight, PR-A's harness sees B).
+    # Mitigation deferred to the followup PR — first cut accepts the race
+    # since concurrent broker-touching PRs are rare and the test EC2 is
+    # disposable. To add later: `concurrency: group: test-broker-deploy`
+    # with `cancel-in-progress: false` so deploys queue.
+    name: deploy broker to test EC2 (path-conditional)
+    needs: [preflight, detect-changes]
+    if: |
+      needs.preflight.outputs.should_run == 'true' &&
+      needs.preflight.outputs.deploy_ready == 'true' &&
+      (needs.detect-changes.outputs.broker_changed == 'true' ||
+       (github.event_name == 'workflow_dispatch' && inputs.force_deploy_broker == 'true'))
+    runs-on: ubuntu-latest
+    timeout-minutes: 25  # must exceed the poll step's 15-min budget (90 × 10s + CLI latency) so its own timeout error can surface
+    permissions:
+      id-token: write  # OIDC token for assuming the deploy role
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Configure AWS credentials via OIDC (deploy role)
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.OIDC_AWS_ROLE_ARN_DEPLOY }}
+          aws-region: ${{ secrets.TEST_AWS_REGION || 'us-east-1' }}
+          # Session name shows up in CloudTrail — distinct from the e2e
+          # role's session-name pattern so the deploy invocations are
+          # filterable separately.
+          role-session-name: gh-deploy-${{ github.run_id }}
+
+      - name: Sanity-check the test broker EC2 is SSM-managed
+        # Fail fast with a clear remediation path. Three failure modes are
+        # distinguished:
+        #   - AccessDenied → deploy role lacks ssm:DescribeInstanceInformation.
+        #     Operator re-runs provision-ci-deploy-role.sh on their laptop;
+        #     the inline policy is idempotently refreshed to include it.
+        #   - Empty/None  → instance not registered (no agent, no profile,
+        #     wrong region) OR the CLI failed for some other reason; the CLI's
+        #     stderr is replayed to the log so the two can be told apart.
+        #   - Other state → unexpected; fail loud with the value for triage.
+        env:
+          REGION: ${{ secrets.TEST_AWS_REGION || 'us-east-1' }}
+          INSTANCE_ID: ${{ secrets.TEST_BROKER_INSTANCE_ID }}
+        run: |
+          set -euo pipefail
+          stderr_file=$(mktemp)
+          state=$(aws ssm describe-instance-information \
+            --region "$REGION" \
+            --filters "Key=InstanceIds,Values=$INSTANCE_ID" \
+            --query 'InstanceInformationList[0].PingStatus' \
+            --output text 2>"$stderr_file" || echo "")
+          if grep -q "AccessDenied" "$stderr_file"; then
+            echo "::error::Deploy role lacks ssm:DescribeInstanceInformation."
+            echo "::error::Fix: re-run scripts/provision-ci-deploy-role.sh on the operator laptop —"
+            echo "::error::the inline policy is now refreshed with the missing perm (idempotent)."
+            rm -f "$stderr_file"
+            exit 1
+          fi
+          cat "$stderr_file" >&2; rm -f "$stderr_file"  # surface any non-AccessDenied CLI error instead of discarding it
+          [ -z "$state" ] && state="None"
+          case "$state" in
+            Online)
+              echo "::notice::SSM agent online on $INSTANCE_ID"
+              ;;
+            None)
+              echo "::error::$INSTANCE_ID is not SSM-managed (state=$state)."
+              echo "::error::SSH into the broker EC2 and run scripts/setup-broker-host.sh --test --yes —"
+              echo "::error::it auto-installs amazon-ssm-agent. See docs/ci-setup.md §7.1."
+              exit 1
+              ;;
+            *)
+              echo "::error::SSM agent state = $state on $INSTANCE_ID (expected Online)"
+              exit 1
+              ;;
+          esac
+
+      - name: Compute deploy ref (PR head or push branch)
+        # Picks GITHUB_HEAD_REF (PRs: the source branch), else GITHUB_REF_NAME
+        # (push events: the pushed ref's short name), else the literal "evm"
+        # as a last-resort default. Exports the result as DEPLOY_REF.
+        # The test EC2 fetches + checks out this ref before re-running
+        # setup-broker-host.sh, so the deployed binary matches the PR.
+        run: |
+          set -euo pipefail
+          ref="${GITHUB_HEAD_REF:-${GITHUB_REF_NAME:-evm}}"
+          if [ -z "$ref" ]; then  # defensive only: the :-evm default above means ref is never empty
+            echo "::error::could not derive a ref to deploy"
+            exit 1
+          fi
+          # Allow-list check (defense-in-depth): only letters, digits and
+          # . _ / - may appear, because the value is interpolated verbatim
+          # into the remote shell snippet built by the SendCommand step.
+          if printf '%s' "$ref" | grep -qE '[^A-Za-z0-9._/-]'; then
+            echo "::error::ref '$ref' contains unsupported characters"
+            exit 1
+          fi
+          echo "DEPLOY_REF=$ref" >> "$GITHUB_ENV"
+          echo "::notice::will deploy ref: $ref"
+
+      - name: SendCommand — fetch + checkout + setup-broker-host.sh --test --yes
+        env:
+          REGION: ${{ secrets.TEST_AWS_REGION || 'us-east-1' }}
+          INSTANCE_ID: ${{ secrets.TEST_BROKER_INSTANCE_ID }}
+          # Operator-pinnable override; the auto-discover loop below covers the
+          # common candidates when this isn't set.
+          REPO_DIR_OVERRIDE: ${{ secrets.TEST_BROKER_REPO_DIR }}
+        run: |
+          set -euo pipefail
+          # Compose the remote shell script. `$DEPLOY_REF` (exported through
+          # GITHUB_ENV by the previous step) is expanded by the runner's shell;
+          # the remote SSM-driven shell sees the literal branch name. The remote
+          # shell runs as root (SSM-default on Ubuntu AMIs); git ops use
+          # `sudo -u <owner>` so the working tree stays owned by whoever
+          # originally cloned it (typically ubuntu, sometimes agentkeys / root).
+          # After checkout the tree is hard-reset to origin/<ref> when that
+          # remote branch exists: a force-pushed PR branch cannot fast-forward,
+          # and a best-effort `pull --ff-only` would leave the stale commit
+          # deployed. Local edits to tracked files on the EC2 are discarded.
+          # Repo location: TEST_BROKER_REPO_DIR override first, then common
+          # candidates; fail fast with a remediation hint if none has the repo.
+          read -r -d '' deploy_script <<EOF || true
+          set -euo pipefail
+          REPO_DIR_OVERRIDE="\${REPO_DIR_OVERRIDE:-$REPO_DIR_OVERRIDE}"
+          REPO_DIR=""
+          for candidate in "\$REPO_DIR_OVERRIDE" /home/ubuntu/agentKeys /home/ubuntu/agentkeys /home/ubuntu/agentkey /home/agentkey/agentKeys /home/agentkey/agentkeys /home/agentkeys/agentKeys /opt/agentkeys /srv/agentkeys /root/agentKeys /root/agentkeys; do
+            [ -n "\$candidate" ] || continue
+            if [ -f "\$candidate/scripts/setup-broker-host.sh" ]; then
+              REPO_DIR=\$candidate
+              break
+            fi
+          done
+          if [ -z "\$REPO_DIR" ]; then
+            echo "could not locate the agentKeys checkout on this EC2" >&2
+            echo "candidates tried: \$REPO_DIR_OVERRIDE /home/ubuntu/agentKeys /home/agentkey/agentKeys /opt/agentkeys /srv/agentkeys /root/agentKeys etc." >&2
+            echo "Fix: pin the path via the TEST_BROKER_REPO_DIR repo secret." >&2
+            exit 2
+          fi
+          echo "using repo at \$REPO_DIR"
+          REPO_OWNER=\$(stat -c '%U' "\$REPO_DIR")
+          echo "tree is owned by \$REPO_OWNER"
+          cd "\$REPO_DIR"
+          sudo -u "\$REPO_OWNER" git fetch --prune origin
+          sudo -u "\$REPO_OWNER" git checkout "$DEPLOY_REF" || sudo -u "\$REPO_OWNER" git checkout "origin/$DEPLOY_REF"
+          if sudo -u "\$REPO_OWNER" git rev-parse --verify --quiet "refs/remotes/origin/$DEPLOY_REF" >/dev/null; then sudo -u "\$REPO_OWNER" git reset --hard "origin/$DEPLOY_REF"; fi
+          bash scripts/setup-broker-host.sh --test --yes --non-interactive
+          EOF
+
+          # jq --arg passes the multi-line script outside of shell parameter
+          # expansion (no modifier bugs per CLAUDE.md heredoc-trap rule).
+          params=$(jq -n --arg script "$deploy_script" '{
+            commands: [$script],
+            executionTimeout: ["900"]
+          }')
+
+          cmd_id=$(aws ssm send-command \
+            --region "$REGION" \
+            --instance-ids "$INSTANCE_ID" \
+            --document-name "AWS-RunShellScript" \
+            --comment "gh-ci deploy ${GITHUB_RUN_ID} ref=${DEPLOY_REF}" \
+            --parameters "$params" \
+            --query 'Command.CommandId' \
+            --output text)
+          echo "SSM_COMMAND_ID=$cmd_id" >> "$GITHUB_ENV"
+          echo "::notice::SSM SendCommand queued: $cmd_id"
+
+      - name: Poll SSM command until completion
+        env:
+          REGION: ${{ secrets.TEST_AWS_REGION || 'us-east-1' }}
+          INSTANCE_ID: ${{ secrets.TEST_BROKER_INSTANCE_ID }}
+        run: |
+          set -euo pipefail
+          # Poll every 10s, 90 tries (~15 min). The command runs setup-broker-host.sh
+          # (rebuilds + restarts broker/signer/4 workers; cold cargo cache ~10min,
+          # warm ~3min). NOTE(review): any CLI error below is read as "Pending".
+          for i in $(seq 1 90); do
+            sleep 10
+            status=$(aws ssm get-command-invocation \
+              --region "$REGION" \
+              --command-id "$SSM_COMMAND_ID" \
+              --instance-id "$INSTANCE_ID" \
+              --query 'Status' \
+              --output text 2>/dev/null || echo "Pending")
+            echo "iter=$i status=$status"
+            case "$status" in
+              Success)
+                # Print the tail of the remote stdout, then finish the step.
+                aws ssm get-command-invocation \
+                  --region "$REGION" \
+                  --command-id "$SSM_COMMAND_ID" \
+                  --instance-id "$INSTANCE_ID" \
+                  --query 'StandardOutputContent' \
+                  --output text | tail -200
+                echo "::notice::deploy ok (ssm command $SSM_COMMAND_ID)"
+                exit 0
+                ;;
+              Failed|Cancelled|TimedOut)
+                # Terminal failure: dump remote stdout + stderr for triage.
+                echo "::error::SSM command terminal status: $status"
+                aws ssm get-command-invocation \
+                  --region "$REGION" \
+                  --command-id "$SSM_COMMAND_ID" \
+                  --instance-id "$INSTANCE_ID" \
+                  --query '{stdout:StandardOutputContent,stderr:StandardErrorContent}' \
+                  --output json
+                exit 1
+                ;;
+              Pending|InProgress|Delayed)
+                continue
+                ;;
+              *)
+                echo "::warning::unexpected status: $status"  # not terminal here; the loop keeps polling
+                ;;
+            esac
+          done
+          echo "::error::SSM command $SSM_COMMAND_ID did not complete within 15min"
+          exit 1
 
   harness-e2e:
     name: harness/v2-stage*-demo.sh on Heima mainnet (test deployer)
-    needs: preflight
-    if: needs.preflight.outputs.should_run == 'true'
+    needs: [preflight, deploy-test-broker]
+    # Codex adversarial review (PR #102) confirmed: the harness's chain-mutating
+    # scripts (heima-fund-account.sh + heima-agent-create.sh) share ONE Heima
+    # test deployer wallet. The outer `concurrency: harness-ci-${{ github.ref }}`
+    # only cancels in-flight runs on the SAME ref — concurrent runs on DIFFERENT
+    # refs (PR branch + manual dispatch, two PRs, etc.) share the deployer and
+    # collide on nonce in the Heima mempool, surfacing as
+    # `replacement transaction underpriced`.
+    #
+    # This second concurrency group, scoped to the deployer (not the ref),
+    # serializes harness-e2e runs globally. `cancel-in-progress: false` lets the
+    # in-flight run finish; GitHub keeps at most ONE pending run per group, so a
+    # newer queued run replaces an older queued one (never the running one).
+    concurrency:
+      group: heima-test-deployer-nonce
+      cancel-in-progress: false
+    # Run when:
+    #   - preflight gates green (test infra is set up)
+    #   - AND either:
+    #       (a) deploy-test-broker succeeded (PR re-deployed the broker
+    #           to test EC2, validating fresh broker code), OR
+    #       (b) deploy-test-broker was skipped (no broker paths changed
+    #           OR deploy_ready=false — the EC2's existing binary still
+    #           covers the harness contract).
+    # !cancelled() forces evaluation even when the upstream `if:` skips
+    # deploy-test-broker (a skipped `needs:` dep fails the implicit
+    # `success()` filter), yet — unlike always() — lets a cancelled run stop.
+    if: |
+      needs.preflight.outputs.should_run == 'true' &&
+      !cancelled() &&
+      (needs.deploy-test-broker.result == 'success' ||
+       needs.deploy-test-broker.result == 'skipped')
     runs-on: ubuntu-latest
     timeout-minutes: 60
 
diff --git a/README.md b/README.md
index 8807068..75d499e 100644
--- a/README.md
+++ b/README.md
@@ -4,16 +4,20 @@ Credential broker for AI agents. A master (human) delegates scoped, revocable ac
 
 Status: pre-v0. Stage 5 in progress (see `harness/progress.json`).
 
-## What it does
+Architecture, language choices, trust boundaries: [`docs/arch.md`](docs/arch.md).
+
+---
+
+## 👤 For humans
+
+### What it does
 
 - **Master CLI** (`agentkeys`) — runs on your laptop; owns a session key in the OS keychain; approves pair/recover/scope-change requests.
 - **Sandbox daemon** (`agentkeys-daemon`) — runs inside the agent sandbox; brokers credential reads over MCP + a Unix socket; never exposes raw keys to the agent.
 - **Provisioner** (`agentkeys-provisioner` + `provisioner-scripts`) — Rust orchestrator drives TypeScript/Playwright scrapers to sign up for services and hand the resulting API key back through the trust boundary.
 - **Mock backend** (`agentkeys-mock-server`) — v0-only; mirrors the Heima parachain API so we can build end-to-end before the chain integration lands.
 
-Architecture, language choices, trust boundaries: [`docs/arch.md`](docs/arch.md).
-
-## Workspace layout
+### Workspace layout
 
 ```
 crates/
@@ -31,7 +35,7 @@ harness/                     stage-gated build harness + progress
 
 ~80% Rust, 100% of the security-critical path in Rust. TypeScript is confined to browser automation and (post-MVP) the Web GUI frontend.
 
-## Build & test
+### Build & test
 
 ```
 cargo build
@@ -50,12 +54,56 @@ cargo test -p agentkeys-daemon -p agentkeys-mcp
 cargo test -p agentkeys-provisioner
 ```
 
-## Development
+### First-machine setup
+
+Fresh laptop? Start with [`docs/dev-setup.md`](docs/dev-setup.md) — it walks you through rustup, jj, Node, AWS CLI, browser, and runs the workspace smoke tests.
 
-Staged build plan in [`docs/spec/plans/development-stages.md`](docs/spec/plans/development-stages.md). Each stage has a `harness/stage-N-done.sh` gate that must exit 0 before the stage is marked complete. Contributor workflow: [`CLAUDE.md`](CLAUDE.md).
+### Inner-loop dev
 
-Version control uses [jj (Jujutsu)](https://github.com/jj-vcs/jj), not raw git.
+Iterating on the broker, signer, mock-server, or operator-side scripts? [`docs/spec/broker-and-operator-dev-guide.md`](docs/spec/broker-and-operator-dev-guide.md) covers the local edit-build-test loop: which process to run on which port, how to point harness scripts at `localhost`, how to use `harness/v2-stage*-demo.sh` for resumable step-by-step testing.
 
-## License
+### License
 
 Dual-licensed under **MIT OR Apache-2.0**, at your choice.
+
+---
+
+## 🤖 For AI coding agents
+
+**You must read these before making any change.** They override defaults from your training data and cover the project-specific guardrails.
+
+| Read | Why |
+|---|---|
+| [`CLAUDE.md`](CLAUDE.md) | Project-specific rules: docs layout, /create-pr workflow in worktrees, terminology-source-of-truth, branch push policy, idempotent-remote-setup invariants, runbook-fix-fold-back policy. **Read first, every session.** |
+| [`docs/arch.md`](docs/arch.md) | Single source of truth for component inventory (K1–K11), trust boundaries, HDKD actor tree, per-actor binding ceremonies. When the per-doc detail outgrows arch.md, link outward — never duplicate. |
+| [`docs/spec/plans/development-stages.md`](docs/spec/plans/development-stages.md) | The 8-stage build plan. Each stage has a `harness/stage-N-done.sh` gate; never self-grade — run the gate. |
+| [`docs/spec/plans/execution-plan.md`](docs/spec/plans/execution-plan.md) | Orchestration runbook (ralph, team, ultraqa workflows). |
+| [`docs/spec/broker-and-operator-dev-guide.md`](docs/spec/broker-and-operator-dev-guide.md) | Inner edit-build-test loop for broker + operator-side code. Use this before suggesting changes to the broker's run-time behavior. |
+
+### Hard rules (from CLAUDE.md)
+
+These are non-negotiable. Violating them produces broken PRs / corrupted state.
+
+- **Use `jj` (Jujutsu), never raw `git`.** Common mappings in CLAUDE.md. The one exception: inside a Claude Code `.claude/worktrees/<name>/` worktree, the initial commit must use `git` (jj can't colocate in a git-worktree); then `cd` to the main repo and push via `jj git push`. Never include `Co-Authored-By:` lines in those commits.
+- **Branch `evm` pushes immediately.** On `evm`, push after every `jj describe` — the remote broker host pulls from `origin/evm` to redeploy. "I'll push at the end" silently breaks deploys.
+- **Diagnose before edit.** Reproduce the failure locally first; isolate the layer (shell / client / doc / broker code / network). If the cause is local to the operator's shell, respond with the one-line fix — don't edit the repo.
+- **Land the fix everywhere.** Once a local repro proves a fix is correct, land it the same turn — search the repo for every affected file, commit, push to `origin/evm`. Don't stop at "verified locally" or "fixed one file."
+- **Runbook fix fold-back.** When an operator hits a runbook failure, two things land in the same turn: (1) the targeted fix, (2) a revision to the runbook so the next operator doesn't hit the same trap.
+- **No hardcoded values.** Use env var + default, CLI flag + default, or a config file. If you must hardcode temporarily, log it in [`hardcoded.md`](hardcoded.md) with file:line + reason + what would unblock dynamic.
+- **Idempotent remote setup.** Every script that mutates remote state (AWS / Heima / CI / VM / DNS) must exit 0 on re-run without re-applying. Pre-check with `get-*` before mutating; log `ok | skip <reason> | fail <reason>`.
+- **Plan completion is all-or-nothing.** When implementing a plan, every numbered step must be done — or the PR summary's "What did NOT land" section must explicitly list what was skipped and why.
+- **Terminology source of truth.** Never invent a new name for a concept arch.md already names. If you find divergence, fix it in the same commit or document the alias in arch.md's "Canonical names" section.
+
+### Per-session protocol
+
+1. `jj log --limit 10 && cat harness/progress.json && bash harness/init.sh $(jq -r .current_stage harness/progress.json)`
+2. Read the stage contract for the current stage in `docs/spec/plans/development-stages.md`.
+3. Pick the HIGHEST-PRIORITY incomplete deliverable from `harness/features.json`.
+4. Implement ONE deliverable, run `cargo test -p <crate>`, `jj describe`, update `harness/features.json`, `jj new`.
+
+### Single entry points
+
+Don't reach for ad-hoc `systemctl`, `scp`, or `forge script` — these are wrapped:
+
+- **Remote broker host** (binary upgrades, systemd, nginx, env tweaks): `bash scripts/setup-broker-host.sh`
+- **Heima chain bring-up** (deploy, binding ceremonies, scope grants, K11 enroll, audit-row append, worker smoke): `bash scripts/setup-heima.sh`
diff --git a/docs/ci-setup.md b/docs/ci-setup.md
index 005d77b..0e270bc 100644
--- a/docs/ci-setup.md
+++ b/docs/ci-setup.md
@@ -365,6 +365,122 @@ gh workflow run harness-ci.yml --repo litentry/agentKeys --field stage=3
 
 When the workflow passes against the test stack, CI is live. Every subsequent push to a PR triggers it; you're done.
 
+### 7. (Optional) Wire auto-deploy of the test broker (issue [#101](https://github.com/litentry/agentKeys/issues/101))
+
+Without this step, the workflow validates against the **already-deployed** test broker. If a PR changes broker code (`crates/agentkeys-broker-server/**`, `crates/agentkeys-worker-*/**`, `crates/agentkeys-signer-protocol/**`, `scripts/setup-broker-host.sh*`, or any workspace-shared crate the broker links against), the test broker binary silently drifts from the PR's source tree — the harness then exercises *old* broker code against *new* harness scripts, producing either spurious passes or confusing failures.
+
+Step 7 wires a second OIDC role (`github-actions-agentkeys-deploy`) plus two new GitHub secrets. When activated, the workflow's `detect-changes` job sees broker-affecting paths in the diff, the `deploy-test-broker` job assumes that role, and `aws ssm send-command` drives `setup-broker-host.sh --test --yes` on the test EC2 — re-deploying the broker so `harness-e2e` validates the PR's actual code. The deploy job is **gated three ways**:
+
+1. `paths-filter` boolean (no broker code changed → skip).
+2. Both deploy secrets present (`OIDC_AWS_ROLE_ARN_DEPLOY` + `TEST_BROKER_INSTANCE_ID`).
+3. `preflight.outputs.should_run == 'true'` (test infra fully wired).
+
+If any gate fails, the deploy job is **skipped, not failed** — `harness-e2e` still runs against the existing broker binary. So this step is fully opt-in; partial activation is safe.
+
+#### 7.1 Run the provisioning script
+
+```bash
+awsp agentkeys-admin
+# Look up the test broker EC2 instance ID (one-shot — pin it once):
+TEST_BROKER_INSTANCE_ID=$(aws ec2 describe-instances \
+  --region "$REGION" \
+  --filters "Name=ip-address,Values=$(curl -sS "https://dns.google/resolve?name=$BROKER_HOST&type=A" | jq -r '.Answer[0].data')" \
+  --query 'Reservations[0].Instances[0].InstanceId' --output text)
+echo "$TEST_BROKER_INSTANCE_ID"   # → i-xxxxxxxxxxxxxxxxx
+
+# Idempotent provisioning — safe to re-run. Use --fix-ssm on the FIRST run
+# so the script auto-attaches AmazonSSMManagedInstanceCore to the broker EC2's
+# instance profile if it's missing (a fresh EC2 commonly lacks this policy).
+bash scripts/provision-ci-deploy-role.sh \
+  --test-broker-instance-id "$TEST_BROKER_INSTANCE_ID" \
+  --env-file scripts/operator-workstation.test.env \
+  --fix-ssm
+```
+
+The script:
+
+- Creates / refreshes the `github-actions-agentkeys-deploy` IAM role with a federated trust policy on the GitHub Actions OIDC provider, scoped to `repo:litentry/agentKeys:*` (any branch in this repo can trigger; the workflow's path filter + preflight gate further restrict when the role is actually used).
+- Attaches an inline policy `agentkeys-ci-deploy-ssm` with:
+  - `ssm:SendCommand` on `document/AWS-RunShellScript` + the one instance ARN. `AWS-RunShellScript` accepts arbitrary commands, so if the role's session creds leaked, a third party could run any shell command on the test EC2 — not just re-run `setup-broker-host.sh` — but only on that one instance. Recovery is `terraform apply`-style: re-running `setup-broker-host.sh` is idempotent, and the damage is contained to the test environment.
+  - `ssm:GetCommandInvocation` / `ssm:ListCommandInvocations` / `ssm:DescribeInstanceInformation` for status polling + the workflow's pre-deploy sanity check.
+  - `ec2:DescribeInstances` scoped to the one instance ID, for the workflow's pre-deploy sanity check.
+
+> Already provisioned the role before `ssm:DescribeInstanceInformation` was added to the policy template? Re-run the provisioning script. `put-role-policy` is idempotent — it overwrites the inline policy with the current source-of-truth shape, picking up any added permissions.
+- Verifies the test EC2 is registered with SSM (`PingStatus = Online`). With `--fix-ssm`, auto-remediates the common "instance profile is missing AmazonSSMManagedInstanceCore" case by attaching the policy and polling for up to 3 min for the SSM agent to refresh its creds. Without `--fix-ssm`, just reports the failure with manual fix instructions.
+
+**SSM remediation modes (what `--fix-ssm` covers, what it doesn't):**
+
+| Failure | What `--fix-ssm` does | What it CAN'T fix automatically |
+|---|---|---|
+| Instance profile missing `AmazonSSMManagedInstanceCore` | Attaches the policy, polls for Online | (handled) |
+| Policy already attached, agent process running with stale creds | Polls until agent refreshes (~1-3 min typical) | If poll times out: SSH + `sudo systemctl restart amazon-ssm-agent`, OR `aws ec2 reboot-instances …` |
+| Instance has NO instance profile at all | Creates a dedicated `agentkeys-test-broker-ssm` role + instance profile (EC2 trust + `AmazonSSMManagedInstanceCore`) and associates it with the EC2. IMDS surfaces the new creds within ~30s. Safe because the broker's app-layer AWS access uses static creds from `broker.env`, not IMDS — adding IMDS-served creds can only ADD capability for the SSM agent, not displace anything. | (handled) |
+| SSM Agent not installed (no `amazon-ssm-agent` unit) | Reports state; can't reach the box to install (operator's laptop has no SSH-into-EC2 capability from the provision script) | Re-run `bash scripts/setup-broker-host.sh --test --yes` on the EC2 — it now installs `amazon-ssm-agent` (snap preferred, .deb fallback) as part of broker bootstrap. One-shot manual recovery if you don't want to re-run the full setup: `ssh test-broker 'sudo snap install amazon-ssm-agent --classic && sudo systemctl enable --now snap.amazon-ssm-agent.amazon-ssm-agent.service'` |
+| Private VPC subnet without an SSM VPC endpoint | Reports state | Operator wires the VPC endpoint (unlikely for a public-IP broker, but possible) |
+
+Re-running the script after any of the operator-side fixes is safe (idempotent — every step is `get-*` pre-checked before any mutation).
+
+#### 7.2 Set the two new repo secrets
+
+```bash
+# Print the deploy role ARN you just provisioned (script also prints this):
+role_arn=$(aws iam get-role --role-name github-actions-agentkeys-deploy \
+  --query 'Role.Arn' --output text)
+
+gh secret set OIDC_AWS_ROLE_ARN_DEPLOY --repo litentry/agentKeys --body "$role_arn"
+gh secret set TEST_BROKER_INSTANCE_ID  --repo litentry/agentKeys --body "$TEST_BROKER_INSTANCE_ID"
+```
+
+| Secret | Purpose |
+|---|---|
+| `OIDC_AWS_ROLE_ARN_DEPLOY` | ARN of `github-actions-agentkeys-deploy` — assumed by the `deploy-test-broker` job via GitHub Actions OIDC. |
+| `TEST_BROKER_INSTANCE_ID` | EC2 instance ID (`i-…`) hosting `test-broker.${ZONE}`. The deploy role's inline policy is scoped to *this single instance*. |
+| `TEST_BROKER_REPO_DIR` | **Optional.** Absolute path of the agentKeys git checkout on the EC2 (e.g. `/home/ubuntu/agentKeys`). The deploy workflow auto-discovers across common candidates (`/home/ubuntu/agentKeys`, `/home/ubuntu/agentkeys`, `/opt/agentkeys`, `/srv/agentkeys`, `/root/agentKeys`), so this only needs to be set when the operator cloned to a non-standard path and the workflow's auto-discover step prints `could not locate the agentKeys checkout`. |
+
+#### 7.3 Dry-run validate
+
+Trigger the workflow manually with `force_deploy_broker=true` so the deploy fires regardless of whether the latest commit touched broker paths.
+
+**Pre-merge — `--ref` is required.** `gh workflow run` reads the workflow definition from the *default branch* (`main`) unless you tell it otherwise. Since the `force_deploy_broker` input lives on the PR branch, dispatching without `--ref` fails with `HTTP 422: Unexpected inputs provided: ["force_deploy_broker"]`. Pass `--ref` so GHA reads the workflow YAML (and its inputs) from the PR branch instead:
+
+```bash
+gh workflow run harness-ci.yml --repo litentry/agentKeys \
+  --ref claude/adoring-bell-1b9ca8 \
+  --field stage=1 \
+  --field force_deploy_broker=true
+```
+
+Replace `claude/adoring-bell-1b9ca8` with your actual PR branch name (`git rev-parse --abbrev-ref HEAD` if you're on it locally).
+
+**Post-merge — `--ref` is optional.** Once this PR is on `main`, dispatching without `--ref` will work because the input is part of the default-branch workflow definition. (The `--ref` form still works and lets you target any branch.)
+
+Then in the run logs:
+
+- `deploy-test-broker` should show `SSM agent online on i-…` (sanity check passed).
+- The `SendCommand` step prints the command ID; the next step polls until `Success`.
+- On success: the tail of `StandardOutputContent` shows `setup-broker-host.sh` finishing cleanly (`ok systemd unit … active`, `ok nginx running`, etc.).
+- On failure: stdout + stderr are dumped to the GHA log. The most common cause is `git checkout` failing on the EC2 because the source tree doesn't have the PR branch fetched — fix by ssh-ing into the box and running `sudo -u ubuntu git fetch --prune origin` once.
+
+#### 7.4 Disable / disarm
+
+Remove either secret to disarm — the workflow's `preflight.outputs.deploy_ready` will flip to `false` and the deploy job silently skips:
+
+```bash
+gh secret delete OIDC_AWS_ROLE_ARN_DEPLOY --repo litentry/agentKeys
+# or
+gh secret delete TEST_BROKER_INSTANCE_ID --repo litentry/agentKeys
+```
+
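+The IAM role can stay provisioned indefinitely — without the secrets this workflow never attempts to assume it, and the inline SSM perms are scoped to one instance. Note that the role's trust policy still admits any workflow in `litentry/agentKeys` (the ARN itself is not a credential), so delete the role if you need a hard revocation.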
+
+#### Out of scope for issue #101
+
+Per [issue #101](https://github.com/litentry/agentKeys/issues/101) "Out of scope":
+
+- **Prod broker auto-deploy** — never. The prod broker EC2 stays manual via `bash scripts/setup-broker-host.sh --upgrade` from the operator laptop, per CLAUDE.md "Remote broker host (single entry point)".
+- **Auto-deploy of test Heima EVM contracts** — deferred to a follow-up PR (issue #101 rollout plan step 7). Contract redeploys mint new addresses and require the `SECRETS_REWRITE_PAT` token to update six `TEST_*_ADDRESS_HEIMA` secrets — more risk than the broker deploy, so it ships separately.
+- **Mainnet prod contract redeploy** — never automatic. Manual via `bash scripts/setup-heima.sh` only.
+
 ## What the workflow does on every run
 
 1. Restores submodules + Rust toolchain + Foundry + cargo cache.
diff --git a/docs/spec/broker-and-operator-dev-guide.md b/docs/spec/broker-and-operator-dev-guide.md
new file mode 100644
index 0000000..88dcae8
--- /dev/null
+++ b/docs/spec/broker-and-operator-dev-guide.md
@@ -0,0 +1,336 @@
+# Broker + Local Operator Dev Guide
+
+**Audience:** developers iterating on the broker, the workers, or the operator-side scripts (`harness/`, `scripts/heima-*.sh`).
+**Scope:** the inner edit-build-test loop — running the broker stack on your laptop, exercising it with operator scripts, and knowing which knob to turn when something breaks.
+
+This guide is **not** the environment bootstrap doc (see [`docs/dev-setup.md`](../dev-setup.md)) or the deploy-to-real-host runbook (see [`docs/operator-runbook-stage7.md`](../operator-runbook-stage7.md)). Read those first if you have a fresh machine or you're standing up a new broker EC2.
+
+---
+
+## 1. The local stack at a glance
+
+The deployed broker host runs several processes on one EC2 (listed in the table below). For local dev you run the same processes on `localhost`, on the same ports, with the same env contract. Same code path — only the env values change.
+
+| Process | Default port | Crate | Purpose | Local-dev role |
+|---|---|---|---|---|
+| `agentkeys-mock-server` | `:8090` | `agentkeys-mock-server` | v0 backend; mirrors the Heima parachain extrinsic surface | Stand-in for the chain RPC + the legacy session-validation backend |
+| `agentkeys-broker-server` | `:8091` | `agentkeys-broker-server` | The credential broker — auth, cap-mint, OIDC issuer | The component you're most often editing |
+| `agentkeys-signer` (dev_key_service) | `:8092` | `agentkeys-broker-server` (same binary, different listener) | EVM keypair derivation from `omni_account` via HKDF | Stub for the future TEE signer (see [`signer-protocol.md`](./signer-protocol.md)) |
+| `agentkeys-worker-audit` | `:9092` | `agentkeys-worker-audit` | Merkle-root batching for credential audit | Only matters if you're touching audit code |
+| `agentkeys-worker-email` | `:9093` | `agentkeys-worker-email` | Inbound email handler (SES → cap-mint trigger) | Only matters for email-link auth |
+| `agentkeys-worker-creds` | `:9094` | `agentkeys-worker-creds` | Credential store — STS + S3 PrincipalTag-scoped | The data plane the cap-mint flow leads to |
+| `agentkeys-worker-memory` | `:9095` | `agentkeys-worker-memory` | Memory store — STS + S3 (per-actor isolation) | Symmetric with creds |
+
+In the deployed stack `nginx` fronts the broker + signer + 4 workers on `:443` with public hostnames. Locally you talk to the ports directly — no nginx, no TLS.
+
+---
+
+## 2. First-time local-stack bring-up
+
+After [`docs/dev-setup.md`](../dev-setup.md) §1–§2 (rust, jj, node, `cargo build --workspace --release`), generate the broker's two ES256 keypairs once:
+
+```bash
+mkdir -p ~/.agentkeys/broker
+cargo run -q --release -p agentkeys-broker-server -- keygen --purpose oidc    --out ~/.agentkeys/broker/oidc-keypair.json
+cargo run -q --release -p agentkeys-broker-server -- keygen --purpose session --out ~/.agentkeys/broker/session-keypair.json
+chmod 600 ~/.agentkeys/broker/{oidc,session}-keypair.json
+```
+
+These are the only persistent local state the broker needs. Treat them like any other dev secret — kept under `~/.agentkeys/`, gitignored at the home-directory level, never copied off your laptop. Regenerating them invalidates every previously-derived wallet that depended on the matching session pubkey, so don't `rm` them mid-session.
+
+---
+
+## 3. Inner loop A — edit broker code
+
+The broker reads its config from env vars and the two keypair files. Source a dev env file once per shell, then iterate with `cargo run`.
+
+### 3.1 The dev env
+
+Create `scripts/broker.dev.env` (gitignored — copy + edit from `scripts/broker.env`):
+
+```bash
+# Local-dev broker env — everything points at localhost.
+ACCOUNT_ID=000000000000                                    # placeholder; AWS calls go to mock backend
+BROKER_DATA_ROLE_ARN=arn:aws:iam::000000000000:role/dev    # never assumed in local dev
+BROKER_AWS_REGION=us-east-1                                # any region; not actually hit
+BROKER_OIDC_ISSUER=http://127.0.0.1:8091                   # matches --bind/--port below
+BROKER_OIDC_KEYPAIR_PATH=$HOME/.agentkeys/broker/oidc-keypair.json
+BROKER_SESSION_KEYPAIR_PATH=$HOME/.agentkeys/broker/session-keypair.json
+BROKER_AUTH_METHODS=wallet_sig,email_link
+BROKER_AUDIT_ANCHORS=sqlite                                # sqlite store; never writes to chain
+BROKER_EMAIL_SENDER=stub                                   # in-memory; no SES, no AWS creds needed
+BROKER_EMAIL_FROM_ADDRESS=dev@localhost
+BROKER_BACKEND_URL=http://127.0.0.1:8090                   # points at the local mock-server below
+
+# dev_key_service signer (issue #74 step 1b)
+DEV_KEY_SERVICE_MASTER_SECRET=local-dev-secret-32-bytes-min-length-please
+```
+
+Three lines matter most for local dev:
+
+- `BROKER_EMAIL_SENDER=stub` — skips SES; magic-link tokens land in an in-process `Vec` that you read back via the test harness or a `curl`-driven `/v1/auth/email/list-pending` endpoint (broker test feature).
+- `BROKER_AUDIT_ANCHORS=sqlite` — every audit row lands in a local SQLite file; nothing hits the chain. Set to `evm_testnet` ONLY when you've built with `--features audit-evm` AND you actually want to test the on-chain anchor path (Phase C, not shipped as of PR #102).
+- `BROKER_BACKEND_URL` — the broker calls a "backend" for legacy session validation (the v0 mock-server, or a real chain backend in v0.2+). In local dev this points at `agentkeys-mock-server :8090` started in §3.3 below.
+
+### 3.2 Build the broker with the right features
+
+`cargo run` defaults to debug + workspace default features. The broker MUST be built with `--features auth-email-link` if `BROKER_AUTH_METHODS` includes `email_link` (which the dev env above does) — otherwise the broker boot-fails with `BROKER_AUTH_METHODS="email_link": unknown or feature-gated-out auth method`.
+
+```bash
+# Iteration build (~10s warm, ~3min cold):
+cargo build -p agentkeys-broker-server --features auth-email-link
+
+# Or release for cycle-accurate testing (~30s warm, ~5min cold):
+cargo build --release -p agentkeys-broker-server --features auth-email-link
+```
+
+Cargo footgun (per [`scripts/setup-broker-host.sh:547`](../../scripts/setup-broker-host.sh)): never combine `-p agentkeys-broker-server -p agentkeys-mock-server --features auth-email-link` — cargo silently drops the feature flag. Always build the two binaries in separate `cargo build` invocations.
+
+### 3.3 Run the three foreground processes
+
+Three terminals. Source the dev env in each; pass `--bind 127.0.0.1 --port <p>`:
+
+```bash
+# Terminal 1 — mock-server (v0 backend the broker talks to)
+set -a; source scripts/broker.dev.env; set +a
+cargo run --release -p agentkeys-mock-server -- --bind 127.0.0.1 --port 8090
+
+# Terminal 2 — broker (your usual edit target)
+set -a; source scripts/broker.dev.env; set +a
+RUST_LOG=info,agentkeys_broker_server=debug \
+  cargo run --release -p agentkeys-broker-server --features auth-email-link -- \
+    --bind 127.0.0.1 --port 8091
+
+# Terminal 3 — signer (dev_key_service; serves /dev/derive-address + /dev/sign-*)
+set -a; source scripts/broker.dev.env; set +a
+cargo run --release -p agentkeys-broker-server -- \
+  --bind 127.0.0.1 --port 8092 --signer-only
+```
+
+The signer is the SAME binary as the broker (`agentkeys-broker-server`) with `--signer-only` — it serves only `/dev/*` + `/healthz` and shares the keypair files with the broker process on `:8091`.
+
+Skip workers (`agentkeys-worker-{audit,email,creds,memory}` on `:9092-:9095`) until you're editing them — the broker's hot path doesn't require them for most flows.
+
+### 3.4 Sanity check
+
+```bash
+curl -s http://127.0.0.1:8091/healthz                                # → "ok"
+curl -s http://127.0.0.1:8091/.well-known/openid-configuration | jq . # OIDC discovery doc
+curl -s http://127.0.0.1:8091/.well-known/jwks.json | jq .            # broker's JWKS
+```
+
+If healthz returns `ok` but the JWKS is empty, the keypair files aren't being read — check the paths in your dev env. If the broker boot-fails with `BROKER_AUTH_METHODS="email_link": unknown or feature-gated-out auth method`, you forgot `--features auth-email-link` on the cargo build.
+
+### 3.5 Hot-reload loop
+
+There's no `cargo watch` in the workspace, but the dev loop is fast enough without it:
+
+1. Edit Rust in `crates/agentkeys-broker-server/src/...`.
+2. `Ctrl-C` Terminal 2's broker.
+3. Re-run the `cargo run --release -p agentkeys-broker-server ...` command from §3.3 (shell history is your friend).
+4. The first re-run rebuilds the broker incrementally (§3.2 timings: ~30s warm for release, ~10s if you drop `--release`); subsequent runs reuse the artifact.
+
+For a tighter loop while editing a single module, write a unit test next to the module and use `cargo test -p agentkeys-broker-server <test_name>` — typically <2s per iteration.
+
+---
+
+## 4. Inner loop B — edit operator scripts
+
+The operator-side scripts (`harness/v2-stage{1,2,3}-demo.sh`, `scripts/heima-*.sh`, `scripts/agentkeys-*-demo.sh`) are the dev loop for the *operator workflow*: cap-mint, identity bootstrap, scope grants, S3 isolation tests. They run on your laptop and call the broker (local or remote) via plain HTTP + `cast` + `aws`.
+
+### 4.1 Point the operator env at the local broker
+
+Create `scripts/operator-workstation.dev.env` (gitignored — copy + edit from `scripts/operator-workstation.env`):
+
+```bash
+# Local-dev operator env — points the harness scripts at localhost
+ACCOUNT_ID=000000000000
+REGION=us-east-1
+BROKER_HOST=127.0.0.1:8091
+OIDC_ISSUER=http://127.0.0.1:8091
+AGENTKEYS_SIGNER_URL=http://127.0.0.1:8092
+BACKEND_URL=http://127.0.0.1:8090
+
+# Local-stack workers (skip these until you wire them up — broker hot path doesn't need them)
+AGENTKEYS_WORKER_AUDIT_URL=http://127.0.0.1:9092
+AGENTKEYS_WORKER_EMAIL_URL=http://127.0.0.1:9093
+AGENTKEYS_WORKER_CRED_URL=http://127.0.0.1:9094
+AGENTKEYS_WORKER_MEMORY_URL=http://127.0.0.1:9095
+
+# Local chain backbone — pick ONE based on what you're testing:
+#   anvil          — fully local (forge anvil running on 127.0.0.1:8545); fastest
+#   heima-paseo    — Heima testnet; real chain, no real money
+#   heima          — Heima mainnet (production); use with care
+AGENTKEYS_CHAIN=anvil
+```
+
+### 4.2 Run the canonical inner-loop demo
+
+[`harness/v2-stage1-demo.sh`](../../harness/v2-stage1-demo.sh) is the end-to-end exerciser most operator edits land against. It's a 13-step script whose main phases are: install CLI → email-link init → identity bootstrap → S3 envelope smoke test → chain bring-up → device register → agent create → scope grant → K11 enroll → cap-mint roundtrip.
+
+```bash
+set -a; source scripts/operator-workstation.dev.env; set +a
+
+# Full demo against local stack:
+bash harness/v2-stage1-demo.sh --chain anvil
+
+# Re-run just one step you're iterating on:
+bash harness/v2-stage1-demo.sh --only-step 7
+
+# Skip the slow bits (CLI build, chain deploy, S3 provisioning):
+bash harness/v2-stage1-demo.sh --skip-build --skip-deploy --skip-provision
+
+# Stop after a specific step (useful when bisecting a regression):
+bash harness/v2-stage1-demo.sh --to-step 5
+```
+
+The `--from-step N` / `--to-step N` / `--only-step N` triad is the inner-loop primitive — every step prints `[step N/M]` to stderr, every step is idempotent. If step 7 fails after a script edit, fix the script, re-run with `--from-step 7`, you keep the work from steps 1–6.
+
+### 4.3 Anvil for fully-local chain dev
+
+When you don't want to talk to Heima at all, run [foundry](https://book.getfoundry.sh/anvil/) anvil locally:
+
+```bash
+# Terminal 4 — local EVM (anvil) on :8545
+anvil --chain-id 31337 --port 8545
+```
+
+Then `AGENTKEYS_CHAIN=anvil` in your operator env makes every `cast send` hit anvil instead of Heima. The deployer wallet is whichever anvil-prefunded key you point at via `HEIMA_DEPLOYER_KEY` / `HEIMA_DEPLOYER_KEY_FILE`. Anvil's mempool is single-tenant — none of the [PR #102 nonce-contention issues](./plans/issue-101-ci-auto-deploy.md) bite locally.
+
+### 4.4 Editing `setup-broker-host.sh`
+
+`scripts/setup-broker-host.sh` is the canonical "single entry point" for the broker EC2 (per CLAUDE.md "Remote broker host (single entry point)" policy). When you change it, the unit-test is to dry-run it on a throwaway VM, but the practical inner loop is:
+
+1. Edit the script.
+2. `bash -n scripts/setup-broker-host.sh` — syntax check.
+3. SSH into the test broker EC2 (`bash scripts/ssh-broker.sh`), `cd ~/agentKeys`, `git pull`, `bash scripts/setup-broker-host.sh --test --yes` — exercise the full path.
+4. **Or** push to your PR branch and let the [CI auto-deploy](#5-inner-loop-c--ci-auto-deploy-issue-101) (PR #102) drive it on the test EC2.
+
+Step 4 is usually faster — no SSH, you get fresh logs in the GHA run, and the harness validates the deploy end-to-end.
+
+---
+
+## 5. Inner loop C — CI auto-deploy (issue #101)
+
+Per [PR #102](https://github.com/litentry/agentKeys/pull/102), pushing broker-affecting changes to a PR branch auto-deploys to the test EC2 via SSM and runs the full harness against the freshly-deployed broker. You see broker bugs in your own PR, not the next operator's.
+
+What counts as "broker-affecting" — the path-filter list in [`.github/workflows/harness-ci.yml`](../../.github/workflows/harness-ci.yml):
+
+```
+crates/agentkeys-broker-server/**
+crates/agentkeys-worker-*/**
+crates/agentkeys-signer-protocol/**
+crates/agentkeys-types/**
+crates/agentkeys-core/**
+scripts/setup-broker-host.sh
+scripts/setup-broker-host.sh.d/**
+scripts/broker.env
+scripts/broker.test.env
+Cargo.toml
+Cargo.lock
+```
+
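+If a push touches none of these paths, the deploy job is skipped and the harness runs against the already-deployed broker. Auto-deploy is also opt-in (gated on the `OIDC_AWS_ROLE_ARN_DEPLOY` + `TEST_BROKER_INSTANCE_ID` repo secrets — see [`docs/ci-setup.md`](../ci-setup.md) §7).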
+
+To dry-run the deploy without a broker code change, dispatch manually with the override:
+
+```bash
+gh workflow run harness-ci.yml --repo litentry/agentKeys \
+  --ref <your-branch> \
+  --field stage=1 \
+  --field force_deploy_broker=true
+```
+
+---
+
+## 6. Config-file map — which file controls what
+
+Three files, three audiences. The "is the broker reading the right thing" debug usually comes down to which one you sourced.
+
+| File | Where it lives | Who reads it | Local-dev override |
+|---|---|---|---|
+| [`scripts/broker.env`](../../scripts/broker.env) | **Broker host** (EC2 or your laptop's broker process) | `agentkeys-broker-server` (every entry has a matching constant in `crates/agentkeys-broker-server/src/env.rs`) | `scripts/broker.dev.env` (gitignored, copied from `broker.env`, swap hosts to `127.0.0.1`) |
+| [`scripts/operator-workstation.env`](../../scripts/operator-workstation.env) | **Operator laptop** | Every `harness/` + `scripts/heima-*.sh` script | `scripts/operator-workstation.dev.env` (gitignored, swap hosts to `127.0.0.1:809x`) |
+| [`scripts/broker.test.env`](../../scripts/broker.test.env) | **Test broker host** (CI auto-deploy target) | `agentkeys-broker-server` running on the test EC2 | Same shape as `broker.env`; CI workflow materializes per-run values into this on the runner |
+
+Mixing them on the wrong host is the most common config bug. The broker host should NEVER source `operator-workstation.env` — that file has AWS admin tooling vars (BUCKET, OIDC_PROVIDER_ARN) that don't exist as broker-server env vars and would silently shadow what the broker actually reads.
+
+---
+
+## 7. Debugging cheatsheet
+
+### 7.1 Logs
+
+The broker uses `tracing_subscriber` with `EnvFilter` ([`crates/agentkeys-broker-server/src/main.rs:73`](../../crates/agentkeys-broker-server/src/main.rs)). Control via `RUST_LOG`:
+
+```bash
+# Default — only INFO and above
+cargo run -p agentkeys-broker-server -- ...
+
+# Verbose for the broker, quiet for everything else
+RUST_LOG=info,agentkeys_broker_server=debug cargo run -p agentkeys-broker-server -- ...
+
+# Trace-level for one specific module
+RUST_LOG=info,agentkeys_broker_server::handlers::cap=trace cargo run -p agentkeys-broker-server -- ...
+```
+
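+On the deployed broker, logs go to the systemd journal. Quote the whole remote command: ssh joins its arguments and the remote shell re-splits them, which would break `--since '5 min ago'`:
+
+```bash
+ssh broker "journalctl -u agentkeys-broker --since '5 min ago' -f"
+ssh broker "journalctl -u agentkeys-signer --since '5 min ago' -f"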
+```
+
+### 7.2 Port collisions
+
+If `cargo run` errors with `Address already in use`, find the stuck process:
+
+```bash
+lsof -nP -iTCP:8091 -sTCP:LISTEN     # broker
+lsof -nP -iTCP:8090 -sTCP:LISTEN     # mock-server
+lsof -nP -iTCP:8092 -sTCP:LISTEN     # signer
+```
+
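+Kill by PID (the only `kill -9` you should reach for during dev) or by name: `pkill -f agentkeys-broker-server`. Note that the by-name form also matches the signer on `:8092` — it is the same binary started with `--signer-only` (§3.3).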
+
+### 7.3 The broker boots, then immediately exits
+
+Common shapes:
+
+| Symptom | Cause | Fix |
+|---|---|---|
+| `BROKER_AUTH_METHODS="email_link": unknown or feature-gated-out auth method` | Built without `--features auth-email-link` | Re-build with the feature; see §3.2 |
+| `failed to read OIDC keypair: No such file` | `BROKER_OIDC_KEYPAIR_PATH` doesn't exist | Re-run the `keygen` from §2 |
+| `BROKER_BACKEND_URL=http://127.0.0.1:8090: connection refused` | Mock-server isn't running on `:8090` | Start it (Terminal 1 in §3.3) |
+| Broker logs are silent | `RUST_LOG` unset and the default filter is too quiet for what you want | Add `RUST_LOG=debug` to your `cargo run` command |
+| `SES GetEmailIdentity: AccessDenied` | `BROKER_EMAIL_SENDER=ses` but no AWS creds in the shell | Set `BROKER_EMAIL_SENDER=stub` for local dev |
+
+### 7.4 The harness fails at a specific step
+
+Re-run with `--from-step N` to keep prior progress, OR `--only-step N` to test one step in isolation. Every step is idempotent — re-running a passed step is a no-op. If `--only-step 7` fails the same way as the full run, the bug is in that step's script; if it passes, the bug is cross-step state that the previous steps mutated.
+
+---
+
+## 8. Chain profile selection
+
+`AGENTKEYS_CHAIN` controls which RPC + which contract addresses every harness script talks to. Default in `v2-stage1-demo.sh` is `heima-paseo`; common alternates:
+
+| Profile | RPC | When to use | Cost |
+|---|---|---|---|
+| `anvil` | `http://127.0.0.1:8545` | Fully local; fastest iteration; no real-world side effects | Free |
+| `heima-paseo` | Heima testnet | Real-chain semantics without real-money cost; default for `v2-stage1-demo.sh` | Testnet HEI (free from faucet) |
+| `heima` | Heima mainnet | The canonical chain; matches what CI's harness-e2e runs against | Real HEI — small per-run cost |
+
+Switch with `--chain` on any harness script. Contract addresses for `heima` and `heima-paseo` live in [`scripts/operator-workstation.env`](../../scripts/operator-workstation.env); add `anvil` ones by running `bash scripts/setup-heima.sh --chain anvil --from-step 4 --to-step 8` after starting your local anvil.
+
+---
+
+## 9. Related docs
+
+- [`docs/arch.md`](../arch.md) — single source of truth for component inventory + trust boundaries.
+- [`docs/dev-setup.md`](../dev-setup.md) — first-time machine bootstrap (rust, jj, node, AWS CLI, browser).
+- [`docs/operator-runbook-stage7.md`](../operator-runbook-stage7.md) — deploy-to-real-EC2 walkthrough (manual; not for local dev).
+- [`docs/ci-setup.md`](../ci-setup.md) — no-LLM CI + auto-deploy of test broker (issue #101 / PR #102).
+- [`docs/spec/signer-protocol.md`](./signer-protocol.md) — wire contract for the signer (TEE swap-in target).
+- [`docs/spec/credential-backend-interface.md`](./credential-backend-interface.md) — the `CredentialBackend` trait; what the broker's storage plug-ins must implement.
+- [`docs/spec/plans/development-stages.md`](./plans/development-stages.md) — the staged build plan + harness gates.
diff --git a/scripts/heima-agent-create.sh b/scripts/heima-agent-create.sh
index b8c1859..4848b60 100755
--- a/scripts/heima-agent-create.sh
+++ b/scripts/heima-agent-create.sh
@@ -200,13 +200,27 @@ if [ "$DRY_RUN" = "1" ]; then
   exit 0
 fi
 
+# Use the PENDING nonce for the master wallet (same guard as heima-fund-account.sh,
+# PR #102): a prior registerAgentDevice tx still in the mempool would collide with
+# the default `latest` nonce. Best-effort — on lookup failure no --nonce is added.
+PENDING_NONCE=$(cast nonce "$MASTER_ADDR" --rpc-url "$RPC_HTTP" --block pending 2>/dev/null || echo "")
+if [ -n "$PENDING_NONCE" ]; then
+  log "pending nonce for master = $PENDING_NONCE"
+  CAST_ARGS+=(--nonce "$PENDING_NONCE")
+fi
+
 log "Submitting registerAgentDevice tx via cast send …"
 set +e
 CAST_OUT=$(cast "${CAST_ARGS[@]}" 2>&1)
 CAST_RC=$?
 set -e
 if [ "$CAST_RC" != "0" ]; then
-  echo "    cast send FAILED (exit $CAST_RC). Output:" >&2
+  if printf '%s\n' "$CAST_OUT" | grep -qi "replacement transaction underpriced"; then
+    echo "    cast send FAILED: prior tx with same nonce is pending in Heima mempool." >&2
+    echo "    Wait ~1 minute and re-run. Output:" >&2
+  else
+    echo "    cast send FAILED (exit $CAST_RC). Output:" >&2
+  fi
   echo "$CAST_OUT" >&2
   exit 1
 fi
diff --git a/scripts/heima-fund-account.sh b/scripts/heima-fund-account.sh
index 55fd01a..aaa102d 100755
--- a/scripts/heima-fund-account.sh
+++ b/scripts/heima-fund-account.sh
@@ -125,15 +125,38 @@ if [ "$DRY_RUN" = "1" ]; then
   exit 0
 fi
 
+# Resolve PENDING nonce (defends against the race where a prior run's funding
+# tx is still in the mempool — cast's default `latest` nonce would collide,
+# surfacing as `replacement transaction underpriced`; PR #102 / codex review).
+# NONCE_ARGS may be empty: expand it as ${arr[@]+"${arr[@]}"} (set -u safe on bash < 4.4).
+log "Resolving pending nonce for $DEPLOYER_ADDR"
+PENDING_NONCE=$(cast nonce "$DEPLOYER_ADDR" --rpc-url "$RPC_HTTP" --block pending 2>/dev/null || echo "")
+if [ -z "$PENDING_NONCE" ]; then
+  warn "could not resolve pending nonce — proceeding without explicit --nonce (cast will use latest)"
+  NONCE_ARGS=()
+else
+  ok "pending nonce = $PENDING_NONCE"
+  NONCE_ARGS=(--nonce "$PENDING_NONCE")
+fi
+
 log "Submitting transfer via cast send …"
 set +e
 SEND_OUT=$(cast send "$TO_ADDR" --value "$AMOUNT_WEI" \
   --rpc-url "$RPC_HTTP" --chain-id "$LIVE_CHAIN_ID" \
+  ${NONCE_ARGS[@]+"${NONCE_ARGS[@]}"} \
   --private-key "$DEPLOYER_KEY" 2>&1)
 SEND_RC=$?
 set -e
 if [ "$SEND_RC" != "0" ]; then
-  echo "    cast send FAILED (exit $SEND_RC). Output:" >&2
+  # Surface the underpriced-replacement case with a specific remediation —
+  # the broader workflow-level concurrency lock SHOULD prevent this from
+  # firing for parallel runs, but a stuck mempool tx still trips it.
+  if printf '%s\n' "$SEND_OUT" | grep -qi "replacement transaction underpriced"; then
+    echo "    cast send FAILED: prior tx with same nonce is pending in Heima mempool." >&2
+    echo "    Wait ~1 minute for it to confirm or drop, then re-run. Output:" >&2
+  else
+    echo "    cast send FAILED (exit $SEND_RC). Output:" >&2
+  fi
   echo "$SEND_OUT" >&2
   exit 1
 fi
diff --git a/scripts/provision-ci-deploy-role.sh b/scripts/provision-ci-deploy-role.sh
new file mode 100755
index 0000000..66b3475
--- /dev/null
+++ b/scripts/provision-ci-deploy-role.sh
@@ -0,0 +1,564 @@
+#!/usr/bin/env bash
+# scripts/provision-ci-deploy-role.sh — idempotent creation of the
+# `github-actions-agentkeys-deploy` IAM role that lets the no-LLM CI
+# workflow drive `setup-broker-host.sh --test --yes` on the test broker
+# EC2 via AWS Systems Manager (SSM).
+#
+# Per arch.md trust posture (issue #101): the role is reachable ONLY
+# via GitHub Actions OIDC from the `litentry/agentKeys` repo, and its
+# inline policy is scoped to:
+#   - `ssm:SendCommand` on document/AWS-RunShellScript + the ONE test
+#     broker instance ARN — so even if the role were stolen, the worst
+#     it can do is queue a shell command on that single EC2.
+#   - `ssm:GetCommandInvocation`, `ssm:ListCommandInvocations` +
+#     `ssm:DescribeInstanceInformation` for status polling (Resource "*", read-only).
+#   - `ec2:DescribeInstances` so the workflow can sanity-check the
+#     instance is reachable before sending the command.
+#
+# Why a separate role from `github-actions-agentkeys-e2e`:
+#   - The e2e role's perms (sts:AssumeRole on test data roles + S3
+#     verify) are read/write into the test environment AS the workload.
+#   - The deploy role's perms (ssm:SendCommand on the broker EC2) are
+#     control-plane: it tells the EC2 to re-deploy the broker binary.
+#   - Separation of duties: a compromise of CI's e2e creds cannot
+#     trigger a broker re-deploy, and vice versa.
+#
+# Out of scope (stays manual per CLAUDE.md "Remote broker host (single
+# entry point)" + "Idempotent remote-setup rule (CLOUD)"):
+#   - The PROD broker EC2 (broker.litentry.org) — no auto-deploy ever.
+#   - The Heima EVM PROD contract redeploy — never automatic.
+#
+# Required env (sourced from $ENV_FILE):
+#   - ACCOUNT_ID
+#   - REGION
+# Required CLI flags:
+#   - --test-broker-instance-id i-xxxxxxxxx (the EC2 hosting the test broker)
+# Optional CLI flags:
+#   - --repo litentry/agentKeys (default; pinned in OIDC sub condition)
+#   - --role-name github-actions-agentkeys-deploy (default)
+#   - --env-file scripts/operator-workstation.test.env (default)
+#   - --fix-ssm  If the SSM agent is not Online: attach AmazonSSMManagedInstanceCore
+#                to the broker EC2's instance profile role (creating + associating a
+#                dedicated agentkeys-test-broker-ssm profile when the EC2 has none),
+#                then poll for up to 3 min waiting for the agent to come Online.
+#                Safe to pass on every run: each step is pre-checked, and the
+#                remediation is gated on PingStatus != Online (healthy EC2 untouched).
+#   - --dry-run (print planned changes; no AWS calls that mutate state)
+#
+# Required AWS profile: agentkeys-admin (the script checks caller ARN).
+#
+# Outcomes per step (matches the idempotent-remote-setup rule shape):
+#   - `ok proceeding` → mutation applied
+#   - `skip <reason>` → no-op (e.g. role already present + trust matches)
+#   - `fail <reason>` → hard error, exit non-zero
+
+set -euo pipefail
+
+# ─── CLI parse ────────────────────────────────────────────────────────────────
+DRY_RUN=0
+FIX_SSM=0
+TEST_BROKER_INSTANCE_ID=""
+REPO_SLUG="litentry/agentKeys"
+ROLE_NAME="github-actions-agentkeys-deploy"
+SSM_POLICY_NAME="agentkeys-ci-deploy-ssm"
+REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+ENV_FILE="${ENV_FILE:-$REPO_ROOT/scripts/operator-workstation.test.env}"
+# need_value FLAG ARGC - usage error (exit 2) when FLAG was passed without a value.
+need_value() { [ "$2" -ge 2 ] || { echo "$1 requires a value (try --help)" >&2; exit 2; }; }
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --test-broker-instance-id) need_value "$1" "$#"; TEST_BROKER_INSTANCE_ID="$2"; shift 2 ;;
+    --repo)                    need_value "$1" "$#"; REPO_SLUG="$2"; shift 2 ;;
+    --role-name)               need_value "$1" "$#"; ROLE_NAME="$2"; shift 2 ;;
+    --env-file)                need_value "$1" "$#"; ENV_FILE="$2"; shift 2 ;;
+    --fix-ssm)                 FIX_SSM=1; shift ;;
+    --dry-run)                 DRY_RUN=1; shift ;;
+    --help|-h) sed -n '2,/^set -euo/p' "$0" | sed 's/^# \{0,1\}//' | sed '$d'; exit 0 ;;
+    *) echo "unknown flag: $1 (try --help)" >&2; exit 2 ;;
+  esac
+done
+
+# ─── Logging primitives (mirrors provision-vault-role.sh): all write to stderr; colors only when stderr is a TTY; die exits 1 ───
+if [ -t 2 ]; then
+  C_HEAD='\033[1;36m'; C_OK='\033[1;32m'; C_SKIP='\033[1;33m'
+  C_WARN='\033[1;33m'; C_ERR='\033[1;31m'; C_RESET='\033[0m'
+else
+  C_HEAD=''; C_OK=''; C_SKIP=''; C_WARN=''; C_ERR=''; C_RESET=''
+fi
+log()  { printf "${C_HEAD}==>${C_RESET} %s\n" "$*" >&2; }
+ok()   { printf "    ${C_OK}ok${C_RESET}   %s\n" "$*" >&2; }
+skip() { printf "    ${C_SKIP}skip${C_RESET} %s\n" "$*" >&2; }
+warn() { printf "    ${C_WARN}warn${C_RESET} %s\n" "$*" >&2; }
+die()  { printf "    ${C_ERR}fail${C_RESET} %s\n" "$*" >&2; exit 1; }
+
+# ─── Preconditions ────────────────────────────────────────────────────────────
+[ -f "$ENV_FILE" ] || die "missing $ENV_FILE (pass --env-file <path> to override)"
+set -a; . "$ENV_FILE"; set +a
+
+ACCOUNT_ID="${ACCOUNT_ID:?ACCOUNT_ID required in $ENV_FILE}"
+REGION="${REGION:?REGION required in $ENV_FILE}"
+
+[ -n "$TEST_BROKER_INSTANCE_ID" ] \
+  || die "missing --test-broker-instance-id (look up via: aws ec2 describe-instances --region $REGION --filters 'Name=tag:Name,Values=agentkeys-test-broker' --query 'Reservations[0].Instances[0].InstanceId')"
+
+[[ "$TEST_BROKER_INSTANCE_ID" =~ ^i-[0-9a-f]{8,17}$ ]] \
+  || die "instance ID shape invalid: $TEST_BROKER_INSTANCE_ID (expected i-<8-17 hex chars>)"
+
+[[ "$REPO_SLUG" =~ ^[A-Za-z0-9._-]+/[A-Za-z0-9._-]+$ ]] \
+  || die "repo slug shape invalid: $REPO_SLUG (expected owner/repo)"
+
+command -v jq >/dev/null  || die "jq not found in PATH (brew install jq)"
+command -v aws >/dev/null || die "aws CLI not found in PATH"
+
+# The caller must be the agentkeys-admin IAM user, like the other provision-*
+# scripts. Compare lowercased: the live IAM user is spelled `agentKeys-admin`.
+caller_arn=$(aws sts get-caller-identity --query Arn --output text 2>&1) \
+  || die "aws sts get-caller-identity failed: $caller_arn"
+arn_lc=$(tr '[:upper:]' '[:lower:]' <<<"$caller_arn")
+if [[ "$arn_lc" != *":user/agentkeys-admin"* ]]; then
+  die "caller is $caller_arn — needs agentkeys-admin (try: awsp agentkeys-admin)"
+fi
+ok "caller is admin: $caller_arn"
+
+# ─── Step 1: ensure the GitHub Actions OIDC provider exists in the account ───
+log "OIDC provider: token.actions.githubusercontent.com"
+gha_provider_arn="arn:aws:iam::${ACCOUNT_ID}:oidc-provider/token.actions.githubusercontent.com"
+if aws iam get-open-id-connect-provider --open-id-connect-provider-arn "$gha_provider_arn" >/dev/null 2>&1; then
+  skip "GHA OIDC provider already registered"
+else
+  if [ "$DRY_RUN" = "1" ]; then
+    log "DRY RUN — would create-open-id-connect-provider for token.actions.githubusercontent.com"
+  else
+    # Thumbprint per GitHub's published cert (matches docs/ci-setup.md §4 note).
+    # If the cert chain rolls, this needs a refresh; AWS rejects mismatches.
+    aws iam create-open-id-connect-provider \
+      --url https://token.actions.githubusercontent.com \
+      --client-id-list sts.amazonaws.com \
+      --thumbprint-list 6938fd4d98bab03faadb97b34396831e3780aea1 \
+      >/dev/null \
+      || die "create-open-id-connect-provider failed"
+    ok "GHA OIDC provider registered"
+  fi
+fi
+
+# ─── Step 2: trust policy ─────────────────────────────────────────────────────
+# Federated on the GHA OIDC provider, scoped to the litentry/agentKeys repo.
+# `StringLike` on `sub` lets PR branches AND `refs/heads/*` push events
+# trigger; the workflow itself is the second gate (path filter + concurrency).
+#
+# To tighten further later (e.g. main-branch-only deploys), change the StringLike
+# pattern to `repo:litentry/agentKeys:ref:refs/heads/evm` or similar.
+trust_policy=$(jq -n \
+  --arg provider "$gha_provider_arn" \
+  --arg sub_pattern "repo:${REPO_SLUG}:*" \
+  '{
+    Version: "2012-10-17",
+    Statement: [{
+      Effect: "Allow",
+      Principal: { Federated: $provider },
+      Action: "sts:AssumeRoleWithWebIdentity",
+      Condition: {
+        StringEquals: {
+          "token.actions.githubusercontent.com:aud": "sts.amazonaws.com"
+        },
+        StringLike: {
+          "token.actions.githubusercontent.com:sub": $sub_pattern
+        }
+      }
+    }]
+  }')
+
+# ─── Step 3: role existence ──────────────────────────────────────────────────
+log "Role existence: $ROLE_NAME"
+if aws iam get-role --role-name "$ROLE_NAME" >/dev/null 2>&1; then
+  skip "role already exists"
+  if [ "$DRY_RUN" = "1" ]; then
+    log "DRY RUN — would update-assume-role-policy with: $trust_policy"
+  else
+    log "Refreshing trust policy (idempotent; sub pattern: repo:${REPO_SLUG}:*)"
+    aws iam update-assume-role-policy \
+      --role-name "$ROLE_NAME" \
+      --policy-document "$trust_policy" \
+      || die "update-assume-role-policy failed"
+    ok "trust policy refreshed"
+  fi
+else
+  if [ "$DRY_RUN" = "1" ]; then
+    log "DRY RUN — would create-role $ROLE_NAME with trust: $trust_policy"
+  else
+    log "Creating role $ROLE_NAME"
+    # IAM CreateRole --description allows only printable ASCII + Latin-1
+    # (regex [\t\n\r\x20-\x7e\xa1-\xff]*). Em-dash / en-dash / arrows trip
+    # "Value at 'description' failed to satisfy constraint" at AWS-call time.
+    # Keep this string ASCII-only.
+    aws iam create-role \
+      --role-name "$ROLE_NAME" \
+      --assume-role-policy-document "$trust_policy" \
+      --description "CI deploy role - drives setup-broker-host.sh on the test EC2 via SSM (issue #101)" \
+      >/dev/null \
+      || die "create-role failed"
+    ok "role created"
+  fi
+fi
+
+# ─── Step 4: inline SSM policy ───────────────────────────────────────────────
+# Narrow on purpose: SendCommand limited to the document + the ONE instance
+# ARN. Even a compromised role can only re-run setup-broker-host.sh on the
+# test broker; nothing in prod, nothing on other EC2s.
+# ec2:DescribeInstances is a list action: it supports no resource-level
+# permissions and its request context carries no ec2:InstanceId, so a
+# StringEquals on that key would make the Allow unmatchable. ec2:Region is the
+# narrowest condition the action honours, hence the last statement below.
+instance_arn="arn:aws:ec2:${REGION}:${ACCOUNT_ID}:instance/${TEST_BROKER_INSTANCE_ID}"
+ssm_document_arn="arn:aws:ssm:${REGION}::document/AWS-RunShellScript"
+
+inline_policy=$(jq -n \
+  --arg doc_arn "$ssm_document_arn" \
+  --arg inst_arn "$instance_arn" \
+  --arg region   "$REGION" \
+  '{
+    Version: "2012-10-17",
+    Statement: [
+      {
+        Sid: "SendShellCommandToTestBrokerOnly",
+        Effect: "Allow",
+        Action: "ssm:SendCommand",
+        Resource: [$doc_arn, $inst_arn]
+      },
+      {
+        Sid: "PollCommandStatus",
+        Effect: "Allow",
+        Action: [
+          "ssm:GetCommandInvocation",
+          "ssm:ListCommandInvocations",
+          "ssm:DescribeInstanceInformation"
+        ],
+        Resource: "*"
+      },
+      {
+        Sid: "DescribeInstancesInRegionOnly",
+        Effect: "Allow",
+        Action: "ec2:DescribeInstances",
+        Resource: "*",
+        Condition: { StringEquals: { "ec2:Region": $region } }
+      }
+    ]
+  }')
+
+log "Inline policy: $SSM_POLICY_NAME"
+if [ "$DRY_RUN" = "1" ]; then
+  log "DRY RUN — would put-role-policy: $inline_policy"
+else
+  aws iam put-role-policy \
+    --role-name "$ROLE_NAME" \
+    --policy-name "$SSM_POLICY_NAME" \
+    --policy-document "$inline_policy" \
+    || die "put-role-policy failed"
+  ok "inline policy applied ($(echo "$inline_policy" | jq '.Statement | length') statements; SendCommand scoped to $TEST_BROKER_INSTANCE_ID)"
+fi
+
+# ─── Step 5: verify the test broker EC2 is SSM-managed ───────────────────────
+# If the instance lacks AmazonSSMManagedInstanceCore (via its instance profile)
+# OR the SSM Agent isn't running, SendCommand will queue the command and time
+# out without delivering it. Fail fast here with a clear remediation path.
+#
+# With --fix-ssm, the script attempts auto-remediation:
+#   - Looks up the EC2's instance profile via DescribeInstances
+#   - Extracts the role name behind the profile
+#   - Attaches AmazonSSMManagedInstanceCore (idempotent: AWS no-ops on re-attach)
+#   - Re-polls PingStatus for up to 3 min waiting for the agent to refresh creds
+#   - If still offline after 3 min: tells operator to reboot or restart the agent
+#
+# The auto-attach is safe because the operator is already running as
+# agentkeys-admin (verified above) — they HAVE iam:AttachRolePolicy. Without
+# --fix-ssm the script just reports + exits (no IAM mutation, no surprises).
+# create_and_associate_ssm_profile (below): creates the dedicated SSM-only
+# instance profile + role and associates it with the EC2 instance. Used when
+# the EC2 has NO profile attached at all — common on test brokers spun up by
+# setup-cloud.sh --test (the broker process authenticates via static creds in
+# /etc/agentkeys/broker.env, so the EC2 was never given an instance profile).
+#
+# Why this is safe to add to an already-running broker:
+#   - The broker's app-layer AWS calls use AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY
+#     from broker.env explicitly; the static creds take precedence over IMDS.
+#   - Adding an IMDS-served instance profile cannot reduce capability — it only
+#     ADDS a credential source for processes that don't already have static creds
+#     (which on the broker EC2 = the SSM agent and not much else).
+#
+# Names:
+#   - Role:    agentkeys-test-broker-ssm
+#   - Profile: agentkeys-test-broker-ssm (same — conventional)
+#
+# Idempotent: every step is get-* pre-checked. Safe to call repeatedly.
+SSM_INSTANCE_ROLE_NAME="agentkeys-test-broker-ssm"
+SSM_INSTANCE_PROFILE_NAME="agentkeys-test-broker-ssm"
+
+create_and_associate_ssm_profile() {
+  local instance_id="$1"
+  local policy_arn="arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
+
+  # ── Role ──
+  if aws iam get-role --role-name "$SSM_INSTANCE_ROLE_NAME" >/dev/null 2>&1; then
+    skip "role $SSM_INSTANCE_ROLE_NAME already exists"
+  else
+    log "Creating role $SSM_INSTANCE_ROLE_NAME (EC2 trust)"
+    local ec2_trust
+    ec2_trust=$(jq -n '{
+      Version: "2012-10-17",
+      Statement: [{
+        Effect: "Allow",
+        Principal: { Service: "ec2.amazonaws.com" },
+        Action: "sts:AssumeRole"
+      }]
+    }')
+    aws iam create-role \
+      --role-name "$SSM_INSTANCE_ROLE_NAME" \
+      --assume-role-policy-document "$ec2_trust" \
+      --description "Lets the test broker EC2 register with AWS SSM (issue #101)" \
+      >/dev/null \
+      || { warn "create-role failed"; return 1; }
+    ok "role $SSM_INSTANCE_ROLE_NAME created"
+  fi
+
+  # ── Managed policy attach (idempotent — AWS no-ops on re-attach) ──
+  local already_attached
+  already_attached=$(aws iam list-attached-role-policies \
+    --role-name "$SSM_INSTANCE_ROLE_NAME" \
+    --query "AttachedPolicies[?PolicyArn=='$policy_arn'].PolicyArn" \
+    --output text 2>/dev/null || echo "")
+  if [ -n "$already_attached" ]; then
+    skip "AmazonSSMManagedInstanceCore already attached to $SSM_INSTANCE_ROLE_NAME"
+  else
+    aws iam attach-role-policy \
+      --role-name "$SSM_INSTANCE_ROLE_NAME" \
+      --policy-arn "$policy_arn" \
+      || { warn "attach-role-policy failed"; return 1; }
+    ok "AmazonSSMManagedInstanceCore attached to $SSM_INSTANCE_ROLE_NAME"
+  fi
+
+  # ── Instance profile ──
+  if aws iam get-instance-profile --instance-profile-name "$SSM_INSTANCE_PROFILE_NAME" >/dev/null 2>&1; then
+    skip "instance profile $SSM_INSTANCE_PROFILE_NAME already exists"
+  else
+    log "Creating instance profile $SSM_INSTANCE_PROFILE_NAME"
+    aws iam create-instance-profile \
+      --instance-profile-name "$SSM_INSTANCE_PROFILE_NAME" \
+      >/dev/null \
+      || { warn "create-instance-profile failed"; return 1; }
+    ok "instance profile $SSM_INSTANCE_PROFILE_NAME created"
+  fi
+
+  # ── Add role to profile ──
+  local profile_role
+  profile_role=$(aws iam get-instance-profile \
+    --instance-profile-name "$SSM_INSTANCE_PROFILE_NAME" \
+    --query 'InstanceProfile.Roles[0].RoleName' \
+    --output text 2>/dev/null || echo "None")
+  if [ "$profile_role" = "$SSM_INSTANCE_ROLE_NAME" ]; then
+    skip "role already added to instance profile"
+  else
+    if [ "$profile_role" != "None" ] && [ -n "$profile_role" ]; then
+      warn "instance profile $SSM_INSTANCE_PROFILE_NAME currently holds role $profile_role (expected $SSM_INSTANCE_ROLE_NAME)"
+      warn "Refusing to swap — operator should reconcile manually."
+      return 1
+    fi
+    aws iam add-role-to-instance-profile \
+      --instance-profile-name "$SSM_INSTANCE_PROFILE_NAME" \
+      --role-name "$SSM_INSTANCE_ROLE_NAME" \
+      || { warn "add-role-to-instance-profile failed"; return 1; }
+    ok "added $SSM_INSTANCE_ROLE_NAME to instance profile"
+    # IAM is eventually consistent — newly-attached role may not show up in
+    # the EC2 associate API for a few seconds. Brief sleep here is the
+    # documented pattern (AWS docs: "may take up to 30s to propagate").
+    log "Waiting 15s for IAM eventual consistency"
+    sleep 15
+  fi
+
+  # ── Associate profile with EC2 ──
+  local current_profile_arn
+  current_profile_arn=$(aws ec2 describe-iam-instance-profile-associations \
+    --region "$REGION" \
+    --filters "Name=instance-id,Values=$instance_id" \
+    --query 'IamInstanceProfileAssociations[?State==`associated` || State==`associating`].IamInstanceProfile.Arn' \
+    --output text 2>/dev/null || echo "")
+  if [ -n "$current_profile_arn" ] && [ "$current_profile_arn" != "None" ]; then
+    skip "instance already has profile associated: $current_profile_arn"
+  else
+    log "Associating $SSM_INSTANCE_PROFILE_NAME with $instance_id"
+    aws ec2 associate-iam-instance-profile \
+      --region "$REGION" \
+      --instance-id "$instance_id" \
+      --iam-instance-profile "Name=$SSM_INSTANCE_PROFILE_NAME" \
+      >/dev/null \
+      || { warn "associate-iam-instance-profile failed"; return 1; }
+    ok "profile associated; EC2 IMDS will surface new creds within ~30s"
+  fi
+
+  return 0
+}
+
+attach_ssm_managed_policy_if_missing() {
+  # Returns 0 if policy was attached or already present; non-zero on hard error
+  # (incl. a failed describe-instances, which must not be read as "no profile").
+  local instance_id="$1"
+  local profile_arn role_name policy_arn already_attached
+  policy_arn="arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
+
+  profile_arn=$(aws ec2 describe-instances \
+    --region "$REGION" \
+    --instance-ids "$instance_id" \
+    --query 'Reservations[0].Instances[0].IamInstanceProfile.Arn' \
+    --output text 2>/dev/null) \
+    || { warn "describe-instances failed for $instance_id — cannot tell whether it has an instance profile"; return 1; }
+  if [ -z "$profile_arn" ] || [ "$profile_arn" = "None" ] || [ "$profile_arn" = "null" ]; then
+    log "instance $instance_id has NO IAM instance profile — creating + associating one"
+    create_and_associate_ssm_profile "$instance_id" || return 1
+    return 0
+  fi
+
+  # Profile ARN shape: arn:aws:iam::ACCT:instance-profile/<NAME>
+  local profile_name="${profile_arn##*/}"
+  log "instance profile: $profile_name"
+
+  role_name=$(aws iam get-instance-profile \
+    --instance-profile-name "$profile_name" \
+    --query 'InstanceProfile.Roles[0].RoleName' \
+    --output text 2>/dev/null || echo "None")
+
+  if [ -z "$role_name" ] || [ "$role_name" = "None" ]; then
+    warn "instance profile $profile_name has no role attached — auto-remediation is blocked."
+    return 1
+  fi
+  log "role behind profile: $role_name"
+
+  already_attached=$(aws iam list-attached-role-policies \
+    --role-name "$role_name" \
+    --query "AttachedPolicies[?PolicyArn=='$policy_arn'].PolicyArn" \
+    --output text 2>/dev/null || echo "")
+
+  if [ -n "$already_attached" ]; then
+    ok "AmazonSSMManagedInstanceCore already attached to $role_name"
+    return 0
+  fi
+
+  log "Attaching AmazonSSMManagedInstanceCore to $role_name"
+  aws iam attach-role-policy \
+    --role-name "$role_name" \
+    --policy-arn "$policy_arn" \
+    || { warn "attach-role-policy failed"; return 1; }
+  ok "AmazonSSMManagedInstanceCore attached to $role_name"
+  return 0
+}
+
+poll_ssm_online() {
+  # Poll PingStatus every 10s, at most max_iters times; print the last state seen.
+  local instance_id="$1" max_iters="$2" state attempt=0
+  while [ "$attempt" -lt "$max_iters" ]; do
+    attempt=$((attempt + 1))
+    state=$(aws ssm describe-instance-information \
+      --region "$REGION" \
+      --filters "Key=InstanceIds,Values=$instance_id" \
+      --query 'InstanceInformationList[0].PingStatus' \
+      --output text 2>/dev/null || echo "None")
+    if [ "$state" = "Online" ]; then printf '%s' "$state"; return 0; fi
+    sleep 10
+  done
+  printf '%s' "${state:-None}"
+  return 1
+}
+
+log "Verify SSM agent reachable: $TEST_BROKER_INSTANCE_ID"
+if [ "$DRY_RUN" = "1" ]; then
+  log "DRY RUN — would query ssm describe-instance-information for $TEST_BROKER_INSTANCE_ID"
+else
+  # Capture stderr separately so AccessDenied doesn't get silently mapped to
+  # "None" (instance-not-registered). They're distinct failure modes:
+  #   - AccessDenied → caller (agentkeys-admin) lacks ssm:DescribeInstanceInformation.
+  #     Fix the caller's IAM, not the EC2.
+  #   - Empty/None → instance genuinely not registered with SSM. Remediate the EC2.
+  ssm_stderr=$(mktemp /tmp/ssm-describe.XXXXXX)  # Xs must be trailing: BSD/macOS mktemp does not expand Xs before a suffix
+  ssm_state=$(aws ssm describe-instance-information \
+    --region "$REGION" \
+    --filters "Key=InstanceIds,Values=$TEST_BROKER_INSTANCE_ID" \
+    --query 'InstanceInformationList[0].PingStatus' \
+    --output text 2>"$ssm_stderr" || echo "")
+  if grep -q "AccessDenied" "$ssm_stderr"; then
+    rm -f "$ssm_stderr"
+    die "caller lacks ssm:DescribeInstanceInformation. This is the upstream
+of every 'PingStatus=None' loop — without read perms, the script cannot tell
+'instance not registered with SSM' from 'I have no permission to look'. Fix
+by attaching AmazonSSMReadOnlyAccess to the admin group ONCE:
+    aws iam attach-group-policy \\
+      --group-name AgentKeyAdmin \\
+      --policy-arn arn:aws:iam::aws:policy/AmazonSSMReadOnlyAccess
+Then re-run this script."
+  fi
+  # Empty state = no record found (genuinely not registered).
+  [ -z "$ssm_state" ] && ssm_state="None"
+  rm -f "$ssm_stderr"
+
+  case "$ssm_state" in
+    Online)
+      ok "SSM agent online — workflow can SendCommand"
+      ;;
+    ConnectionLost|Inactive|None|"")
+      if [ "$FIX_SSM" = "1" ]; then
+        log "Auto-remediating (--fix-ssm): attach AmazonSSMManagedInstanceCore + poll"
+        if attach_ssm_managed_policy_if_missing "$TEST_BROKER_INSTANCE_ID"; then
+          log "Polling SSM PingStatus for up to 3 min (agent refresh window)"
+          final_state=$(poll_ssm_online "$TEST_BROKER_INSTANCE_ID" 18) || true
+          if [ "$final_state" = "Online" ]; then
+            ok "SSM agent now online"
+          else
+            warn "SSM agent still $final_state after 3 min — policy attached, but the"
+            warn "agent process hasn't picked up the refreshed creds. Pick ONE:"
+            warn "  a) SSH and bounce the agent:"
+            warn "     ssh test-broker 'sudo systemctl restart amazon-ssm-agent'"
+            warn "  b) Reboot the EC2 (heavier):"
+            warn "     aws ec2 reboot-instances --instance-ids $TEST_BROKER_INSTANCE_ID --region $REGION"
+            warn "Then re-run this script (without --fix-ssm) to confirm Online."
+            exit 1
+          fi
+        else
+          exit 1
+        fi
+      else
+        die "$TEST_BROKER_INSTANCE_ID is not registered with SSM (state=$ssm_state). Re-run with --fix-ssm
+to attempt auto-remediation (attaches AmazonSSMManagedInstanceCore to the
+EC2's instance profile role, then polls until the SSM agent refreshes).
+Or remediate manually:
+  1. EC2 instance profile is missing AmazonSSMManagedInstanceCore. Fix:
+       aws ec2 describe-instances --region $REGION --instance-ids $TEST_BROKER_INSTANCE_ID \\
+         --query 'Reservations[0].Instances[0].IamInstanceProfile.Arn'
+     Then attach the policy to the role behind that instance profile:
+       aws iam attach-role-policy --role-name <role-from-above> \\
+         --policy-arn arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore
+     Reboot the EC2 (or restart amazon-ssm-agent) to pick up new perms.
+  2. SSM Agent not installed/running. Fix (Ubuntu 22.04+ ships it):
+       ssh test-broker 'sudo systemctl enable --now amazon-ssm-agent'
+  3. Instance is in a private VPC subnet without an SSM VPC endpoint.
+     (Unlikely for a public-IP broker, but worth a glance at the routing.)"
+      fi
+      ;;
+    *)
+      warn "SSM agent state = $ssm_state (unexpected) — proceed with caution"
+      ;;
+  esac
+fi
+
+# ─── Final: role ARN goes to stdout (last line); the "Next" hints go to stderr; ARN is "?" if get-role fails ──
+role_arn=$(aws iam get-role --role-name "$ROLE_NAME" --query 'Role.Arn' --output text 2>/dev/null || echo "?")
+ok "deploy role ready: $role_arn"
+cat <<EOF >&2
+
+Next:
+  # 1. Set the two GitHub secrets (idempotent — overwrites existing values):
+  gh secret set OIDC_AWS_ROLE_ARN_DEPLOY --repo $REPO_SLUG --body "$role_arn"
+  gh secret set TEST_BROKER_INSTANCE_ID  --repo $REPO_SLUG --body "$TEST_BROKER_INSTANCE_ID"
+
+  # 2. Trigger a workflow_dispatch with broker_changed=true to dry-run the
+  #    deploy path on the test EC2 (see docs/ci-setup.md §7).
+
+EOF
+
+echo "$role_arn"
diff --git a/scripts/setup-broker-host.sh b/scripts/setup-broker-host.sh
index 44b471e..166d01c 100755
--- a/scripts/setup-broker-host.sh
+++ b/scripts/setup-broker-host.sh
@@ -21,6 +21,13 @@
 
 set -euo pipefail
 
+# AWS SSM-driven invocations (harness-ci.yml deploy-test-broker, issue #101)
+# don't export HOME on the remote shell. Under set -u that hits 'HOME: unbound
+# variable' at the rustup `source "$HOME/.cargo/env"` line. Resolve HOME from
+# /etc/passwd if missing so the script is callable from both interactive ssh
+# sessions and SSM SendCommand.
+export HOME="${HOME:-$(getent passwd "$(id -u)" | cut -d: -f6)}"
+
 REPO_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
 
 # ─── Defaults ─────────────────────────────────────────────────────────────────
@@ -790,6 +797,67 @@ EOF
   sudo systemctl reload ssh 2>/dev/null || sudo systemctl reload sshd 2>/dev/null || warn "sshd reload failed — restart manually"
 fi
 
+# ─── AWS SSM Agent (idempotent install) ───────────────────────────────────────
+# Required by harness-ci.yml deploy-test-broker job (issue #101): the GitHub
+# Actions workflow drives `setup-broker-host.sh --test --yes` on the EC2 via
+# `aws ssm send-command`. That path needs amazon-ssm-agent installed AND
+# active here.
+#
+# Some Ubuntu AMIs (including some Canonical / Multipass-derived images
+# downstream of the AWS Marketplace base) ship without amazon-ssm-agent.
+# When that's the case, `systemctl restart amazon-ssm-agent` errors with
+# "Unit amazon-ssm-agent.service not found" — the failure mode the operator
+# hit on 2026-05-23. Fold the install into broker-host bootstrap so every
+# new test broker is SSM-ready out of the box.
+#
+# Two install paths, in priority order:
+#   1) snap (AWS-blessed on Ubuntu 22.04+; service: snap.amazon-ssm-agent.amazon-ssm-agent.service)
+#   2) deb fallback (older / non-snap images; service: amazon-ssm-agent.service)
+#
+# ssm_unit_active succeeds when EITHER unit reports active, so whichever path
+# installed the agent, the install block below is skipped on later re-runs.
+ssm_unit_active() {
+  systemctl is-active snap.amazon-ssm-agent.amazon-ssm-agent.service >/dev/null 2>&1 \
+    || systemctl is-active amazon-ssm-agent.service >/dev/null 2>&1
+}
+
+if ssm_unit_active; then
+  log "amazon-ssm-agent already active — skipping install"
+else
+  log "Installing amazon-ssm-agent (required for CI auto-deploy per issue #101)"
+  if command -v snap >/dev/null 2>&1; then
+    # snap install is idempotent — re-running on an already-installed agent
+    # exits 0 with a "snap already installed" message.
+    sudo snap install amazon-ssm-agent --classic >/dev/null \
+      || warn "snap install amazon-ssm-agent failed — falling back to deb"
+    sudo systemctl enable --now snap.amazon-ssm-agent.amazon-ssm-agent.service \
+      >/dev/null 2>&1 || true
+  fi
+
+  if ! ssm_unit_active; then
+    # Snap path didn't take — fall back to AWS's .deb for this host's dpkg arch (amd64 / arm64).
+    REGION_FOR_SSM="${REGION:-us-east-1}"
+    SSM_DEB_URL="https://s3.${REGION_FOR_SSM}.amazonaws.com/amazon-ssm-${REGION_FOR_SSM}/latest/debian_$(dpkg --print-architecture)/amazon-ssm-agent.deb"
+    SSM_TMP_DEB=$(mktemp /tmp/amazon-ssm-agent.XXXXXX.deb)
+    if curl -sSfL "$SSM_DEB_URL" -o "$SSM_TMP_DEB"; then
+      sudo dpkg -i "$SSM_TMP_DEB" >/dev/null \
+        || warn "dpkg install amazon-ssm-agent.deb failed"
+      sudo systemctl enable --now amazon-ssm-agent.service \
+        >/dev/null 2>&1 || warn "amazon-ssm-agent enable/start failed"
+    else
+      warn "could not download amazon-ssm-agent.deb from $SSM_DEB_URL"
+    fi
+    rm -f "$SSM_TMP_DEB"
+  fi
+
+  if ssm_unit_active; then
+    log "amazon-ssm-agent installed and active"
+  else
+    warn "amazon-ssm-agent install did not produce an active unit — CI auto-deploy will fail until this is resolved"
+    warn "Manual recovery: sudo snap install amazon-ssm-agent --classic && sudo systemctl enable --now snap.amazon-ssm-agent.amazon-ssm-agent.service"
+  fi
+fi
+
 if [[ "$CRED_MODE" == "profile" ]]; then
   sudo install -d -m 0700 -o agentkeys -g agentkeys /var/lib/agentkeys/.aws
   if [[ ! -f /var/lib/agentkeys/.aws/credentials ]]; then