diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4fdfa4681..f2a2dc1c4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -116,51 +116,297 @@ jobs: bun build apps/paste-service/targets/bun.ts --compile --target=bun-windows-arm64 --outfile plannotator-paste-win32-arm64.exe sha256sum plannotator-paste-win32-arm64.exe > plannotator-paste-win32-arm64.exe.sha256 - - name: Smoke-test linux-x64 binary + - name: Upload artifacts + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: binaries + path: | + plannotator-* + !*.ts + + smoke-binaries: + needs: build + runs-on: ${{ matrix.os }} + permissions: + contents: read + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + binary: plannotator-linux-x64 + - os: windows-latest + binary: plannotator-win32-x64.exe + + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Download binaries + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: binaries + path: artifacts + + - name: Smoke-test binary + if: runner.os != 'Windows' + env: + BINARY: artifacts/${{ matrix.binary }} + BROWSER: true run: | - chmod +x plannotator-linux-x64 + set -euo pipefail + chmod +x "$BINARY" - # 1. --help: proves binary loads and arg parsing works - ./plannotator-linux-x64 --help + # 1. --help: proves binary loads and arg parsing works. + "$BINARY" --help - # Helper: start server, poll endpoint for 200, kill smoke_test_server() { - local LABEL="$1" PORT="$2" ENDPOINT="$3" + local label="$1" port="$2" endpoint="$3" shift 3 - # Start the binary in background - PLANNOTATOR_PORT=$PORT "$@" & - local PID=$! - # Poll until the API responds (up to 10s) - local OK=0 - for i in $(seq 1 20); do - if curl -sf "http://localhost:${PORT}${ENDPOINT}" -o /dev/null 2>/dev/null; then - OK=1; break + + PLANNOTATOR_PORT="$port" "$@" & + local pid=$! 
+ local ok=0 + + for _ in $(seq 1 60); do + if curl -sf "http://127.0.0.1:${port}${endpoint}" -o /dev/null 2>/dev/null; then + ok=1 + break fi sleep 0.5 done - kill $PID 2>/dev/null; wait $PID 2>/dev/null || true - if [ "$OK" = "0" ]; then - echo "FAIL: $LABEL did not respond on :${PORT}${ENDPOINT}" + + kill "$pid" 2>/dev/null || true + wait "$pid" 2>/dev/null || true + + if [ "$ok" = "0" ]; then + echo "FAIL: ${label} did not respond on :${port}${endpoint}" exit 1 fi - echo "OK: $LABEL — :${PORT}${ENDPOINT} responded" + + echo "OK: ${label} responded on :${port}${endpoint}" } - # 2. review: exercises full server startup (imports, bundled HTML, git diff, HTTP) + # 2. review: exercises server startup, bundled HTML, git diff, and HTTP. smoke_test_server "plannotator review" 19500 "/api/diff" \ - ./plannotator-linux-x64 review + "$BINARY" review - # 3. annotate: exercises annotate server path with a real file + # 3. annotate: exercises annotate server startup with a real file. smoke_test_server "plannotator annotate" 19501 "/api/plan" \ - ./plannotator-linux-x64 annotate README.md + "$BINARY" annotate README.md - - name: Upload artifacts - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + - name: Smoke-test binary + if: runner.os == 'Windows' + shell: pwsh + env: + BINARY: artifacts/${{ matrix.binary }} + BROWSER: true + run: | + $ErrorActionPreference = "Stop" + $binary = (Resolve-Path $env:BINARY).Path + + # 1. --help: proves binary loads and arg parsing works. 
+ & $binary --help + + function Test-PlannotatorServer { + param( + [string] $Label, + [string] $Port, + [string] $Endpoint, + [string[]] $Arguments + ) + + $env:PLANNOTATOR_PORT = $Port + $stdout = New-TemporaryFile + $stderr = New-TemporaryFile + $process = Start-Process ` + -FilePath $binary ` + -ArgumentList $Arguments ` + -PassThru ` + -NoNewWindow ` + -RedirectStandardOutput $stdout ` + -RedirectStandardError $stderr + $ok = $false + + try { + for ($i = 0; $i -lt 60; $i++) { + try { + Invoke-WebRequest -Uri "http://127.0.0.1:$Port$Endpoint" -UseBasicParsing -TimeoutSec 1 | Out-Null + $ok = $true + break + } catch { + if ($process.HasExited) { + break + } + Start-Sleep -Milliseconds 500 + } + } + } finally { + if (-not $process.HasExited) { + Stop-Process -Id $process.Id -Force + Wait-Process -Id $process.Id -ErrorAction SilentlyContinue + } + Remove-Item Env:\PLANNOTATOR_PORT -ErrorAction SilentlyContinue + } + + if (-not $ok) { + Write-Host "stdout:" + Get-Content $stdout -ErrorAction SilentlyContinue + Write-Host "stderr:" + Get-Content $stderr -ErrorAction SilentlyContinue + throw "FAIL: $Label did not respond on :$Port$Endpoint" + } + + Write-Host "OK: $Label responded on :$Port$Endpoint" + } + + # 2. review: exercises server startup, bundled HTML, git diff, and HTTP. + Test-PlannotatorServer "plannotator review" "19500" "/api/diff" @("review") + + # 3. annotate: exercises annotate server startup with a real file. 
+ Test-PlannotatorServer "plannotator annotate" "19501" "/api/plan" @("annotate", "README.md") + + install-script-smoke: + needs: build + runs-on: ${{ matrix.os }} + permissions: + contents: read + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + artifact: plannotator-linux-x64 + + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Download binaries + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: binaries - path: | - plannotator-* - !*.ts + path: artifacts + + - name: Verify installer writes Codex hook config + env: + ARTIFACT_NAME: ${{ matrix.artifact }} + run: | + set -euo pipefail + + tmp_home="$(mktemp -d)" + fake_bin="$(mktemp -d)" + artifact="$PWD/artifacts/$ARTIFACT_NAME" + + cat > "$fake_bin/codex" <<'SH' + #!/usr/bin/env bash + echo "codex stub" + SH + chmod +x "$fake_bin/codex" + + cat > "$fake_bin/curl" <<'SH' + #!/usr/bin/env bash + set -euo pipefail + + out="" + url="" + + while [ "$#" -gt 0 ]; do + case "$1" in + -o|--output) + out="$2" + shift 2 + ;; + -*) + shift + ;; + *) + url="$1" + shift + ;; + esac + done + + if [[ "$url" == *.sha256 ]]; then + if command -v sha256sum >/dev/null 2>&1; then + sha256sum "$ARTIFACT" + else + shasum -a 256 "$ARTIFACT" + fi + exit 0 + fi + + if [ -n "$out" ]; then + cp "$ARTIFACT" "$out" + else + cat "$ARTIFACT" + fi + SH + chmod +x "$fake_bin/curl" + + run_installer() { + HOME="$tmp_home" \ + PATH="$fake_bin:$PATH" \ + SHELL=/bin/bash \ + ARTIFACT="$artifact" \ + bash scripts/install.sh --version v9.9.9 --skip-attestation + } + + run_installer + + test -x "$tmp_home/.local/bin/plannotator" + grep -q 'codex_hooks = true' "$tmp_home/.codex/config.toml" + + HOME="$tmp_home" node <<'NODE' + const fs = require("fs"); + const path = require("path"); + const home = process.env.HOME; + const hooksPath = path.join(home, ".codex", "hooks.json"); + const hooks = JSON.parse(fs.readFileSync(hooksPath, "utf8")); 
+ const command = hooks?.hooks?.Stop?.[0]?.hooks?.[0]?.command; + const timeout = hooks?.hooks?.Stop?.[0]?.hooks?.[0]?.timeout; + const expected = path.join(home, ".local", "bin", "plannotator"); + + if (command !== expected) { + throw new Error(`Expected Stop hook command ${expected}, got ${command}`); + } + if (timeout !== 345600) { + throw new Error(`Expected Stop hook timeout 345600, got ${timeout}`); + } + NODE + + cat > "$tmp_home/.codex/hooks.json" <<'JSON' + { + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "PLANNOTATOR_BROWSER=/usr/bin/true plannotator", + "timeout": 123 + } + ] + } + ] + } + } + JSON + + run_installer + + HOME="$tmp_home" node <<'NODE' + const fs = require("fs"); + const path = require("path"); + const hooksPath = path.join(process.env.HOME, ".codex", "hooks.json"); + const stop = JSON.parse(fs.readFileSync(hooksPath, "utf8"))?.hooks?.Stop; + const hooks = stop?.flatMap((entry) => entry?.hooks ?? []) ?? []; + + if (hooks.length !== 1) { + throw new Error(`Expected one preserved custom Stop hook, got ${hooks.length}`); + } + if (hooks[0].command !== "PLANNOTATOR_BROWSER=/usr/bin/true plannotator") { + throw new Error(`Custom Stop hook command was changed to ${hooks[0].command}`); + } + NODE attest: # Isolated attestation job — runs on tag pushes only and holds the @@ -172,7 +418,10 @@ jobs: # same binaries the build job uploaded; attest-build-provenance # publishes the signed bundle to GitHub's attestation store, so the # release job downstream doesn't need any new artifact handling. 
- needs: build + needs: + - build + - smoke-binaries + - install-script-smoke if: startsWith(github.ref, 'refs/tags/') runs-on: ubuntu-latest permissions: @@ -235,7 +484,10 @@ jobs: prerelease: ${{ contains(github.ref, '-') }} npm-publish: - needs: build + needs: + - build + - smoke-binaries + - install-script-smoke runs-on: ubuntu-latest permissions: contents: read diff --git a/.gitignore b/.gitignore index b5e156bef..0c5edff31 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ apps/pi-extension/review-core.ts # Claude Code session-local runtime state (lock files, scheduled-task state). # Machine-specific; never belongs in the repo. .claude/ +.playwright-cli/ .serena/ *.ntvs* *.njsproj diff --git a/README.md b/README.md index 77747a7c7..eb78400f2 100644 --- a/README.md +++ b/README.md @@ -217,12 +217,17 @@ See [apps/pi-extension/README.md](apps/pi-extension/README.md) for full usage de curl -fsSL https://plannotator.ai/install.sh | bash ``` +The installer also enables Codex Stop hooks when Codex is installed or `~/.codex` already exists. Restart Codex Desktop +after installing or changing hooks. + **Windows PowerShell:** ```powershell irm https://plannotator.ai/install.ps1 | iex ``` +Codex plan review is automatic on macOS, Linux, and WSL. Codex hooks are currently disabled on Windows in the official Codex docs, so the Windows installer does not enable them automatically; the direct `!plannotator` commands still work. + **Then in Codex — feedback flows back into the agent loop automatically:** ``` @@ -232,7 +237,7 @@ irm https://plannotator.ai/install.ps1 | iex !plannotator last # Annotate the last agent message ``` -Plan mode is not yet supported. +Plan review uses Codex's experimental `Stop` hook on macOS, Linux, and WSL. See [apps/codex/README.md](apps/codex/README.md) for details. 
diff --git a/apps/codex/README.md b/apps/codex/README.md index c9283a908..4ba93b6c6 100644 --- a/apps/codex/README.md +++ b/apps/codex/README.md @@ -1,6 +1,8 @@ # Plannotator for Codex -Code review and markdown annotation are supported today. Plan mode is not yet supported — it requires hooks to intercept the agent's plan submission, which Codex does not currently expose. +Code review, markdown annotation, and plan review are supported in Codex. + +Plan review uses Codex's experimental `Stop` hook. This is a post-render review flow: when a turn stops, Plannotator reads the current rollout transcript, extracts the latest plan, and opens the normal plan review UI. If you deny the plan, Plannotator returns continuation feedback so Codex revises the plan in the same turn. ## Install @@ -10,14 +12,76 @@ Code review and markdown annotation are supported today. Plan mode is not yet su curl -fsSL https://plannotator.ai/install.sh | bash ``` +The installer adds the `plannotator` binary and, when Codex is installed or `~/.codex` already exists, enables Codex +Stop hooks automatically. + **Windows PowerShell:** ```powershell irm https://plannotator.ai/install.ps1 | iex ``` +Codex hooks are currently disabled on Windows in the official Codex docs. The Windows installer does not enable them +automatically; it only prints manual guidance for future/manual experimentation. + +## Enable Codex hooks + +The installer handles this automatically on macOS, Linux, and WSL. If you are setting it up manually, Codex hooks +require a feature flag. 
+ +Add this to `~/.codex/config.toml` or `<repo>/.codex/config.toml`: + +```toml +[features] +codex_hooks = true +``` + +Then create `~/.codex/hooks.json` or `<repo>/.codex/hooks.json`: + +```json +{ + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "plannotator", + "timeout": 345600 + } + ] + } + ] + } +} +``` + +Notes: + +- Codex loads `hooks.json` next to active config layers, so either the global `~/.codex` or repo-local `.codex` location works. +- Prefer an absolute `plannotator` command path in `hooks.json` for Codex Desktop, because app-launched processes may not inherit your shell `PATH`. +- This currently depends on Codex hooks, which are experimental and disabled on Windows in the current official docs. +- Because this uses `Stop`, the review happens after Codex renders the plan turn, not at a dedicated `ExitPlanMode` interception point. +- Restart Codex Desktop after installing or changing hooks. + ## Usage +### Plan Review + +Once hooks are enabled, plan review opens automatically whenever a Codex turn ends with a plan. Approving keeps the turn completed. Sending feedback returns a `Stop` continuation reason so Codex revises the plan and Plannotator shows version history and diffs across revisions. + +### Local End-to-End Harness + +From the repo root, you can run a disposable local E2E flow against a real Codex session: + +```bash +./tests/manual/local/test-codex-plan-review-e2e.sh --keep +``` + +This uses a temporary `HOME`, sample git repo, repo-local Codex CLI, and repo-local `plannotator` wrapper so it +doesn't modify your installed Codex or Plannotator state. If you want to automate the opened review UI with Playwright, +set `PLANNOTATOR_BROWSER=/usr/bin/true` before running the script. 
+ ### Code Review Run `!plannotator review` to open the code review UI for your current changes: diff --git a/apps/hook/server/codex-session.test.ts b/apps/hook/server/codex-session.test.ts index 03121acec..c9d749953 100644 --- a/apps/hook/server/codex-session.test.ts +++ b/apps/hook/server/codex-session.test.ts @@ -6,11 +6,11 @@ * Uses synthetic JSONL fixtures matching the real Codex rollout format. */ -import { describe, expect, test, beforeEach, afterEach } from "bun:test"; -import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs"; +import { describe, expect, test, afterEach } from "bun:test"; +import { mkdtempSync, writeFileSync, rmSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { getLastCodexMessage } from "./codex-session"; +import { getLastCodexMessage, getLatestCodexPlan } from "./codex-session"; // --- Fixture Helpers --- @@ -86,6 +86,57 @@ function eventMsg(type: string): string { }); } +function turnStarted(turnId: string): string { + return JSON.stringify({ + timestamp: new Date().toISOString(), + type: "event_msg", + payload: { + type: "task_started", + turn_id: turnId, + }, + }); +} + +function turnCompleted(turnId: string): string { + return JSON.stringify({ + timestamp: new Date().toISOString(), + type: "event_msg", + payload: { + type: "task_complete", + turn_id: turnId, + }, + }); +} + +function completedPlanItem(text: string, turnId: string): string { + return JSON.stringify({ + timestamp: new Date().toISOString(), + type: "event_msg", + payload: { + type: "item_completed", + turn_id: turnId, + item: { + type: "Plan", + id: `plan_${crypto.randomUUID().slice(0, 12)}`, + text, + }, + }, + }); +} + +function hookPrompt(text: string): string { + return rolloutLine("response_item", { + type: "message", + role: "user", + content: [ + { + type: "input_text", + text: `${text}`, + }, + ], + }); +} + function buildRollout(...lines: string[]): string { return lines.join("\n"); } @@ -177,6 
+228,23 @@ describe("getLastCodexMessage", () => { expect(result!.text).toBe("First part.\nSecond part."); }); + test("ignores non-output assistant text blocks", () => { + const path = writeTempRollout( + buildRollout( + sessionMeta(), + assistantMessage("Renderable response"), + rolloutLine("response_item", { + type: "message", + role: "assistant", + content: [{ type: "refusal", text: "Hidden refusal text" }], + }) + ) + ); + const result = getLastCodexMessage(path); + expect(result).not.toBeNull(); + expect(result!.text).toBe("Renderable response"); + }); + test("skips event_msg and turn_context entries", () => { const path = writeTempRollout( buildRollout( @@ -243,3 +311,147 @@ describe("getLastCodexMessage", () => { expect(result!.text).toBe("Valid message"); }); }); + +describe("getLatestCodexPlan", () => { + test("prefers the latest persisted plan item for the current turn", () => { + const turnId = "turn-plan-item"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(turnId), + assistantMessage("<proposed_plan>\nFallback text\n</proposed_plan>"), + completedPlanItem("Authoritative plan item", turnId) + ) + ); + + const result = getLatestCodexPlan(path, { turnId }); + expect(result).toEqual({ + text: "Authoritative plan item", + source: "plan-item", + }); + }); + + test("falls back to raw proposed_plan blocks for plan-only assistant replies", () => { + const turnId = "turn-plan-only"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(turnId), + assistantMessage("<proposed_plan>\n- First\n- Second\n</proposed_plan>") + ) + ); + + const result = getLatestCodexPlan(path, { turnId }); + expect(result).toEqual({ + text: "- First\n- Second", + source: "assistant-message", + }); + }); + + test("extracts plan blocks surrounded by assistant prose", () => { + const turnId = "turn-prose"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(turnId), + assistantMessage( + [ + "Here is the plan I recommend.", + "", + "<proposed_plan>", + "1. 
Inspect hook payloads", + "2. Launch Plannotator", + "</proposed_plan>", + "", + "I can revise it if needed.", + ].join("\n") + ) + ) + ); + + const result = getLatestCodexPlan(path, { turnId }); + expect(result).toEqual({ + text: "1. Inspect hook payloads\n2. Launch Plannotator", + source: "assistant-message", + }); + }); + + test("ignores plans from older turns when the current turn has none", () => { + const oldTurnId = "turn-old"; + const currentTurnId = "turn-current"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(oldTurnId), + completedPlanItem("Old plan", oldTurnId), + turnCompleted(oldTurnId), + turnStarted(currentTurnId), + assistantMessage("Just answering a regular question.") + ) + ); + + const result = getLatestCodexPlan(path, { turnId: currentTurnId }); + expect(result).toBeNull(); + }); + + test("returns null when Stop re-entry has no revised plan after the hook prompt", () => { + const turnId = "turn-stop-no-revision"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(turnId), + completedPlanItem("Original plan", turnId), + hookPrompt("Please revise the plan."), + assistantMessage("I will think through the feedback.") + ) + ); + + const result = getLatestCodexPlan(path, { + turnId, + stopHookActive: true, + }); + expect(result).toBeNull(); + }); + + test("returns null when Stop re-entry repeats the same plan", () => { + const turnId = "turn-stop-duplicate"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(turnId), + completedPlanItem("Original plan", turnId), + hookPrompt("Please revise the plan."), + completedPlanItem("Original plan", turnId) + ) + ); + + const result = getLatestCodexPlan(path, { + turnId, + stopHookActive: true, + }); + expect(result).toBeNull(); + }); + + test("returns the revised plan after a denied Stop review", () => { + const turnId = "turn-stop-revised"; + const path = writeTempRollout( + buildRollout( + sessionMeta(), + turnStarted(turnId), + 
completedPlanItem("Original plan", turnId), + hookPrompt("Please revise the plan."), + assistantMessage("<proposed_plan>\nRevised fallback plan\n</proposed_plan>"), + completedPlanItem("Revised authoritative plan", turnId) + ) + ); + + const result = getLatestCodexPlan(path, { + turnId, + stopHookActive: true, + }); + expect(result).toEqual({ + text: "Revised authoritative plan", + source: "plan-item", + }); + }); +}); diff --git a/apps/hook/server/codex-session.ts b/apps/hook/server/codex-session.ts index bcb3c2ea4..a3e625d31 100644 --- a/apps/hook/server/codex-session.ts +++ b/apps/hook/server/codex-session.ts @@ -18,6 +18,8 @@ import { homedir } from "node:os"; // --- Types --- +type CodexPlanSource = "plan-item" | "assistant-message"; + interface RolloutEntry { timestamp?: string; type: string; @@ -25,10 +27,35 @@ interface RolloutEntry { type?: string; role?: string; content?: { type: string; text?: string }[]; + turn_id?: string; + item?: { + type?: string; + text?: string; + [key: string]: unknown; + }; [key: string]: unknown; }; } +interface CodexPlanCandidate { + index: number; + text: string; + source: CodexPlanSource; +} + +export interface CodexPlanResult { + text: string; + source: CodexPlanSource; +} + +export interface GetLatestCodexPlanOptions { + turnId?: string; + stopHookActive?: boolean; +} + +const TURN_START_TYPES = new Set(["task_started", "turn_started"]); +const PROPOSED_PLAN_RE = /<proposed_plan>([\s\S]*?)<\/proposed_plan>/gi; // group 1 = text between the opening and closing proposed_plan tags + // --- Rollout File Discovery --- /** @@ -84,6 +111,179 @@ function isDir(path: string): boolean { // --- Message Extraction --- +function parseRolloutEntries(rolloutPath: string): RolloutEntry[] { + const content = readFileSync(rolloutPath, "utf-8"); + if (!content.trim()) return []; + + return content + .split(/\r?\n/) + .filter(Boolean) + .flatMap((line) => { + try { + return [JSON.parse(line) as RolloutEntry]; + } catch { + return []; + } + }); +} + +function getMessageText( + entry: RolloutEntry, + allowedContentTypes: readonly string[] +): string | null 
{ + if (entry.type !== "response_item") return null; + if (entry.payload?.type !== "message") return null; + + const contentBlocks = entry.payload?.content; + if (!Array.isArray(contentBlocks)) return null; + + const textParts = contentBlocks + .filter((block) => allowedContentTypes.includes(block.type)) + .map((block) => (typeof block.text === "string" ? block.text.trim() : "")) + .filter(Boolean); + + if (textParts.length === 0) return null; + + return textParts.join("\n"); +} + +function extractLastProposedPlan(text: string): string | null { + const matches = Array.from(text.matchAll(PROPOSED_PLAN_RE)); + const latest = matches.at(-1)?.[1]?.trim(); + return latest || null; +} + +function normalizePlan(text: string): string { + return text.replace(/\r\n/g, "\n").trim(); +} + +function findLastIndex( + entries: RolloutEntry[], + predicate: (entry: RolloutEntry) => boolean +): number { + for (let i = entries.length - 1; i >= 0; i--) { + if (predicate(entries[i])) return i; + } + return -1; +} + +function findTurnStartIndex(entries: RolloutEntry[], turnId?: string): number { + const matchingTurnStart = findLastIndex( + entries, + (entry) => + entry.type === "event_msg" && + TURN_START_TYPES.has(entry.payload?.type || "") && + (!turnId || entry.payload?.turn_id === turnId) + ); + if (matchingTurnStart !== -1) return matchingTurnStart; + + const matchingTurnContext = findLastIndex( + entries, + (entry) => + entry.type === "turn_context" && + (!turnId || entry.payload?.turn_id === turnId) + ); + if (matchingTurnContext !== -1) return matchingTurnContext; + + const lastTurnStart = findLastIndex( + entries, + (entry) => + entry.type === "event_msg" && + TURN_START_TYPES.has(entry.payload?.type || "") + ); + if (lastTurnStart !== -1) return lastTurnStart; + + const lastTurnContext = findLastIndex( + entries, + (entry) => entry.type === "turn_context" + ); + return lastTurnContext === -1 ? 
0 : lastTurnContext; +} + +function isHookPromptMessage(entry: RolloutEntry): boolean { + if (entry.type !== "response_item") return false; + if (entry.payload?.type !== "message") return false; + if (entry.payload?.role !== "user") return false; + + const messageText = getMessageText(entry, ["input_text"]); + return !!messageText?.includes("<hook_prompt"); // NOTE(review): marker string reconstructed after tag stripping - confirm against the hook prompt wrapper Codex writes to the rollout +} + +function findLastHookPromptIndex( + entries: RolloutEntry[], + startIndex: number +): number { + for (let i = entries.length - 1; i >= Math.max(startIndex, 0); i--) { + if (isHookPromptMessage(entries[i])) return i; + } + return -1; +} + +function getPlanItemText( + entry: RolloutEntry, + turnId?: string +): string | null { + if (entry.type !== "event_msg") return null; + if (entry.payload?.type !== "item_completed") return null; + if (turnId && entry.payload?.turn_id !== turnId) return null; + + const itemType = entry.payload?.item?.type; + if (itemType !== "Plan" && itemType !== "plan") return null; + + const text = entry.payload?.item?.text; + return typeof text === "string" && text.trim() ? text.trim() : null; +} + +function getAssistantProposedPlanText(entry: RolloutEntry): string | null { + if (entry.type !== "response_item") return null; + if (entry.payload?.type !== "message") return null; + if (entry.payload?.role !== "assistant") return null; + + const messageText = getMessageText(entry, ["output_text"]); + if (!messageText) return null; + + return extractLastProposedPlan(messageText); +} + +function collectPlanCandidates( + entries: RolloutEntry[], + startIndex: number, + turnId?: string +): CodexPlanCandidate[] { + const candidates: CodexPlanCandidate[] = []; + + for (let i = Math.max(startIndex, 0); i < entries.length; i++) { + const entry = entries[i]; + + const planItemText = getPlanItemText(entry, turnId); + if (planItemText) { + candidates.push({ index: i, text: planItemText, source: "plan-item" }); + } + + const assistantPlanText = getAssistantProposedPlanText(entry); + if (assistantPlanText) { + candidates.push({ + index: i, + text: assistantPlanText, + source: "assistant-message", + }); + } + } + + return candidates; +} + +function 
pickLatestPreferredPlan( + candidates: CodexPlanCandidate[] +): CodexPlanCandidate | null { + const latestPlanItem = [...candidates] + .reverse() + .find((candidate) => candidate.source === "plan-item"); + if (latestPlanItem) return latestPlanItem; + + return candidates.at(-1) || null; +} + /** * Extract the last assistant message from a Codex rollout file. * @@ -97,33 +297,87 @@ function isDir(path: string): boolean { export function getLastCodexMessage( rolloutPath: string ): { text: string } | null { - const content = readFileSync(rolloutPath, "utf-8"); - const lines = content.trim().split("\n"); + const entries = parseRolloutEntries(rolloutPath); // Walk backward - for (let i = lines.length - 1; i >= 0; i--) { - let entry: RolloutEntry; - try { - entry = JSON.parse(lines[i]); - } catch { - continue; - } - + for (let i = entries.length - 1; i >= 0; i--) { + const entry = entries[i]; if (entry.type !== "response_item") continue; if (entry.payload?.type !== "message") continue; if (entry.payload?.role !== "assistant") continue; - const contentBlocks = entry.payload?.content; - if (!Array.isArray(contentBlocks)) continue; + const messageText = getMessageText(entry, ["output_text"]); + if (messageText) return { text: messageText }; + } - const textParts = contentBlocks - .filter((b) => b.type === "output_text" && b.text?.trim()) - .map((b) => b.text!); + return null; +} + +/** + * Extract the latest Codex plan from a rollout file. + * + * Primary source: persisted completed TurnItem::Plan events. + * Fallback source: raw assistant response_item messages that still contain a + * block in the rollout transcript. 
+ * + * When stopHookActive is true, this only returns a changed post-feedback plan: + * - no plan after the last hook prompt => null + * - identical plan after the last hook prompt => null + */ +export function getLatestCodexPlan( + rolloutPath: string, + options: GetLatestCodexPlanOptions = {} +): CodexPlanResult | null { + const entries = parseRolloutEntries(rolloutPath); + if (entries.length === 0) return null; - if (textParts.length === 0) continue; + const turnStartIndex = findTurnStartIndex(entries, options.turnId); + const candidates = collectPlanCandidates( + entries, + turnStartIndex, + options.turnId + ); + if (candidates.length === 0) return null; - return { text: textParts.join("\n") }; + if (!options.stopHookActive) { + const latestPlan = pickLatestPreferredPlan(candidates); + return latestPlan + ? { text: latestPlan.text, source: latestPlan.source } + : null; } - return null; + const lastHookPromptIndex = findLastHookPromptIndex(entries, turnStartIndex); + + if (lastHookPromptIndex === -1) { + const latestPlan = pickLatestPreferredPlan(candidates); + return latestPlan + ? 
{ text: latestPlan.text, source: latestPlan.source } + : null; + } + + const plansAfterHookPrompt = candidates.filter( + (candidate) => candidate.index > lastHookPromptIndex + ); + if (plansAfterHookPrompt.length === 0) return null; + + const latestAfterHookPrompt = pickLatestPreferredPlan(plansAfterHookPrompt); + if (!latestAfterHookPrompt) return null; + + const plansBeforeHookPrompt = candidates.filter( + (candidate) => candidate.index < lastHookPromptIndex + ); + const latestBeforeHookPrompt = pickLatestPreferredPlan(plansBeforeHookPrompt); + + if ( + latestBeforeHookPrompt && + normalizePlan(latestBeforeHookPrompt.text) === + normalizePlan(latestAfterHookPrompt.text) + ) { + return null; + } + + return { + text: latestAfterHookPrompt.text, + source: latestAfterHookPrompt.source, + }; } diff --git a/apps/hook/server/index.ts b/apps/hook/server/index.ts index 8d1f30273..c59e7f86d 100644 --- a/apps/hook/server/index.ts +++ b/apps/hook/server/index.ts @@ -1,10 +1,10 @@ /** - * Plannotator CLI for Claude Code & Copilot CLI + * Plannotator CLI for Claude Code, Codex, Gemini CLI, and Copilot CLI * * Supports eight modes: * * 1. 
Plan Review (default, no args): - * - Spawned by ExitPlanMode hook (Claude Code) + * - Spawned by Claude/Gemini/Codex hook entrypoints * - Reads hook event from stdin, extracts plan content * - Serves UI, returns approve/deny decision to stdout * @@ -90,7 +90,7 @@ import { resolveSessionLogByCwdScan, type RenderedMessage, } from "./session-log"; -import { findCodexRolloutByThreadId, getLastCodexMessage } from "./codex-session"; +import { findCodexRolloutByThreadId, getLastCodexMessage, getLatestCodexPlan } from "./codex-session"; import { findCopilotPlanContent, findCopilotSessionForCwd, getLastCopilotMessage } from "./copilot-session"; import { formatInteractiveNoArgClarification, @@ -1041,36 +1041,105 @@ if (args[0] === "sessions") { // Read hook event from stdin const eventJson = await Bun.stdin.text(); + if (!eventJson.trim()) { + process.exit(0); + } - let planContent = ""; - let permissionMode = "default"; - let isGemini = false; - let planFilename = ""; let event: Record<string, any>; try { event = JSON.parse(eventJson); + } catch (e: any) { + console.error(`Failed to parse hook event from stdin: ${e?.message || e}`); + process.exit(1); + } + + if (event.hook_event_name === "Stop") { + const rolloutPath = + (typeof event.transcript_path === "string" && event.transcript_path) || + (process.env.CODEX_THREAD_ID + ? findCodexRolloutByThreadId(process.env.CODEX_THREAD_ID) + : null); + + if (!rolloutPath || !existsSync(rolloutPath)) { + process.exit(0); + } + + const latestPlan = getLatestCodexPlan(rolloutPath, { + turnId: typeof event.turn_id === "string" ? event.turn_id : undefined, + stopHookActive: !!event.stop_hook_active, + }); + + if (!latestPlan?.text) { + process.exit(0); + } + + const planProject = (await detectProjectName()) ?? 
"_unknown"; + const server = await startPlannotatorServer({ + plan: latestPlan.text, + origin: "codex", + sharingEnabled, + shareBaseUrl, + pasteApiUrl, + htmlContent: planHtmlContent, + onReady: async (url, isRemote, port) => { + handleServerReady(url, isRemote, port); + + if (isRemote && sharingEnabled) { + await writeRemoteShareLink(latestPlan.text, shareBaseUrl, "review the plan", "plan only").catch(() => {}); + } + }, + }); + + registerSession({ + pid: process.pid, + port: server.port, + url: server.url, + mode: "plan", + project: planProject, + startedAt: new Date().toISOString(), + label: `plan-${planProject}`, + }); - // Detect harness: Gemini sends plan_filename (file on disk), Claude Code sends plan (inline) - planFilename = event.tool_input?.plan_filename || event.tool_input?.plan_path || ""; - isGemini = !!planFilename; - - if (isGemini) { - // Reconstruct full plan path from transcript_path and session_id: - // transcript_path = /chats/session-...json - // plan lives at = //plans/ - const projectTempDir = path.dirname(path.dirname(event.transcript_path)); - const planFilePath = path.join(projectTempDir, event.session_id, "plans", planFilename); - planContent = await Bun.file(planFilePath).text(); + const result = await server.waitForDecision(); + await Bun.sleep(1500); + server.stop(); + + if (result.approved) { + console.log("{}"); } else { - planContent = event.tool_input?.plan || ""; + console.log( + JSON.stringify({ + decision: "block", + reason: planDenyFeedback(result.feedback || "", "Stop"), + }) + ); } - permissionMode = event.permission_mode || "default"; - } catch (e: any) { - console.error(`Failed to parse hook event from stdin: ${e?.message || e}`); - process.exit(1); + process.exit(0); } + let planContent = ""; + let permissionMode = "default"; + let isGemini = false; + let planFilename = ""; + + // Detect harness: Gemini sends plan_filename (file on disk), Claude Code sends plan (inline) + planFilename = event.tool_input?.plan_filename || 
event.tool_input?.plan_path || ""; + isGemini = !!planFilename; + + if (isGemini) { + // Reconstruct full plan path from transcript_path and session_id: + // transcript_path = /chats/session-...json + // plan lives at = //plans/ + const projectTempDir = path.dirname(path.dirname(event.transcript_path)); + const planFilePath = path.join(projectTempDir, event.session_id, "plans", planFilename); + planContent = await Bun.file(planFilePath).text(); + } else { + planContent = event.tool_input?.plan || ""; + } + + permissionMode = event.permission_mode || "default"; + if (!planContent) { console.error("No plan content in hook event"); process.exit(1); diff --git a/apps/marketing/src/components/landing/HeroSection.astro b/apps/marketing/src/components/landing/HeroSection.astro index 1c5ddc2a5..bb9c306d5 100644 --- a/apps/marketing/src/components/landing/HeroSection.astro +++ b/apps/marketing/src/components/landing/HeroSection.astro @@ -163,9 +163,6 @@ import AnnoReplace from './AnnoReplace.astro'; var commandEl = document.getElementById('install-command'); var detailEl = document.getElementById('install-detail'); var stepsEl = document.getElementById('install-steps'); - var step1El = document.getElementById('install-step-1'); - var step2El = document.getElementById('install-step-2'); - var step3El = document.getElementById('install-step-3'); var videoLink = document.getElementById('watch-demo-link'); var copyBtn = document.getElementById('install-copy-btn'); @@ -202,7 +199,10 @@ import AnnoReplace from './AnnoReplace.astro'; }, codex: { steps: [ - '!plannotator review', + 'Plan mode hook captures plans for annotation', + 'Restart Codex Desktop or CLI to load hooks', + 'Use Codex plan mode normally', + '!plannotator review ', '!plannotator annotate ', '!plannotator last' ], @@ -280,14 +280,15 @@ import AnnoReplace from './AnnoReplace.astro'; installBlock.style.display = ''; // Clear all steps completely, then rebuild - var allSteps = [step1El, step2El, step3El]; - 
allSteps.forEach(function(el) { el.innerHTML = ''; el.style.display = 'none'; }); + stepsEl.innerHTML = ''; if (config.steps && config.steps.length) { stepsEl.classList.remove('hidden'); - for (var i = 0; i < config.steps.length && i < allSteps.length; i++) { - allSteps[i].style.display = ''; - makeStep(allSteps[i], config.steps[i]); + for (var i = 0; i < config.steps.length; i++) { + var stepEl = document.createElement('div'); + stepEl.className = 'hero-detail-step'; + stepsEl.appendChild(stepEl); + makeStep(stepEl, config.steps[i]); } } else { stepsEl.classList.add('hidden'); diff --git a/apps/marketing/src/content/blog/plan-diff-see-what-changed.md b/apps/marketing/src/content/blog/plan-diff-see-what-changed.md index 0d321d399..525964121 100644 --- a/apps/marketing/src/content/blog/plan-diff-see-what-changed.md +++ b/apps/marketing/src/content/blog/plan-diff-see-what-changed.md @@ -1,6 +1,6 @@ --- title: "Plan Diff: See What Changed Between Iterations" -description: "When your coding agent revises a plan, Plannotator now shows exactly what changed. Visual diffs, raw markdown diffs, and version history — across Claude Code, OpenCode, and Pi." +description: "When your coding agent revises a plan, Plannotator now shows exactly what changed. Visual diffs, raw markdown diffs, and version history — across Claude Code, Codex, OpenCode, and Pi." date: 2026-02-22 author: "backnotprop" tags: ["plan-diff", "plan-mode", "version-history"] @@ -40,7 +40,7 @@ Right now, diffs are scoped to versions of the same plan. A future release will ## Works everywhere -Plan Diff is available across all three supported agents: Claude Code, OpenCode, and Pi. The diff UI is the same regardless of which agent you're using. If your agent submits a revised plan, you'll see the badge. +Plan Diff is available across the supported plan-review agents: Claude Code, Codex, OpenCode, and Pi. The diff UI is the same regardless of which agent you're using. 
If your agent submits a revised plan, you'll see the badge. ## This is v1 @@ -58,4 +58,4 @@ Start a planning session, deny a plan with some annotations, and let the agent r ## Plannotator: plan review for coding agents -Plannotator is a free, open-source plan review UI for AI coding agents. Annotate plans visually, review code diffs, share with your team, and now see exactly what changed between iterations with Plan Diff. Coding agents like Claude Code don't show you how a plan changed after revision — Plannotator does. Works with Claude Code, OpenCode, and Pi. Install it in under a minute and start reviewing plans in your browser instead of the terminal. +Plannotator is a free, open-source plan review UI for AI coding agents. Annotate plans visually, review code diffs, share with your team, and now see exactly what changed between iterations with Plan Diff. Coding agents like Claude Code and Codex don't show you how a plan changed after revision — Plannotator does. Works with Claude Code, Codex, OpenCode, and Pi. Install it in under a minute and start reviewing plans in your browser instead of the terminal. diff --git a/apps/marketing/src/content/docs/commands/plan-review.md b/apps/marketing/src/content/docs/commands/plan-review.md index 3fb1be934..b2af24d04 100644 --- a/apps/marketing/src/content/docs/commands/plan-review.md +++ b/apps/marketing/src/content/docs/commands/plan-review.md @@ -1,12 +1,12 @@ --- title: "Plan Review" -description: "The core plan review flow — how Plannotator intercepts ExitPlanMode and presents the annotation UI." +description: "The core plan review flow across Claude Code, Codex, and other supported agent hosts." sidebar: order: 10 section: "Commands" --- -Plan review is the core Plannotator workflow. It's not a slash command — it fires automatically when your agent calls `ExitPlanMode`. +Plan review is the core Plannotator workflow. It's not a slash command. 
Plannotator opens automatically when the host agent reaches its plan handoff point. ## How it works @@ -29,6 +29,34 @@ Agent resubmits → Plan Diff shows what changed The hook configuration lives at `apps/hook/hooks/hooks.json` and matches the `ExitPlanMode` tool name. +## Codex flow + +Codex does not expose a dedicated `ExitPlanMode` interception point. Instead, Plannotator integrates through Codex's experimental `Stop` hook. + +``` +Codex turn stops + ↓ +Stop hook fires + ↓ +Plannotator reads transcript_path rollout + ↓ +Latest completed plan item is extracted +fallback: raw block from assistant response + ↓ +Browser opens with the normal review UI + ↓ +Approve → turn stays completed +Deny → Stop hook returns continuation feedback + ↓ +Codex revises the plan in the same turn + ↓ +Plannotator reopens only if the revised plan actually changed +``` + +This means Codex plan review is post-render rather than pre-submit, but you still get the same annotations, plan history, diff view, and revision loop. + +The macOS, Linux, and WSL installer enables this hook automatically when Codex is installed or `~/.codex` already exists. Restart Codex Desktop or CLI after installing so the hook configuration is loaded. 
+ ## Annotation types When you select text in the plan, the annotation toolbar appears with these options: @@ -58,7 +86,7 @@ Images are stored as temporary files and referenced by name in the feedback sent **Approve** (no annotations): - Click "Approve" or press `Cmd/Ctrl+Enter` - Optionally saves plan to disk or Obsidian/Bear -- Agent proceeds with implementation +- Agent proceeds through its normal post-plan workflow **Approve with annotations** (Claude Code): - Claude Code doesn't yet support feedback on approval diff --git a/apps/marketing/src/content/docs/getting-started/installation.md b/apps/marketing/src/content/docs/getting-started/installation.md index eeff0e0c4..f04919912 100644 --- a/apps/marketing/src/content/docs/getting-started/installation.md +++ b/apps/marketing/src/content/docs/getting-started/installation.md @@ -1,6 +1,6 @@ --- title: "Installation" -description: "How to install Plannotator for Claude Code, OpenCode, Pi, and other agent hosts." +description: "How to install Plannotator for Claude Code, Codex, OpenCode, Pi, and other agent hosts." sidebar: order: 1 section: "Getting Started" @@ -122,13 +122,58 @@ Coming soon. ## Codex -Plan mode is not yet supported. +Codex plan review is supported through the experimental `Stop` hook. -Install the binary, then use it directly: +This is a post-render review flow: when a Codex turn stops, Plannotator reads the current transcript, extracts the latest plan, and opens the same plan review UI used by the other integrations. If you deny the plan, Plannotator returns a `Stop` continuation reason so Codex can revise the plan in the same turn. +On macOS, Linux, and WSL, the installer enables Codex hooks automatically when Codex is installed or `~/.codex` already exists: + +```bash +curl -fsSL https://plannotator.ai/install.sh | bash +``` + +Restart Codex Desktop or CLI after installing or changing hooks so the hook configuration is reloaded. 
+ +For manual setup, enable hooks in `~/.codex/config.toml` or `<repo>/.codex/config.toml`: + +```toml +[features] +codex_hooks = true +``` + +Then add `hooks.json` next to that config layer: + +```json +{ + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "plannotator", + "timeout": 345600 + } + ] + } + ] + } +} ``` -!plannotator review # Code review for current changes -!plannotator annotate file.md # Annotate a markdown file + +Notes: + +- Codex discovers hooks from `~/.codex/hooks.json` and `<repo>/.codex/hooks.json`, and loads all matching files. +- Prefer an absolute `plannotator` command path in `hooks.json` for Codex Desktop, because app-launched processes may not inherit your shell `PATH`. +- Codex hooks are currently experimental. +- The current official Codex hooks docs say hooks are disabled on Windows, so this flow is currently macOS/Linux/WSL only. + +You can still use the direct commands at any time: + +```bash +!plannotator review +!plannotator annotate file.md +!plannotator last ``` ## Pi diff --git a/apps/marketing/src/content/docs/getting-started/quickstart.md b/apps/marketing/src/content/docs/getting-started/quickstart.md index ce46f9d83..48c05a380 100644 --- a/apps/marketing/src/content/docs/getting-started/quickstart.md +++ b/apps/marketing/src/content/docs/getting-started/quickstart.md @@ -1,6 +1,6 @@ --- title: "Quickstart" -description: "Your first plan review with Plannotator — from ExitPlanMode to approval." +description: "Your first plan review with Plannotator — from agent plan to approval." sidebar: order: 2 section: "Getting Started" @@ -8,20 +8,22 @@ section: "Getting Started" Once Plannotator is installed, it works automatically. Here's what a plan review looks like. -## 1. Claude generates a plan +## 1. Your agent generates a plan -Ask Claude to do something that requires planning. When Claude calls `ExitPlanMode`, the Plannotator hook intercepts the request and opens the review UI in your browser. 
+Ask your agent to do something that requires planning. When the agent reaches its plan handoff point, Plannotator opens the review UI in your browser. ``` -Claude calls ExitPlanMode +Agent proposes a plan ↓ -PermissionRequest hook fires +Plannotator hook or plugin fires ↓ Plannotator reads the plan from stdin ↓ Browser opens with the plan review UI ``` +Claude Code uses an `ExitPlanMode` hook. Codex uses a `Stop` hook after a plan turn completes. Both flows open Plannotator automatically after installation. + ## 2. Review the plan The plan renders as formatted markdown with syntax-highlighted code blocks. Read through it at your own pace. @@ -47,14 +49,14 @@ Switch between annotation modes using the mode switcher at the top of the docume When you're done reviewing: -- **Approve** (`Cmd/Ctrl+Enter` with no annotations) — Claude proceeds with implementation -- **Send Feedback** (`Cmd/Ctrl+Enter` with annotations) — Your annotations are formatted and sent back to Claude, who revises the plan +- **Approve** (`Cmd/Ctrl+Enter` with no annotations) — The agent proceeds through its normal post-plan flow +- **Send Feedback** (`Cmd/Ctrl+Enter` with annotations) — Your annotations are formatted and sent back to the agent, which revises the plan -Your annotations are exported as structured feedback that Claude can act on directly. +Your annotations are exported as structured feedback that the agent can act on directly. -## 5. Claude continues +## 5. The agent continues -After approval, Claude implements the plan. After feedback, Claude revises the plan and presents it again for review. When the revised plan arrives, a diff badge shows what changed — click it to toggle between normal and diff view. The cycle continues until you approve. +After approval, the agent continues through its native implementation workflow. In interactive Codex, that means Codex can show its normal post-plan implementation prompt. 
After feedback, the agent revises the plan and presents it again for review. When the revised plan arrives, a diff badge shows what changed — click it to toggle between normal and diff view. The cycle continues until you approve. ## Other commands diff --git a/apps/marketing/src/content/docs/guides/troubleshooting.md b/apps/marketing/src/content/docs/guides/troubleshooting.md index 1c54494c1..6c319d9e2 100644 --- a/apps/marketing/src/content/docs/guides/troubleshooting.md +++ b/apps/marketing/src/content/docs/guides/troubleshooting.md @@ -64,6 +64,20 @@ If `ExitPlanMode` doesn't trigger Plannotator: 3. Verify `plannotator` is on your PATH: `which plannotator` 4. Check that plan mode is enabled in your Claude Code session +## Codex plan review doesn't open + +Codex plan review uses the experimental `Stop` hook, which the macOS, Linux, and WSL installer configures automatically when Codex is installed or `~/.codex` already exists. + +If a Codex plan turn completes without opening Plannotator: + +1. Rerun the installer: `curl -fsSL https://plannotator.ai/install.sh | bash` +2. Restart Codex Desktop or CLI so hooks are reloaded +3. Check `~/.codex/config.toml` contains `codex_hooks = true` under `[features]` +4. Check `~/.codex/hooks.json` has a `Stop` hook whose command points to `plannotator` +5. Run `plannotator sessions` in case the browser failed to open but the session is running + +Codex hooks are currently disabled on Windows in the official Codex docs, so the Windows installer prints manual guidance instead of changing Codex config automatically. + ## OpenCode build agent cannot call `submit_plan` This is expected with the default OpenCode workflow. Plannotator now defaults to `plan-agent`, which keeps `submit_plan` available to OpenCode's `plan` agent and hides or denies it for `build` and other non-planning primary agents. 
diff --git a/apps/marketing/src/layouts/Base.astro b/apps/marketing/src/layouts/Base.astro index e5b693f4d..04a7f086f 100644 --- a/apps/marketing/src/layouts/Base.astro +++ b/apps/marketing/src/layouts/Base.astro @@ -8,7 +8,7 @@ interface Props { const { title, - description = 'Interactive Plan Review: Mark up and refine your plans using a UI, easily share for team collaboration, automatically integrates with Claude Code plan mode.', + description = 'Interactive Plan Review: Mark up and refine coding-agent plans in a browser UI, share with your team, and integrate with Claude Code, Codex, OpenCode, and more.', ogImage = 'https://plannotator.ai/og-image.webp', canonicalUrl, } = Astro.props; diff --git a/scripts/install.cmd b/scripts/install.cmd index a4f347cb5..d2a90753a 100644 --- a/scripts/install.cmd +++ b/scripts/install.cmd @@ -382,6 +382,26 @@ echo } echo Updated plugin hooks at !PLUGIN_HOOKS! ) +REM Codex hooks on Windows are still experimental upstream. Do not mutate +REM %%USERPROFILE%%\.codex automatically from the cmd installer until that path +REM is verified end-to-end. +where codex >nul 2>&1 +if !ERRORLEVEL! equ 0 ( + echo. + echo Codex detected. + echo Codex plan review hooks are experimental on Windows. To try them manually: + echo. + echo 1. Add this to %%USERPROFILE%%\.codex\config.toml: + echo. + echo [features] + echo codex_hooks = true + echo. + echo 2. Add a Stop hook in %%USERPROFILE%%\.codex\hooks.json that runs: + echo. + echo !INSTALL_PATH! + echo. 
+) + REM Clear any cached OpenCode plugin to force fresh download on next run if exist "%USERPROFILE%\.cache\opencode\node_modules\@plannotator" rmdir /s /q "%USERPROFILE%\.cache\opencode\node_modules\@plannotator" >nul 2>&1 if exist "%USERPROFILE%\.cache\opencode\packages\@plannotator" rmdir /s /q "%USERPROFILE%\.cache\opencode\packages\@plannotator" >nul 2>&1 diff --git a/scripts/install.ps1 b/scripts/install.ps1 index d521a376b..9e1bc95be 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -266,6 +266,26 @@ if (Test-Path $pluginHooks) { Write-Host "Updated plugin hooks at $pluginHooks" } +# Codex hooks on Windows are still experimental upstream. Do not mutate +# $env:USERPROFILE\.codex automatically from the Windows installer until that +# path is verified end-to-end. +$codexDir = "$env:USERPROFILE\.codex" +if ((Get-Command codex -ErrorAction SilentlyContinue) -or (Test-Path $codexDir)) { + $codexExePath = "$installDir\plannotator.exe" + Write-Host "" + Write-Host "Codex detected." + Write-Host "Codex plan review hooks are experimental on Windows. To try them manually:" + Write-Host "" + Write-Host " 1. Add this to $env:USERPROFILE\.codex\config.toml:" + Write-Host "" + Write-Host " [features]" + Write-Host " codex_hooks = true" + Write-Host "" + Write-Host " 2. Add a Stop hook in $env:USERPROFILE\.codex\hooks.json that runs:" + Write-Host "" + Write-Host " $codexExePath" +} + # Clear OpenCode plugin cache Remove-Item -Recurse -Force "$env:USERPROFILE\.cache\opencode\node_modules\@plannotator" -ErrorAction SilentlyContinue Remove-Item -Recurse -Force "$env:USERPROFILE\.cache\opencode\packages\@plannotator" -ErrorAction SilentlyContinue diff --git a/scripts/install.sh b/scripts/install.sh index 69fa110ba..538b9b8c6 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -313,6 +313,187 @@ if ! 
echo "$PATH" | tr ':' '\n' | grep -qx "$INSTALL_DIR"; then echo " source ${shell_config}" fi +# --- Codex CLI / Desktop app support (only if Codex is installed or configured) --- +if command -v codex >/dev/null 2>&1 || [ -d "$HOME/.codex" ]; then + CODEX_DIR="$HOME/.codex" + CODEX_CONFIG="$CODEX_DIR/config.toml" + CODEX_HOOKS="$CODEX_DIR/hooks.json" + PLANNOTATOR_BIN="${INSTALL_DIR}/plannotator" + codex_hook_configured=0 + + mkdir -p "$CODEX_DIR" + + enable_codex_hooks_config() { + if [ ! -f "$CODEX_CONFIG" ]; then + cat > "$CODEX_CONFIG" << 'CODEX_CONFIG_EOF' +[features] +codex_hooks = true +CODEX_CONFIG_EOF + echo "Created Codex config at ${CODEX_CONFIG}" + return 0 + fi + + if grep -Eq '^[[:space:]]*features[[:space:]]*=' "$CODEX_CONFIG"; then + echo "" + echo "Codex config uses inline features in ${CODEX_CONFIG}; leaving it unchanged." + echo "Add this manually to enable Plannotator plan review:" + echo "" + echo " [features]" + echo " codex_hooks = true" + return 1 + fi + + tmp_config="$(mktemp)" + if awk ' + function is_table(line) { + return line ~ /^[[:space:]]*\[[^]]+\][[:space:]]*$/ + } + BEGIN { + in_features = 0 + saw_features = 0 + saw_hook = 0 + } + { + if (is_table($0)) { + if (in_features && !saw_hook) { + print "codex_hooks = true" + saw_hook = 1 + } + in_features = ($0 ~ /^[[:space:]]*\[features\][[:space:]]*$/) + if (in_features) saw_features = 1 + } + + if (in_features && $0 ~ /^[[:space:]]*codex_hooks[[:space:]]*=/) { + print "codex_hooks = true" + saw_hook = 1 + next + } + + print + } + END { + if (saw_features && in_features && !saw_hook) { + print "codex_hooks = true" + } else if (!saw_features) { + print "" + print "[features]" + print "codex_hooks = true" + } + } + ' "$CODEX_CONFIG" > "$tmp_config"; then + mv "$tmp_config" "$CODEX_CONFIG" + echo "Enabled codex_hooks in ${CODEX_CONFIG}" + return 0 + fi + + rm -f "$tmp_config" + echo "Could not update ${CODEX_CONFIG}; add codex_hooks manually." >&2 + return 1 + } + + if [ ! 
-f "$CODEX_HOOKS" ]; then + cat > "$CODEX_HOOKS" << CODEX_HOOKS_EOF +{ + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "${PLANNOTATOR_BIN}", + "timeout": 345600 + } + ] + } + ] + } +} +CODEX_HOOKS_EOF + echo "Created Codex hooks at ${CODEX_HOOKS}" + codex_hook_configured=1 + elif command -v node >/dev/null 2>&1; then + if codex_merge_result=$(node - "$CODEX_HOOKS" "$PLANNOTATOR_BIN" <<'NODE' +const fs = require("fs"); +const path = require("path"); +const [hooksPath, command] = process.argv.slice(2); +const config = JSON.parse(fs.readFileSync(hooksPath, "utf8")); +config.hooks ||= {}; +const stopHooks = Array.isArray(config.hooks.Stop) ? config.hooks.Stop : []; +let updated = false; +let foundCustomPlannotatorHook = false; + +function isManagedPlannotatorCommand(value) { + const current = value.trim(); + if (current === "plannotator" || current === command) return true; + return current.startsWith("/") && path.posix.basename(current) === "plannotator"; +} + +for (const entry of stopHooks) { + const hooks = Array.isArray(entry?.hooks) ? entry.hooks : []; + for (const hook of hooks) { + if (hook?.type !== "command" || typeof hook.command !== "string") continue; + + if (isManagedPlannotatorCommand(hook.command)) { + hook.command = command; + hook.timeout = 345600; + updated = true; + } else if (hook.command.includes("plannotator")) { + foundCustomPlannotatorHook = true; + } + } +} +if (!updated && !foundCustomPlannotatorHook) { + stopHooks.push({ + hooks: [ + { + type: "command", + command, + timeout: 345600, + }, + ], + }); +} +config.hooks.Stop = stopHooks; +if (updated || !foundCustomPlannotatorHook) { + fs.writeFileSync(hooksPath, JSON.stringify(config, null, 2) + "\n"); +} +process.stdout.write(updated ? "updated" : foundCustomPlannotatorHook ? "custom" : "added"); +NODE + ); then + case "$codex_merge_result" in + custom) + echo "Existing custom Codex Plannotator hook found at ${CODEX_HOOKS}; left it unchanged." 
+ ;; + added) + echo "Added Codex hooks at ${CODEX_HOOKS}" + ;; + *) + echo "Updated Codex hooks at ${CODEX_HOOKS}" + ;; + esac + codex_hook_configured=1 + else + echo "" + echo "Codex hooks file already exists at ${CODEX_HOOKS}, but it could not be merged automatically." + echo "Leaving Codex hook support unchanged. Add or update this Stop hook manually:" + echo "" + echo " command: ${PLANNOTATOR_BIN}" + echo " timeout: 345600" + fi + else + echo "" + echo "Codex hooks file already exists at ${CODEX_HOOKS}, but node was not found to merge it safely." + echo "Leaving Codex hook support unchanged. Add or update this Stop hook manually:" + echo "" + echo " command: ${PLANNOTATOR_BIN}" + echo " timeout: 345600" + fi + + if [ "$codex_hook_configured" -eq 1 ]; then + enable_codex_hooks_config || true + fi +fi + # Validate plugin hooks.json if plugin is already installed PLUGIN_HOOKS="${CLAUDE_CONFIG_DIR:-$HOME/.claude}/plugins/marketplaces/plannotator/apps/hook/hooks/hooks.json" if [ -f "$PLUGIN_HOOKS" ]; then diff --git a/scripts/install.test.ts b/scripts/install.test.ts index e9d0d8f44..5b1c49499 100644 --- a/scripts/install.test.ts +++ b/scripts/install.test.ts @@ -67,6 +67,28 @@ describe("install.sh", () => { expect(script).toContain("CLAUDE_COMMANDS_DIR"); expect(script).toContain("OPENCODE_COMMANDS_DIR"); }); + + test("enables Codex hooks only after Stop hook setup succeeds", () => { + const hookSetupIndex = script.indexOf('if [ ! 
-f "$CODEX_HOOKS" ]; then'); + const enableConfigIndex = script.indexOf('enable_codex_hooks_config || true'); + expect(hookSetupIndex).toBeGreaterThan(0); + expect(enableConfigIndex).toBeGreaterThan(hookSetupIndex); + expect(script).toContain('codex_hook_configured=1'); + expect(script).toContain('if [ "$codex_hook_configured" -eq 1 ]; then'); + expect(script).toContain("Leaving Codex hook support unchanged"); + }); + + test("does not rewrite inline Codex features config", () => { + expect(script).toContain("Codex config uses inline features"); + expect(script).toContain('grep -Eq \'^[[:space:]]*features[[:space:]]*=\' "$CODEX_CONFIG"'); + }); + + test("preserves custom Codex Plannotator hook wrappers", () => { + expect(script).toContain("isManagedPlannotatorCommand"); + expect(script).toContain("foundCustomPlannotatorHook"); + expect(script).toContain("Existing custom Codex Plannotator hook found"); + expect(script).not.toContain('hook.command.includes("plannotator")) {\n hook.command = command;'); + }); }); describe("install.ps1", () => { diff --git a/tests/README.md b/tests/README.md index 93eef4a0f..72effb19a 100644 --- a/tests/README.md +++ b/tests/README.md @@ -11,6 +11,7 @@ These scripts test the UI components and require a browser. ```bash ./tests/manual/local/test-hook.sh # Claude Code simulation ./tests/manual/local/test-hook-2.sh # OpenCode origin badge test +./tests/manual/local/test-codex-plan-review-e2e.sh # Real Codex Stop-hook flow in disposable HOME ``` **Code review UI:** @@ -58,6 +59,23 @@ Options: - `--keep`: Don't clean up sandbox on exit - `--no-git`: Skip git initialization (tests non-git fallback) +**Codex Stop-hook end-to-end harness:** + +```bash +./tests/manual/local/test-codex-plan-review-e2e.sh [--keep] [--skip-build] +``` + +Builds the hook and review apps, creates a disposable `HOME` plus sample git repo, copies your Codex `auth.json`, +enables `codex_hooks`, and runs a real `codex exec` against the sample project. 
The script writes logs, rollout paths, +history indices, and session URLs into an artifact directory under the temp root. + +Tips: + +- Set `PLANNOTATOR_BROWSER=/usr/bin/true` when you want to drive the opened plan-review session with Playwright + instead of auto-opening a browser. +- The validated workflow is: run the script in one terminal, then point Playwright at the printed session URL from a + second terminal. + **Obsidian utility:** ```bash diff --git a/tests/UI-TESTING.md b/tests/UI-TESTING.md index e01879fa4..c61f3a86d 100644 --- a/tests/UI-TESTING.md +++ b/tests/UI-TESTING.md @@ -157,6 +157,7 @@ UI test scripts simulate plugin behavior locally: # Plan review UI tests ./tests/manual/local/test-hook.sh # Claude Code simulation ./tests/manual/local/test-hook-2.sh # OpenCode origin badge test +./tests/manual/local/test-codex-plan-review-e2e.sh # Real Codex Stop-hook E2E # Code review UI test ./tests/manual/local/test-opencode-review.sh # Code review UI test @@ -187,6 +188,19 @@ UI test scripts simulate plugin behavior locally: 4. Verifies "OpenCode" badge + "Send Feedback" button (not "Copy Feedback") 5. Tests feedback submission flow +**`test-codex-plan-review-e2e.sh`** + +1. Builds the hook + review apps (unless `--skip-build`) +2. Creates a disposable `HOME` and sample git repo +3. Copies your Codex auth into the disposable config +4. Enables `codex_hooks` and registers a `Stop` hook pointing at the local Plannotator entrypoint +5. Runs a real `codex exec` prompt that returns only a plan block +6. Leaves behind rollout logs, Plannotator history, plan files, and session URLs in an artifact directory + +This is the best harness when you want to verify the full Codex deny/revise/approve loop instead of simulating hook +payloads. For browser automation, set `PLANNOTATOR_BROWSER=/usr/bin/true`, keep the script running in one terminal, +and drive the printed session URL with Playwright from another terminal. 
+ See [tests/README.md](../tests/README.md) for additional integration and utility test scripts. ### Manual Testing Workflow diff --git a/tests/manual/local/enter-codex-plan-review-sandbox.sh b/tests/manual/local/enter-codex-plan-review-sandbox.sh new file mode 100755 index 000000000..815ef6f8e --- /dev/null +++ b/tests/manual/local/enter-codex-plan-review-sandbox.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Create a fresh isolated Codex + Plannotator sandbox and open a shell inside it. + +set -euo pipefail + +ROOT_DIR="${1:-/tmp/plannotator-codex-desktop}" +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)" +WORKSPACE_DIR="$ROOT_DIR/workspace/sample-app" + +echo "Resetting sandbox: $ROOT_DIR" +rm -rf "$ROOT_DIR" + +"$PROJECT_ROOT/tests/manual/local/test-codex-plan-review-e2e.sh" \ + --setup-only \ + --skip-build \ + --root-dir "$ROOT_DIR" + +export HOME="$ROOT_DIR/home" +export CODEX_HOME="$ROOT_DIR/home/.codex" +export PATH="$ROOT_DIR/bin:$PATH" + +cd "$WORKSPACE_DIR" + +cat </dev/null 2>&1; then + echo "Missing required command: $1" >&2 + exit 1 + fi +} + +resolve_cmd() { + local name="$1" + local fallback="${2:-}" + if command -v "$name" >/dev/null 2>&1; then + command -v "$name" + return + fi + if [[ -n "$fallback" && -x "$fallback" ]]; then + printf '%s\n' "$fallback" + return + fi + echo "Missing required command: $name" >&2 + exit 1 +} + +KEEP_SANDBOX=false +DETACH=false +SETUP_ONLY=false +SKIP_BUILD=false +ROOT_DIR="" +MODEL="${PLANNOTATOR_CODEX_MODEL:-gpt-5.4-mini}" +SANDBOX_MODE="${PLANNOTATOR_CODEX_SANDBOX:-read-only}" +CODEX_BIN="${CODEX_BIN:-}" +PROMPT_FILE="" +ORIGINAL_HOME="$HOME" + +while [[ $# -gt 0 ]]; do + case "$1" in + --keep) + KEEP_SANDBOX=true + ;; + --detach) + DETACH=true + KEEP_SANDBOX=true + ;; + --setup-only) + SETUP_ONLY=true + KEEP_SANDBOX=true + ;; + --skip-build) + SKIP_BUILD=true + ;; + --root-dir) + ROOT_DIR="$2" + shift + ;; + --model) + MODEL="$2" + shift + ;; + --sandbox) + SANDBOX_MODE="$2" 
+ shift + ;; + --codex-bin) + CODEX_BIN="$2" + shift + ;; + --prompt-file) + PROMPT_FILE="$2" + shift + ;; + --help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + echo >&2 + usage >&2 + exit 1 + ;; + esac + shift +done + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)" +BUN_BIN="$(resolve_cmd bun "$ORIGINAL_HOME/.bun/bin/bun")" +GIT_BIN="$(resolve_cmd git)" +NODE_BIN="$(resolve_cmd node)" +export PATH="$(dirname "$BUN_BIN"):$PATH" + +resolve_codex_js() { + find "$PROJECT_ROOT/node_modules" -path '*/@openai/codex/bin/codex.js' | sort | head -n 1 +} + +declare -a CODEX_CMD=() +if [[ -n "$CODEX_BIN" ]]; then + if [[ "$CODEX_BIN" == *.js ]]; then + CODEX_CMD=("$NODE_BIN" "$CODEX_BIN") + else + CODEX_CMD=("$CODEX_BIN") + fi +else + REPO_CODEX_JS="$(resolve_codex_js)" + if [[ -n "$REPO_CODEX_JS" ]]; then + CODEX_CMD=("$NODE_BIN" "$REPO_CODEX_JS") + CODEX_BIN="$REPO_CODEX_JS" + elif command -v codex >/dev/null 2>&1; then + CODEX_CMD=("$(command -v codex)") + CODEX_BIN="${CODEX_CMD[0]}" + else + echo "Could not find a Codex CLI. Install dependencies or pass --codex-bin PATH." >&2 + exit 1 + fi +fi + +if [[ -n "$PROMPT_FILE" && ! -f "$PROMPT_FILE" ]]; then + echo "Prompt file not found: $PROMPT_FILE" >&2 + exit 1 +fi + +if [[ -z "$ROOT_DIR" ]]; then + ROOT_DIR="$(mktemp -d -t plannotator-codex-stop-e2e-XXXXXX)" +else + mkdir -p "$ROOT_DIR" + ROOT_DIR="$(cd "$ROOT_DIR" && pwd)" +fi + +TEMP_HOME="$ROOT_DIR/home" +WORKSPACE_DIR="$ROOT_DIR/workspace/sample-app" +BIN_DIR="$ROOT_DIR/bin" +ARTIFACTS_DIR="$ROOT_DIR/artifacts" +CODEX_LOG="$ARTIFACTS_DIR/codex-output.log" +METADATA_FILE="$ARTIFACTS_DIR/metadata.env" +PROMPT_PATH="$ARTIFACTS_DIR/prompt.txt" +RUNNER_SCRIPT="$BIN_DIR/run-codex-e2e" + +cleanup() { + local exit_code=$? 
+ echo + if [[ "$KEEP_SANDBOX" == "true" || $exit_code -ne 0 ]]; then + echo "Sandbox preserved at: $ROOT_DIR" + if [[ -f "$METADATA_FILE" ]]; then + echo "Artifact metadata: $METADATA_FILE" + fi + return + fi + echo "Cleaning up sandbox: $ROOT_DIR" + rm -rf "$ROOT_DIR" +} +trap cleanup EXIT + +mkdir -p "$TEMP_HOME/.codex" "$WORKSPACE_DIR/src" "$BIN_DIR" "$ARTIFACTS_DIR" + +AUTH_SRC="${CODEX_AUTH_JSON:-$ORIGINAL_HOME/.codex/auth.json}" +if [[ ! -f "$AUTH_SRC" ]]; then + echo "Codex auth file not found: $AUTH_SRC" >&2 + echo "Set CODEX_AUTH_JSON or run codex login first." >&2 + exit 1 +fi + +cp "$AUTH_SRC" "$TEMP_HOME/.codex/auth.json" +if [[ -f "$ORIGINAL_HOME/.codex/installation_id" ]]; then + cp "$ORIGINAL_HOME/.codex/installation_id" "$TEMP_HOME/.codex/installation_id" +fi + +cat > "$TEMP_HOME/.codex/config.toml" <<'EOF' +[features] +codex_hooks = true +EOF + +cat > "$TEMP_HOME/.codex/hooks.json" <<'EOF' +{ + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "plannotator", + "timeout": 345600 + } + ] + } + ] + } +} +EOF + +cat > "$BIN_DIR/plannotator" < "\$payload_file" +{ + printf -- '--- %s ---\\n' "\$(date -u +%Y-%m-%dT%H:%M:%SZ)" + printf 'pid=%s cwd=%s args=%s\\n' "\$\$" "\$(pwd)" "\$*" + printf 'HOME=%s CODEX_HOME=%s PATH=%s\\n' "\$HOME" "\${CODEX_HOME:-}" "\$PATH" + cat "\$payload_file" + printf '\\n' +} >> "$ARTIFACTS_DIR/plannotator-hook-events.log" +PLANNOTATOR_DEBUG=1 exec "$BUN_BIN" run "$PROJECT_ROOT/apps/hook/server/index.ts" "\$@" < "\$payload_file" 2>> "$ARTIFACTS_DIR/plannotator-hook.stderr.log" +EOF +chmod +x "$BIN_DIR/plannotator" + +cat > "$WORKSPACE_DIR/package.json" <<'EOF' +{ + "name": "sample-app", + "private": true, + "type": "module", + "scripts": { + "test": "echo \"No tests yet\"" + } +} +EOF + +cat > "$WORKSPACE_DIR/README.md" <<'EOF' +# Sample App + +Tiny TypeScript app for exercising Codex plan review through Plannotator. 
+EOF + +cat > "$WORKSPACE_DIR/src/index.ts" <<'EOF' +export function greet(name: string): string { + return `Hello, ${name}!`; +} + +console.log(greet("World")); +EOF + +( + cd "$WORKSPACE_DIR" + "$GIT_BIN" init -q -b master + "$GIT_BIN" config user.email "test@example.com" + "$GIT_BIN" config user.name "Test User" + "$GIT_BIN" add -A + "$GIT_BIN" commit -q -m "Initial commit" +) + +if [[ -n "$PROMPT_FILE" ]]; then + cp "$PROMPT_FILE" "$PROMPT_PATH" +else + cat > "$PROMPT_PATH" <<'EOF' +Produce a concise implementation plan for adding theme support, tests, and docs to this sample app. Return your final answer ONLY as a ... block and do not implement anything. +EOF +fi + +if [[ "$SKIP_BUILD" != "true" ]]; then + echo "Building hook + review apps..." + ( + cd "$PROJECT_ROOT" + "$BUN_BIN" run build:review >/dev/null + "$BUN_BIN" run build:hook >/dev/null + ) +fi + +echo "Recording Codex metadata..." +env HOME="$TEMP_HOME" "${CODEX_CMD[@]}" --version > "$ARTIFACTS_DIR/codex-version.txt" 2>&1 +env HOME="$TEMP_HOME" "${CODEX_CMD[@]}" features list > "$ARTIFACTS_DIR/codex-features.txt" 2>&1 +env HOME="$TEMP_HOME" "${CODEX_CMD[@]}" login status > "$ARTIFACTS_DIR/codex-login-status.txt" 2>&1 || true + +if ! grep -q 'codex_hooks' "$ARTIFACTS_DIR/codex-features.txt"; then + echo "Selected Codex CLI does not expose codex_hooks." >&2 + echo "See: $ARTIFACTS_DIR/codex-features.txt" >&2 + exit 1 +fi + +cat > "$METADATA_FILE" < "$RUNNER_SCRIPT" +chmod +x "$RUNNER_SCRIPT" + +echo "=== Plannotator Codex Stop-hook E2E ===" +echo "Sandbox root: $ROOT_DIR" +echo "Workspace: $WORKSPACE_DIR" +echo "Artifacts: $ARTIFACTS_DIR" +echo "Codex binary: $CODEX_BIN" +echo "Model: $MODEL" +echo + +if [[ "$SETUP_ONLY" == "true" ]]; then + echo "Setup complete. Codex was not started." 
+ echo + echo "To run the isolated Codex command manually:" + echo " $RUNNER_SCRIPT" + echo + echo "Or enter the isolated workspace yourself:" + echo " export HOME=\"$TEMP_HOME\"" + echo " export CODEX_HOME=\"$TEMP_HOME/.codex\"" + echo " export PATH=\"$BIN_DIR:\$PATH\"" + echo " cd \"$WORKSPACE_DIR\"" + echo + echo "Then run Codex however you want. The sandbox will be preserved." + exit 0 +fi + +if [[ "$DETACH" == "true" ]]; then + nohup "$RUNNER_SCRIPT" >"$CODEX_LOG" 2>&1 < /dev/null & +else + "$RUNNER_SCRIPT" >"$CODEX_LOG" 2>&1 & +fi +CODEX_PID=$! +echo "$CODEX_PID" > "$ARTIFACTS_DIR/codex.pid" + +read_json_field() { + "$NODE_BIN" -e 'const fs=require("fs"); const [file,key]=process.argv.slice(1); const data=JSON.parse(fs.readFileSync(file,"utf8")); const value=data[key]; if (value !== undefined) process.stdout.write(String(value));' "$1" "$2" +} + +FIRST_SESSION_FILE="" +FIRST_SESSION_URL="" +deadline=$((SECONDS + 240)) +while (( SECONDS < deadline )); do + if compgen -G "$TEMP_HOME/.plannotator/sessions/*.json" >/dev/null; then + FIRST_SESSION_FILE="$(find "$TEMP_HOME/.plannotator/sessions" -maxdepth 1 -type f -name '*.json' | sort | tail -n 1)" + FIRST_SESSION_URL="$(read_json_field "$FIRST_SESSION_FILE" url)" + echo "$FIRST_SESSION_FILE" > "$ARTIFACTS_DIR/first-session-file.txt" + printf '%s\n' "$FIRST_SESSION_URL" > "$ARTIFACTS_DIR/first-session-url.txt" + echo "First Plannotator session: $FIRST_SESSION_URL" + break + fi + if ! kill -0 "$CODEX_PID" 2>/dev/null; then + break + fi + sleep 1 +done + +if [[ "$DETACH" == "true" ]]; then + echo + echo "Codex is still running in the background." + echo "PID: $CODEX_PID" + echo "Codex log: $CODEX_LOG" + echo "Metadata: $METADATA_FILE" + echo + echo "To inspect active Plannotator sessions inside the sandbox:" + echo " HOME=\"$TEMP_HOME\" PATH=\"$BIN_DIR:\$PATH\" plannotator sessions" + exit 0 +fi + +set +e +wait "$CODEX_PID" +CODEX_EXIT=$? 
+set -e +printf '%s\n' "$CODEX_EXIT" > "$ARTIFACTS_DIR/codex-exit-code.txt" + +ROLLOUT_PATH="$(find "$TEMP_HOME/.codex/sessions" -type f -name 'rollout-*.jsonl' | sort | tail -n 1 || true)" +if [[ -n "$ROLLOUT_PATH" ]]; then + printf '%s\n' "$ROLLOUT_PATH" > "$ARTIFACTS_DIR/rollout-path.txt" +fi + +if [[ -d "$TEMP_HOME/.plannotator/history" ]]; then + find "$TEMP_HOME/.plannotator/history" -type f | sort > "$ARTIFACTS_DIR/plannotator-history-files.txt" +fi + +if [[ -d "$TEMP_HOME/.plannotator/plans" ]]; then + find "$TEMP_HOME/.plannotator/plans" -type f | sort > "$ARTIFACTS_DIR/plannotator-plan-files.txt" +fi + +echo +echo "Codex exit code: $CODEX_EXIT" +echo "Codex log: $CODEX_LOG" +if [[ -n "$ROLLOUT_PATH" ]]; then + echo "Rollout: $ROLLOUT_PATH" +fi +if [[ -f "$ARTIFACTS_DIR/plannotator-history-files.txt" ]]; then + echo "History index: $ARTIFACTS_DIR/plannotator-history-files.txt" +fi +if [[ -f "$ARTIFACTS_DIR/plannotator-plan-files.txt" ]]; then + echo "Plan index: $ARTIFACTS_DIR/plannotator-plan-files.txt" +fi + +exit "$CODEX_EXIT"