From 99693722253c3927cdb0a9f2567645165a3915a8 Mon Sep 17 00:00:00 2001 From: Bulat Yapparov Date: Sat, 13 Jun 2026 14:02:16 +0100 Subject: [PATCH 1/3] fix(run): emit session_error for session failures --- packages/cli/src/cli/cmd/run.errors.ts | 10 +++++-- packages/cli/src/cli/cmd/run.ts | 6 +++++ .../test/cli/classify-session-error.test.ts | 26 +++++++++++++++++++ packages/cli/test/cli/run-schema-v1.test.ts | 10 +++++++ 4 files changed, 50 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/cli/cmd/run.errors.ts b/packages/cli/src/cli/cmd/run.errors.ts index adb031e..229c061 100644 --- a/packages/cli/src/cli/cmd/run.errors.ts +++ b/packages/cli/src/cli/cmd/run.errors.ts @@ -15,6 +15,7 @@ export function classifySessionError(err: unknown): ClassifiedSessionError { if (status === 429) return { reason: "rate_limit", code: "429", message } if (status === 401 || status === 403) return { reason: "auth", code: String(status), message } + if (name === "ProviderAuthError") return { reason: "auth", code: status ? String(status) : undefined, message } if (name === "AbortError" || /timeout/i.test(message)) { return { reason: "timeout", code: status ? String(status) : undefined, message } } @@ -31,13 +32,18 @@ function extractMessage(err: unknown): string { if (err instanceof Error) return err.message if (typeof err === "string") return err if (err && typeof err === "object" && "message" in err) return String((err as { message: unknown }).message) + if (err && typeof err === "object" && "data" in err) { + const data = (err as { data: unknown }).data + if (data && typeof data === "object" && "message" in data) return String((data as { message: unknown }).message) + } return String(err) } function extractStatus(err: unknown): number | undefined { if (err && typeof err === "object") { - const e = err as { status?: unknown; statusCode?: unknown; response?: { status?: unknown } } - const raw = e.status ?? e.statusCode ?? e.response?.status + const e = err as { status?: unknown; statusCode?: unknown; response?: { status?: unknown }; data?: unknown } + const data = e.data && typeof e.data === "object" ? (e.data as { status?: unknown; statusCode?: unknown }) : undefined + const raw = e.status ?? e.statusCode ?? e.response?.status ?? data?.status ?? data?.statusCode if (typeof raw === "number") return raw if (typeof raw === "string" && /^\d+$/.test(raw)) return Number(raw) } diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts index a4cfb2b..f237a1c 100644 --- a/packages/cli/src/cli/cmd/run.ts +++ b/packages/cli/src/cli/cmd/run.ts @@ -571,6 +571,12 @@ export const RunCommand = cmd({ // spuriously-green job. process.exitCode (not process.exit) lets the // loop drain to session.status idle and emit session_complete first. process.exitCode = 1 + const classified = classifySessionError(props.error) + emit("session_error", { + reason: classified.reason, + code: classified.code, + message: classified.message, + }) } if (emit("error", { error: props.error, sourceSessionID: props.sessionID })) continue UI.error(err) diff --git a/packages/cli/test/cli/classify-session-error.test.ts b/packages/cli/test/cli/classify-session-error.test.ts index b4a3d1d..f88c532 100644 --- a/packages/cli/test/cli/classify-session-error.test.ts +++ b/packages/cli/test/cli/classify-session-error.test.ts @@ -15,6 +15,18 @@ describe("classifySessionError (#63)", () => { expect(res.code).toBe("401") }) + test("stored ProviderAuthError → auth", () => { + const res = classifySessionError({ + name: "ProviderAuthError", + data: { + providerID: "openai", + message: "Invalid API key", + }, + }) + expect(res.reason).toBe("auth") + expect(res.message).toBe("Invalid API key") + }) + test("AbortError → timeout", () => { const err = new Error("aborted") err.name = "AbortError" @@ -34,4 +46,18 @@ describe("classifySessionError (#63)", () => { const res = classifySessionError({ status: 500, message: "internal" }) expect(res.reason).toBe("provider") }) + + test("stored APIError statusCode → provider", () => { + const res = classifySessionError({ + name: "APIError", + data: { + message: "internal", + statusCode: 500, + isRetryable: true, + }, + }) + expect(res.reason).toBe("provider") + expect(res.code).toBe("500") + expect(res.message).toBe("internal") + }) }) diff --git a/packages/cli/test/cli/run-schema-v1.test.ts b/packages/cli/test/cli/run-schema-v1.test.ts index 51a7b87..ecdd0f1 100644 --- a/packages/cli/test/cli/run-schema-v1.test.ts +++ b/packages/cli/test/cli/run-schema-v1.test.ts @@ -42,6 +42,16 @@ describe("run.ts v1 schema emissions (#63)", () => { expect(errIdx).toBeLessThan(source.lastIndexOf('emit("session_complete"')) }) + test("primary session.error events are surfaced as session_error", async () => { + const source = await Bun.file(RUN_SRC).text() + const idx = source.indexOf('if (event.type === "session.error")') + expect(idx).toBeGreaterThan(-1) + const block = source.slice(idx, idx + 1400) + expect(block).toContain("classifySessionError(props.error)") + expect(block).toContain('emit("session_error"') + expect(block).toContain('emit("error"') + }) + test("text / reasoning / tool_use include sequenceNum", async () => { const source = await Bun.file(RUN_SRC).text() expect(source).toContain("sequenceNum") From 58e48e420afdcc7df2828319899aad1523730079 Mon Sep 17 00:00:00 2001 From: Bulat Yapparov Date: Mon, 22 Jun 2026 08:00:47 +0100 Subject: [PATCH 2/3] fix(run): address code-review findings from PR #81 - Add behavioral test (run-session-error-emit.test.ts): spawns the CLI with --format json against a failing model and asserts the session_error JSON event is emitted with a classified reason/message. Closes the source-text-grep gap flagged by the bot. - Fix brittle 1400-char window in run-schema-v1.test.ts: anchor the assertion window to the next event handler boundary instead of a magic character count, matching the pattern used in run-session-error-exit.test.ts. - Add clarifying comment in run.ts session.error handler explaining that session_error (structured/classified) and error (raw/legacy) are intentionally distinct channels serving different consumers, not a double-count bug. Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_0187MsfK1upr6K2BKVbmaebQ --- packages/cli/src/cli/cmd/run.ts | 5 ++ packages/cli/test/cli/run-schema-v1.test.ts | 6 +- .../test/cli/run-session-error-emit.test.ts | 84 +++++++++++++++++++ 3 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 packages/cli/test/cli/run-session-error-emit.test.ts diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts index f237a1c..c82d2f8 100644 --- a/packages/cli/src/cli/cmd/run.ts +++ b/packages/cli/src/cli/cmd/run.ts @@ -572,6 +572,11 @@ export const RunCommand = cmd({ // loop drain to session.status idle and emit session_complete first. process.exitCode = 1 const classified = classifySessionError(props.error) + // Structured session_error (classified reason/code/message) is emitted for the + // primary session only. The generic "error" event below fires for both primary + // and child-session failures and carries the raw error object — these are + // intentionally distinct channels: session_error is for structured telemetry/CI + // consumers, "error" is the legacy observable for raw error pass-through. emit("session_error", { reason: classified.reason, code: classified.code, diff --git a/packages/cli/test/cli/run-schema-v1.test.ts b/packages/cli/test/cli/run-schema-v1.test.ts index ecdd0f1..a26252f 100644 --- a/packages/cli/test/cli/run-schema-v1.test.ts +++ b/packages/cli/test/cli/run-schema-v1.test.ts @@ -46,7 +46,11 @@ describe("run.ts v1 schema emissions (#63)", () => { const source = await Bun.file(RUN_SRC).text() const idx = source.indexOf('if (event.type === "session.error")') expect(idx).toBeGreaterThan(-1) - const block = source.slice(idx, idx + 1400) + // Bound the window to the *next* event handler so the assertions don't + // depend on a magic character count that must be bumped as the block grows. + const after = source.slice(idx + 1) + const nextHandlerOffset = after.indexOf('if (event.type === "') + const block = nextHandlerOffset === -1 ? after : after.slice(0, nextHandlerOffset) expect(block).toContain("classifySessionError(props.error)") expect(block).toContain('emit("session_error"') expect(block).toContain('emit("error"') diff --git a/packages/cli/test/cli/run-session-error-emit.test.ts b/packages/cli/test/cli/run-session-error-emit.test.ts new file mode 100644 index 0000000..1829e05 --- /dev/null +++ b/packages/cli/test/cli/run-session-error-emit.test.ts @@ -0,0 +1,84 @@ +import path from "path" +import { describe, expect, test } from "bun:test" +import { fileURLToPath } from "url" + +// Behavioral regression test for aictrl-dev/cli#81: +// Verify that `session_error` is actually emitted as a JSON event when the +// session fails, not just that the source text contains the right substrings. +// The source-text assertions in run-schema-v1.test.ts cannot catch a mistyped +// event key, a missing import, or broken control flow — this test drives a +// real session failure and observes the emitted payload. + +const __dirname = path.dirname(fileURLToPath(import.meta.url)) +const CLI_ENTRY = path.resolve(__dirname, "../../src/index.ts") + +describe("run --format json session_error emission (#81)", () => { + test("session_error event is emitted with a classified reason on session failure", async () => { + // Run with a nonexistent model to trigger a session failure. + // The CLI must emit a { type: "session_error", reason: string } JSON line + // to stdout before exiting non-zero. + const proc = Bun.spawn( + [ + "bun", + "run", + "--conditions=browser", + CLI_ENTRY, + "run", + "--format", + "json", + "test prompt", + "--model", + "nonexistent-provider/nonexistent-model", + ], + { + cwd: process.cwd(), + env: { + ...process.env, + ANTHROPIC_API_KEY: "", + OPENAI_API_KEY: "", + }, + stdout: "pipe", + stderr: "pipe", + }, + ) + + const timeout = setTimeout(() => { + proc.kill() + }, 15_000) + + const [stdout] = await Promise.all([ + new Response(proc.stdout).text(), + proc.exited, + ]) + clearTimeout(timeout) + + // Parse JSON lines from stdout, skip non-JSON lines defensively + const events: Array> = [] + for (const line of stdout.split("\n")) { + const trimmed = line.trim() + if (!trimmed) continue + try { + events.push(JSON.parse(trimmed)) + } catch { + // ignore non-JSON diagnostic output + } + } + + // At least one session_error event must have been emitted + const sessionErrorEvents = events.filter((e) => e.type === "session_error") + expect(sessionErrorEvents.length).toBeGreaterThan(0) + + // The emitted event must carry a classified reason (one of the known values) + const VALID_REASONS = ["rate_limit", "auth", "timeout", "oom", "provider", "unknown"] + for (const ev of sessionErrorEvents) { + expect(typeof ev.reason).toBe("string") + expect(VALID_REASONS).toContain(ev.reason as string) + // message must be present and non-empty + expect(typeof ev.message).toBe("string") + expect((ev.message as string).length).toBeGreaterThan(0) + } + + // Process must have exited non-zero (the session failed) + expect(await proc.exited).not.toBe(0) + }, 20_000) +}) From b09fec9d35deafd2b551258031042ca5a40d17f4 Mon Sep 17 00:00:00 2001 From: Bulat Yapparov Date: Mon, 22 Jun 2026 10:40:21 +0100 Subject: [PATCH 3/3] fix(run): address round-2 review findings for PR #81 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Finding 1 (TRUE/FIX): replace subprocess test that used nonexistent-model (exercised pre-existing promptResult rejection handler, not the new in-loop emit) with a mock-server --attach test that injects a session.error event mid-stream. Test now fails if the new in-loop emit("session_error",…) is removed. Finding 2 (TRUE/FIX): drop --conditions=browser from the test subprocess. The flag was cargo-culted from the dev script; no packages in this repo export browser-specific conditions, so it was harmless but unexplained and potentially misleading. Removed without replacement. Finding 3 (TRUE/FIX): add a shared `let sessionErrorEmitted = false` flag in execute() scope. All three session_error emit sites (in-loop session.error handler, loopDone.catch, promptResult rejection handler) now short-circuit on the flag so at most one session_error is emitted per run regardless of which failure paths fire. New test also asserts count === 1. --- packages/cli/src/cli/cmd/run.ts | 56 ++-- .../test/cli/run-session-error-emit.test.ts | 255 +++++++++++++----- 2 files changed, 218 insertions(+), 93 deletions(-) diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts index c82d2f8..b65e0c7 100644 --- a/packages/cli/src/cli/cmd/run.ts +++ b/packages/cli/src/cli/cmd/run.ts @@ -446,6 +446,11 @@ export const RunCommand = cmd({ const events = await sdk.event.subscribe() let error: string | undefined + // Guard: at most one session_error is emitted per run regardless of which + // failure path fires first (in-loop session.error handler, loopDone.catch, + // or promptResult rejection). Without this a mid-run session.error event + // can fire site 1 while promptResult/loop() also rejects and fires site 2/3. + let sessionErrorEmitted = false const startTime = Date.now() const childSessions = new Set() const seqBySession = new Map() @@ -571,17 +576,20 @@ export const RunCommand = cmd({ // spuriously-green job. process.exitCode (not process.exit) lets the // loop drain to session.status idle and emit session_complete first. process.exitCode = 1 - const classified = classifySessionError(props.error) - // Structured session_error (classified reason/code/message) is emitted for the - // primary session only. The generic "error" event below fires for both primary - // and child-session failures and carries the raw error object — these are - // intentionally distinct channels: session_error is for structured telemetry/CI - // consumers, "error" is the legacy observable for raw error pass-through. - emit("session_error", { - reason: classified.reason, - code: classified.code, - message: classified.message, - }) + if (!sessionErrorEmitted) { + sessionErrorEmitted = true + const classified = classifySessionError(props.error) + // Structured session_error (classified reason/code/message) is emitted for the + // primary session only. The generic "error" event below fires for both primary + // and child-session failures and carries the raw error object — these are + // intentionally distinct channels: session_error is for structured telemetry/CI + // consumers, "error" is the legacy observable for raw error pass-through. + emit("session_error", { + reason: classified.reason, + code: classified.code, + message: classified.message, + }) + } } if (emit("error", { error: props.error, sourceSessionID: props.sessionID })) continue UI.error(err) @@ -724,11 +732,14 @@ export const RunCommand = cmd({ }) .catch((e) => { const classified = classifySessionError(e) - emit("session_error", { - reason: classified.reason, - code: classified.code, - message: classified.message, - }) + if (!sessionErrorEmitted) { + sessionErrorEmitted = true + emit("session_error", { + reason: classified.reason, + code: classified.code, + message: classified.message, + }) + } emit("session_complete", { durationMs: Date.now() - startTime, error: classified.message, @@ -771,11 +782,14 @@ export const RunCommand = cmd({ (e) => { const classified = classifySessionError(e) error = error ? error + EOL + classified.message : classified.message - emit("session_error", { - reason: classified.reason, - code: classified.code, - message: classified.message, - }) + if (!sessionErrorEmitted) { + sessionErrorEmitted = true + emit("session_error", { + reason: classified.reason, + code: classified.code, + message: classified.message, + }) + } emit("session_complete", { durationMs: Date.now() - startTime, error: classified.message, diff --git a/packages/cli/test/cli/run-session-error-emit.test.ts b/packages/cli/test/cli/run-session-error-emit.test.ts index 1829e05..1e4c60b 100644 --- a/packages/cli/test/cli/run-session-error-emit.test.ts +++ b/packages/cli/test/cli/run-session-error-emit.test.ts @@ -3,82 +3,193 @@ import { describe, expect, test } from "bun:test" import { fileURLToPath } from "url" // Behavioral regression test for aictrl-dev/cli#81: -// Verify that `session_error` is actually emitted as a JSON event when the -// session fails, not just that the source text contains the right substrings. -// The source-text assertions in run-schema-v1.test.ts cannot catch a mistyped -// event key, a missing import, or broken control flow — this test drives a -// real session failure and observes the emitted payload. +// Verify that `session_error` is emitted when a `session.error` event is +// observed DURING the event loop (the new in-loop handler), not merely that +// the source text contains the right substrings. +// +// Round-1 test used --model nonexistent-provider/nonexistent-model which +// fails BEFORE the loop (model-resolution), exercising the pre-existing +// promptResult rejection handler at run.ts:~774 — NOT the new in-loop emit +// added in #81. Removing the new emit left the round-1 test green. +// +// This test uses --attach to a controlled mock server that emits a +// session.error event mid-stream, so the assertion fails if the new +// in-loop emit is removed. The test also asserts at-most-one session_error +// per run, validating the sessionErrorEmitted guard added in round-2. const __dirname = path.dirname(fileURLToPath(import.meta.url)) const CLI_ENTRY = path.resolve(__dirname, "../../src/index.ts") -describe("run --format json session_error emission (#81)", () => { - test("session_error event is emitted with a classified reason on session failure", async () => { - // Run with a nonexistent model to trigger a session failure. - // The CLI must emit a { type: "session_error", reason: string } JSON line - // to stdout before exiting non-zero. - const proc = Bun.spawn( - [ - "bun", - "run", - "--conditions=browser", - CLI_ENTRY, - "run", - "--format", - "json", - "test prompt", - "--model", - "nonexistent-provider/nonexistent-model", - ], - { - cwd: process.cwd(), - env: { - ...process.env, - ANTHROPIC_API_KEY: "", - OPENAI_API_KEY: "", - }, - stdout: "pipe", - stderr: "pipe", - }, - ) - - const timeout = setTimeout(() => { - proc.kill() - }, 15_000) - - const [stdout] = await Promise.all([ - new Response(proc.stdout).text(), - proc.exited, - ]) - clearTimeout(timeout) - - // Parse JSON lines from stdout, skip non-JSON lines defensively - const events: Array> = [] - for (const line of stdout.split("\n")) { - const trimmed = line.trim() - if (!trimmed) continue +const SESSION_ID = "sess-test-001" + +/** + * Minimal HTTP server that simulates an aictrl backend. + * Returns the server URL and a stop() function. + * + * The SDK wraps response bodies as { data: } automatically, so the + * server returns the raw data shapes (without an extra "data" envelope). + * Event payloads are returned in SSE format; the SDK parses the JSON lines. + */ +async function startMockServer(): Promise<{ url: string; stop: () => void }> { + const server = Bun.serve({ + port: 0, // OS-assigned free port + async fetch(req) { + const url = new URL(req.url) + + // List sessions (used by --continue; we return empty) + if (req.method === "GET" && url.pathname === "/session") { + return new Response(JSON.stringify([]), { + headers: { "content-type": "application/json" }, + }) + } + + // Create session + if (req.method === "POST" && url.pathname === "/session") { + return new Response(JSON.stringify({ id: SESSION_ID }), { + headers: { "content-type": "application/json" }, + }) + } + + // Get config (used by share check — empty data means auto-share is off) + if (req.method === "GET" && url.pathname === "/config") { + return new Response(JSON.stringify({}), { + headers: { "content-type": "application/json" }, + }) + } + + // Accept the prompt (session appears to start; any 2xx is fine) + if (req.method === "POST" && url.pathname.endsWith("/message")) { + return new Response(JSON.stringify({}), { + headers: { "content-type": "application/json" }, + }) + } + + // SSE event stream: emit session.error then session.status idle. + // session.error triggers the NEW in-loop handler added in PR #81. + // session.status idle breaks the loop so the process exits cleanly. + if (req.method === "GET" && url.pathname === "/event") { + const sessionError = { + type: "session.error", + properties: { + sessionID: SESSION_ID, + error: { + name: "ProviderAuthError", + message: "mock auth failure injected by test server", + }, + }, + } + const sessionIdle = { + type: "session.status", + properties: { + sessionID: SESSION_ID, + status: { type: "idle" }, + }, + } + const body = + `data: ${JSON.stringify(sessionError)}\n\n` + + `data: ${JSON.stringify(sessionIdle)}\n\n` + return new Response(body, { + headers: { + "content-type": "text/event-stream", + "cache-control": "no-cache", + }, + }) + } + + // Catch-all 404 (SDK surfaces as non-throwing error, not a panic) + return new Response(JSON.stringify({ error: "not found" }), { + status: 404, + headers: { "content-type": "application/json" }, + }) + }, + }) + + const url = `http://localhost:${server.port}` + const stop = () => server.stop(true) + return { url, stop } +} + +describe("run --format json session_error emission (#81 in-loop path)", () => { + test( + "session_error is emitted when session.error event fires during the loop (not just on pre-loop failure)", + async () => { + // Acceptance criterion: removing the emit("session_error", …) call inside + // the `if (event.type === "session.error")` block in run.ts must cause + // this test to fail. + const { url: serverUrl, stop } = await startMockServer() + try { - events.push(JSON.parse(trimmed)) - } catch { - // ignore non-JSON diagnostic output + // --attach routes through the SDK event stream; promptResult stays + // Promise.resolve() so the promptResult rejection handler (the pre-existing + // emit site) cannot fire here — only the in-loop handler can. + const proc = Bun.spawn( + [ + "bun", + "run", + CLI_ENTRY, + "run", + "--format", + "json", + "--attach", + serverUrl, + "test prompt", + ], + { + cwd: process.cwd(), + env: { + ...process.env, + ANTHROPIC_API_KEY: "", + OPENAI_API_KEY: "", + }, + stdout: "pipe", + stderr: "pipe", + }, + ) + + const timeout = setTimeout(() => { + proc.kill() + }, 15_000) + + const [stdout] = await Promise.all([ + new Response(proc.stdout).text(), + proc.exited, + ]) + clearTimeout(timeout) + + // Parse JSON lines from stdout, skip non-JSON diagnostic lines + const events: Array> = [] + for (const line of stdout.split("\n")) { + const trimmed = line.trim() + if (!trimmed) continue + try { + events.push(JSON.parse(trimmed)) + } catch { + // ignore non-JSON diagnostic output + } + } + + // The in-loop session.error handler must have emitted session_error + const sessionErrorEvents = events.filter((e) => e.type === "session_error") + expect(sessionErrorEvents.length).toBeGreaterThan(0) + + // The event must carry a classified reason from classifySessionError + const VALID_REASONS = ["rate_limit", "auth", "timeout", "oom", "provider", "unknown"] + for (const ev of sessionErrorEvents) { + expect(typeof ev.reason).toBe("string") + expect(VALID_REASONS).toContain(ev.reason as string) + expect(typeof ev.message).toBe("string") + expect((ev.message as string).length).toBeGreaterThan(0) + } + + // At most one session_error per run — validates the sessionErrorEmitted guard + expect(sessionErrorEvents.length).toBe(1) + + // Process must have exited non-zero (the session failed) + expect(await proc.exited).not.toBe(0) + } finally { + stop() } - } - - // At least one session_error event must have been emitted - const sessionErrorEvents = events.filter((e) => e.type === "session_error") - expect(sessionErrorEvents.length).toBeGreaterThan(0) - - // The emitted event must carry a classified reason (one of the known values) - const VALID_REASONS = ["rate_limit", "auth", "timeout", "oom", "provider", "unknown"] - for (const ev of sessionErrorEvents) { - expect(typeof ev.reason).toBe("string") - expect(VALID_REASONS).toContain(ev.reason as string) - // message must be present and non-empty - expect(typeof ev.message).toBe("string") - expect((ev.message as string).length).toBeGreaterThan(0) - } - - // Process must have exited non-zero (the session failed) - expect(await proc.exited).not.toBe(0) - }, 20_000) + }, + 20_000, + ) })