diff --git a/packages/cli/src/cli/cmd/run.errors.ts b/packages/cli/src/cli/cmd/run.errors.ts index adb031e..229c061 100644 --- a/packages/cli/src/cli/cmd/run.errors.ts +++ b/packages/cli/src/cli/cmd/run.errors.ts @@ -15,6 +15,7 @@ export function classifySessionError(err: unknown): ClassifiedSessionError { if (status === 429) return { reason: "rate_limit", code: "429", message } if (status === 401 || status === 403) return { reason: "auth", code: String(status), message } + if (name === "ProviderAuthError") return { reason: "auth", code: status ? String(status) : undefined, message } if (name === "AbortError" || /timeout/i.test(message)) { return { reason: "timeout", code: status ? String(status) : undefined, message } } @@ -31,13 +32,18 @@ function extractMessage(err: unknown): string { if (err instanceof Error) return err.message if (typeof err === "string") return err if (err && typeof err === "object" && "message" in err) return String((err as { message: unknown }).message) + if (err && typeof err === "object" && "data" in err) { + const data = (err as { data: unknown }).data + if (data && typeof data === "object" && "message" in data) return String((data as { message: unknown }).message) + } return String(err) } function extractStatus(err: unknown): number | undefined { if (err && typeof err === "object") { - const e = err as { status?: unknown; statusCode?: unknown; response?: { status?: unknown } } - const raw = e.status ?? e.statusCode ?? e.response?.status + const e = err as { status?: unknown; statusCode?: unknown; response?: { status?: unknown }; data?: unknown } + const data = e.data && typeof e.data === "object" ? (e.data as { status?: unknown; statusCode?: unknown }) : undefined + const raw = e.status ?? e.statusCode ?? e.response?.status ?? data?.status ?? data?.statusCode if (typeof raw === "number") return raw if (typeof raw === "string" && /^\d+$/.test(raw)) return Number(raw) } diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts index a4cfb2b..b65e0c7 100644 --- a/packages/cli/src/cli/cmd/run.ts +++ b/packages/cli/src/cli/cmd/run.ts @@ -446,6 +446,11 @@ export const RunCommand = cmd({ const events = await sdk.event.subscribe() let error: string | undefined + // Guard: at most one session_error is emitted per run regardless of which + // failure path fires first (in-loop session.error handler, loopDone.catch, + // or promptResult rejection). Without this a mid-run session.error event + // can fire site 1 while promptResult/loop() also rejects and fires site 2/3. + let sessionErrorEmitted = false const startTime = Date.now() const childSessions = new Set() const seqBySession = new Map() @@ -571,6 +576,20 @@ export const RunCommand = cmd({ // spuriously-green job. process.exitCode (not process.exit) lets the // loop drain to session.status idle and emit session_complete first. process.exitCode = 1 + if (!sessionErrorEmitted) { + sessionErrorEmitted = true + const classified = classifySessionError(props.error) + // Structured session_error (classified reason/code/message) is emitted for the + // primary session only. The generic "error" event below fires for both primary + // and child-session failures and carries the raw error object — these are + // intentionally distinct channels: session_error is for structured telemetry/CI + // consumers, "error" is the legacy observable for raw error pass-through. + emit("session_error", { + reason: classified.reason, + code: classified.code, + message: classified.message, + }) + } } if (emit("error", { error: props.error, sourceSessionID: props.sessionID })) continue UI.error(err) @@ -713,11 +732,14 @@ export const RunCommand = cmd({ }) .catch((e) => { const classified = classifySessionError(e) - emit("session_error", { - reason: classified.reason, - code: classified.code, - message: classified.message, - }) + if (!sessionErrorEmitted) { + sessionErrorEmitted = true + emit("session_error", { + reason: classified.reason, + code: classified.code, + message: classified.message, + }) + } emit("session_complete", { durationMs: Date.now() - startTime, error: classified.message, @@ -760,11 +782,14 @@ export const RunCommand = cmd({ (e) => { const classified = classifySessionError(e) error = error ? error + EOL + classified.message : classified.message - emit("session_error", { - reason: classified.reason, - code: classified.code, - message: classified.message, - }) + if (!sessionErrorEmitted) { + sessionErrorEmitted = true + emit("session_error", { + reason: classified.reason, + code: classified.code, + message: classified.message, + }) + } emit("session_complete", { durationMs: Date.now() - startTime, error: classified.message, diff --git a/packages/cli/test/cli/classify-session-error.test.ts b/packages/cli/test/cli/classify-session-error.test.ts index b4a3d1d..f88c532 100644 --- a/packages/cli/test/cli/classify-session-error.test.ts +++ b/packages/cli/test/cli/classify-session-error.test.ts @@ -15,6 +15,18 @@ describe("classifySessionError (#63)", () => { expect(res.code).toBe("401") }) + test("stored ProviderAuthError → auth", () => { + const res = classifySessionError({ + name: "ProviderAuthError", + data: { + providerID: "openai", + message: "Invalid API key", + }, + }) + expect(res.reason).toBe("auth") + expect(res.message).toBe("Invalid API key") + }) + test("AbortError → timeout", () => { const err = new Error("aborted") err.name = "AbortError" @@ -34,4 +46,18 @@ describe("classifySessionError (#63)", () => { const res = classifySessionError({ status: 500, message: "internal" }) expect(res.reason).toBe("provider") }) + + test("stored APIError statusCode → provider", () => { + const res = classifySessionError({ + name: "APIError", + data: { + message: "internal", + statusCode: 500, + isRetryable: true, + }, + }) + expect(res.reason).toBe("provider") + expect(res.code).toBe("500") + expect(res.message).toBe("internal") + }) }) diff --git a/packages/cli/test/cli/run-schema-v1.test.ts b/packages/cli/test/cli/run-schema-v1.test.ts index 51a7b87..a26252f 100644 --- a/packages/cli/test/cli/run-schema-v1.test.ts +++ b/packages/cli/test/cli/run-schema-v1.test.ts @@ -42,6 +42,20 @@ describe("run.ts v1 schema emissions (#63)", () => { expect(errIdx).toBeLessThan(source.lastIndexOf('emit("session_complete"')) }) + test("primary session.error events are surfaced as session_error", async () => { + const source = await Bun.file(RUN_SRC).text() + const idx = source.indexOf('if (event.type === "session.error")') + expect(idx).toBeGreaterThan(-1) + // Bound the window to the *next* event handler so the assertions don't + // depend on a magic character count that must be bumped as the block grows. + const after = source.slice(idx + 1) + const nextHandlerOffset = after.indexOf('if (event.type === "') + const block = nextHandlerOffset === -1 ? after : after.slice(0, nextHandlerOffset) + expect(block).toContain("classifySessionError(props.error)") + expect(block).toContain('emit("session_error"') + expect(block).toContain('emit("error"') + }) + test("text / reasoning / tool_use include sequenceNum", async () => { const source = await Bun.file(RUN_SRC).text() expect(source).toContain("sequenceNum") diff --git a/packages/cli/test/cli/run-session-error-emit.test.ts b/packages/cli/test/cli/run-session-error-emit.test.ts new file mode 100644 index 0000000..1e4c60b --- /dev/null +++ b/packages/cli/test/cli/run-session-error-emit.test.ts @@ -0,0 +1,195 @@ +import path from "path" +import { describe, expect, test } from "bun:test" +import { fileURLToPath } from "url" + +// Behavioral regression test for aictrl-dev/cli#81: +// Verify that `session_error` is emitted when a `session.error` event is +// observed DURING the event loop (the new in-loop handler), not merely that +// the source text contains the right substrings. +// +// Round-1 test used --model nonexistent-provider/nonexistent-model which +// fails BEFORE the loop (model-resolution), exercising the pre-existing +// promptResult rejection handler at run.ts:~774 — NOT the new in-loop emit +// added in #81. Removing the new emit left the round-1 test green. +// +// This test uses --attach to a controlled mock server that emits a +// session.error event mid-stream, so the assertion fails if the new +// in-loop emit is removed. The test also asserts at-most-one session_error +// per run, validating the sessionErrorEmitted guard added in round-2. + +const __dirname = path.dirname(fileURLToPath(import.meta.url)) +const CLI_ENTRY = path.resolve(__dirname, "../../src/index.ts") + +const SESSION_ID = "sess-test-001" + +/** + * Minimal HTTP server that simulates an aictrl backend. + * Returns the server URL and a stop() function. + * + * The SDK wraps response bodies as { data: } automatically, so the + * server returns the raw data shapes (without an extra "data" envelope). + * Event payloads are returned in SSE format; the SDK parses the JSON lines. + */ +async function startMockServer(): Promise<{ url: string; stop: () => void }> { + const server = Bun.serve({ + port: 0, // OS-assigned free port + async fetch(req) { + const url = new URL(req.url) + + // List sessions (used by --continue; we return empty) + if (req.method === "GET" && url.pathname === "/session") { + return new Response(JSON.stringify([]), { + headers: { "content-type": "application/json" }, + }) + } + + // Create session + if (req.method === "POST" && url.pathname === "/session") { + return new Response(JSON.stringify({ id: SESSION_ID }), { + headers: { "content-type": "application/json" }, + }) + } + + // Get config (used by share check — empty data means auto-share is off) + if (req.method === "GET" && url.pathname === "/config") { + return new Response(JSON.stringify({}), { + headers: { "content-type": "application/json" }, + }) + } + + // Accept the prompt (session appears to start; any 2xx is fine) + if (req.method === "POST" && url.pathname.endsWith("/message")) { + return new Response(JSON.stringify({}), { + headers: { "content-type": "application/json" }, + }) + } + + // SSE event stream: emit session.error then session.status idle. + // session.error triggers the NEW in-loop handler added in PR #81. + // session.status idle breaks the loop so the process exits cleanly. + if (req.method === "GET" && url.pathname === "/event") { + const sessionError = { + type: "session.error", + properties: { + sessionID: SESSION_ID, + error: { + name: "ProviderAuthError", + message: "mock auth failure injected by test server", + }, + }, + } + const sessionIdle = { + type: "session.status", + properties: { + sessionID: SESSION_ID, + status: { type: "idle" }, + }, + } + const body = + `data: ${JSON.stringify(sessionError)}\n\n` + + `data: ${JSON.stringify(sessionIdle)}\n\n` + return new Response(body, { + headers: { + "content-type": "text/event-stream", + "cache-control": "no-cache", + }, + }) + } + + // Catch-all 404 (SDK surfaces as non-throwing error, not a panic) + return new Response(JSON.stringify({ error: "not found" }), { + status: 404, + headers: { "content-type": "application/json" }, + }) + }, + }) + + const url = `http://localhost:${server.port}` + const stop = () => server.stop(true) + return { url, stop } +} + +describe("run --format json session_error emission (#81 in-loop path)", () => { + test( + "session_error is emitted when session.error event fires during the loop (not just on pre-loop failure)", + async () => { + // Acceptance criterion: removing the emit("session_error", …) call inside + // the `if (event.type === "session.error")` block in run.ts must cause + // this test to fail. + const { url: serverUrl, stop } = await startMockServer() + + try { + // --attach routes through the SDK event stream; promptResult stays + // Promise.resolve() so the promptResult rejection handler (the pre-existing + // emit site) cannot fire here — only the in-loop handler can. + const proc = Bun.spawn( + [ + "bun", + "run", + CLI_ENTRY, + "run", + "--format", + "json", + "--attach", + serverUrl, + "test prompt", + ], + { + cwd: process.cwd(), + env: { + ...process.env, + ANTHROPIC_API_KEY: "", + OPENAI_API_KEY: "", + }, + stdout: "pipe", + stderr: "pipe", + }, + ) + + const timeout = setTimeout(() => { + proc.kill() + }, 15_000) + + const [stdout] = await Promise.all([ + new Response(proc.stdout).text(), + proc.exited, + ]) + clearTimeout(timeout) + + // Parse JSON lines from stdout, skip non-JSON diagnostic lines + const events: Array> = [] + for (const line of stdout.split("\n")) { + const trimmed = line.trim() + if (!trimmed) continue + try { + events.push(JSON.parse(trimmed)) + } catch { + // ignore non-JSON diagnostic output + } + } + + // The in-loop session.error handler must have emitted session_error + const sessionErrorEvents = events.filter((e) => e.type === "session_error") + expect(sessionErrorEvents.length).toBeGreaterThan(0) + + // The event must carry a classified reason from classifySessionError + const VALID_REASONS = ["rate_limit", "auth", "timeout", "oom", "provider", "unknown"] + for (const ev of sessionErrorEvents) { + expect(typeof ev.reason).toBe("string") + expect(VALID_REASONS).toContain(ev.reason as string) + expect(typeof ev.message).toBe("string") + expect((ev.message as string).length).toBeGreaterThan(0) + } + + // At most one session_error per run — validates the sessionErrorEmitted guard + expect(sessionErrorEvents.length).toBe(1) + + // Process must have exited non-zero (the session failed) + expect(await proc.exited).not.toBe(0) + } finally { + stop() + } + }, + 20_000, + ) +})