Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions packages/cli/src/cli/cmd/run.errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export function classifySessionError(err: unknown): ClassifiedSessionError {

if (status === 429) return { reason: "rate_limit", code: "429", message }
if (status === 401 || status === 403) return { reason: "auth", code: String(status), message }
if (name === "ProviderAuthError") return { reason: "auth", code: status ? String(status) : undefined, message }
if (name === "AbortError" || /timeout/i.test(message)) {
return { reason: "timeout", code: status ? String(status) : undefined, message }
}
Expand All @@ -31,13 +32,18 @@ function extractMessage(err: unknown): string {
if (err instanceof Error) return err.message
if (typeof err === "string") return err
if (err && typeof err === "object" && "message" in err) return String((err as { message: unknown }).message)
if (err && typeof err === "object" && "data" in err) {
const data = (err as { data: unknown }).data
if (data && typeof data === "object" && "message" in data) return String((data as { message: unknown }).message)
}
return String(err)
}

function extractStatus(err: unknown): number | undefined {
if (err && typeof err === "object") {
const e = err as { status?: unknown; statusCode?: unknown; response?: { status?: unknown } }
const raw = e.status ?? e.statusCode ?? e.response?.status
const e = err as { status?: unknown; statusCode?: unknown; response?: { status?: unknown }; data?: unknown }
const data = e.data && typeof e.data === "object" ? (e.data as { status?: unknown; statusCode?: unknown }) : undefined
const raw = e.status ?? e.statusCode ?? e.response?.status ?? data?.status ?? data?.statusCode
if (typeof raw === "number") return raw
if (typeof raw === "string" && /^\d+$/.test(raw)) return Number(raw)
}
Expand Down
45 changes: 35 additions & 10 deletions packages/cli/src/cli/cmd/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,11 @@ export const RunCommand = cmd({

const events = await sdk.event.subscribe()
let error: string | undefined
// Guard: at most one session_error is emitted per run regardless of which
// failure path fires first (in-loop session.error handler, loopDone.catch,
// or promptResult rejection). Without this a mid-run session.error event
// can fire site 1 while promptResult/loop() also rejects and fires site 2/3.
let sessionErrorEmitted = false
const startTime = Date.now()
const childSessions = new Set<string>()
const seqBySession = new Map<string, number>()
Expand Down Expand Up @@ -571,6 +576,20 @@ export const RunCommand = cmd({
// spuriously-green job. process.exitCode (not process.exit) lets the
// loop drain to session.status idle and emit session_complete first.
process.exitCode = 1
if (!sessionErrorEmitted) {
sessionErrorEmitted = true
const classified = classifySessionError(props.error)
// Structured session_error (classified reason/code/message) is emitted for the
// primary session only. The generic "error" event below fires for both primary
// and child-session failures and carries the raw error object — these are
// intentionally distinct channels: session_error is for structured telemetry/CI
// consumers, "error" is the legacy observable for raw error pass-through.
emit("session_error", {
reason: classified.reason,
code: classified.code,
message: classified.message,
})
}
}
if (emit("error", { error: props.error, sourceSessionID: props.sessionID })) continue
UI.error(err)
Expand Down Expand Up @@ -713,11 +732,14 @@ export const RunCommand = cmd({
})
.catch((e) => {
const classified = classifySessionError(e)
emit("session_error", {
reason: classified.reason,
code: classified.code,
message: classified.message,
})
if (!sessionErrorEmitted) {
sessionErrorEmitted = true
emit("session_error", {
reason: classified.reason,
code: classified.code,
message: classified.message,
})
}
emit("session_complete", {
durationMs: Date.now() - startTime,
error: classified.message,
Expand Down Expand Up @@ -760,11 +782,14 @@ export const RunCommand = cmd({
(e) => {
const classified = classifySessionError(e)
error = error ? error + EOL + classified.message : classified.message
emit("session_error", {
reason: classified.reason,
code: classified.code,
message: classified.message,
})
if (!sessionErrorEmitted) {
sessionErrorEmitted = true
emit("session_error", {
reason: classified.reason,
code: classified.code,
message: classified.message,
})
}
emit("session_complete", {
durationMs: Date.now() - startTime,
error: classified.message,
Expand Down
26 changes: 26 additions & 0 deletions packages/cli/test/cli/classify-session-error.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,18 @@ describe("classifySessionError (#63)", () => {
expect(res.code).toBe("401")
})

test("stored ProviderAuthError → auth", () => {
const res = classifySessionError({
name: "ProviderAuthError",
data: {
providerID: "openai",
message: "Invalid API key",
},
})
expect(res.reason).toBe("auth")
expect(res.message).toBe("Invalid API key")
})

test("AbortError → timeout", () => {
const err = new Error("aborted")
err.name = "AbortError"
Expand All @@ -34,4 +46,18 @@ describe("classifySessionError (#63)", () => {
const res = classifySessionError({ status: 500, message: "internal" })
expect(res.reason).toBe("provider")
})

test("stored APIError statusCode → provider", () => {
const res = classifySessionError({
name: "APIError",
data: {
message: "internal",
statusCode: 500,
isRetryable: true,
},
})
expect(res.reason).toBe("provider")
expect(res.code).toBe("500")
expect(res.message).toBe("internal")
})
})
14 changes: 14 additions & 0 deletions packages/cli/test/cli/run-schema-v1.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,20 @@ describe("run.ts v1 schema emissions (#63)", () => {
expect(errIdx).toBeLessThan(source.lastIndexOf('emit("session_complete"'))
})

test("primary session.error events are surfaced as session_error", async () => {
const source = await Bun.file(RUN_SRC).text()
const idx = source.indexOf('if (event.type === "session.error")')
expect(idx).toBeGreaterThan(-1)
// Bound the window to the *next* event handler so the assertions don't
// depend on a magic character count that must be bumped as the block grows.
const after = source.slice(idx + 1)
const nextHandlerOffset = after.indexOf('if (event.type === "')
const block = nextHandlerOffset === -1 ? after : after.slice(0, nextHandlerOffset)
expect(block).toContain("classifySessionError(props.error)")
expect(block).toContain('emit("session_error"')
expect(block).toContain('emit("error"')
})

test("text / reasoning / tool_use include sequenceNum", async () => {
const source = await Bun.file(RUN_SRC).text()
expect(source).toContain("sequenceNum")
Expand Down
195 changes: 195 additions & 0 deletions packages/cli/test/cli/run-session-error-emit.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
import path from "path"
import { describe, expect, test } from "bun:test"
import { fileURLToPath } from "url"

// Behavioral regression test for aictrl-dev/cli#81:
// Verify that `session_error` is emitted when a `session.error` event is
// observed DURING the event loop (the new in-loop handler), not merely that
// the source text contains the right substrings.
//
// Round-1 test used --model nonexistent-provider/nonexistent-model which
// fails BEFORE the loop (model-resolution), exercising the pre-existing
// promptResult rejection handler at run.ts:~774 — NOT the new in-loop emit
// added in #81. Removing the new emit left the round-1 test green.
//
// This test uses --attach to a controlled mock server that emits a
// session.error event mid-stream, so the assertion fails if the new
// in-loop emit is removed. The test also asserts at-most-one session_error
// per run, validating the sessionErrorEmitted guard added in round-2.

const __dirname = path.dirname(fileURLToPath(import.meta.url))
const CLI_ENTRY = path.resolve(__dirname, "../../src/index.ts")

const SESSION_ID = "sess-test-001"

/**
* Minimal HTTP server that simulates an aictrl backend.
* Returns the server URL and a stop() function.
*
* The SDK wraps response bodies as { data: <body> } automatically, so the
* server returns the raw data shapes (without an extra "data" envelope).
* Event payloads are returned in SSE format; the SDK parses the JSON lines.
*/
async function startMockServer(): Promise<{ url: string; stop: () => void }> {
const server = Bun.serve({
port: 0, // OS-assigned free port
async fetch(req) {
const url = new URL(req.url)

// List sessions (used by --continue; we return empty)
if (req.method === "GET" && url.pathname === "/session") {
return new Response(JSON.stringify([]), {
headers: { "content-type": "application/json" },
})
}

// Create session
if (req.method === "POST" && url.pathname === "/session") {
return new Response(JSON.stringify({ id: SESSION_ID }), {
headers: { "content-type": "application/json" },
})
}

// Get config (used by share check — empty data means auto-share is off)
if (req.method === "GET" && url.pathname === "/config") {
return new Response(JSON.stringify({}), {
headers: { "content-type": "application/json" },
})
}

// Accept the prompt (session appears to start; any 2xx is fine)
if (req.method === "POST" && url.pathname.endsWith("/message")) {
return new Response(JSON.stringify({}), {
headers: { "content-type": "application/json" },
})
}

// SSE event stream: emit session.error then session.status idle.
// session.error triggers the NEW in-loop handler added in PR #81.
// session.status idle breaks the loop so the process exits cleanly.
if (req.method === "GET" && url.pathname === "/event") {
const sessionError = {
type: "session.error",
properties: {
sessionID: SESSION_ID,
error: {
name: "ProviderAuthError",
message: "mock auth failure injected by test server",
},
},
}
const sessionIdle = {
type: "session.status",
properties: {
sessionID: SESSION_ID,
status: { type: "idle" },
},
}
const body =
`data: ${JSON.stringify(sessionError)}\n\n` +
`data: ${JSON.stringify(sessionIdle)}\n\n`
return new Response(body, {
headers: {
"content-type": "text/event-stream",
"cache-control": "no-cache",
},
})
}

// Catch-all 404 (SDK surfaces as non-throwing error, not a panic)
return new Response(JSON.stringify({ error: "not found" }), {
status: 404,
headers: { "content-type": "application/json" },
})
},
})

const url = `http://localhost:${server.port}`
const stop = () => server.stop(true)
return { url, stop }
}

describe("run --format json session_error emission (#81 in-loop path)", () => {
test(
"session_error is emitted when session.error event fires during the loop (not just on pre-loop failure)",
async () => {
// Acceptance criterion: removing the emit("session_error", …) call inside
// the `if (event.type === "session.error")` block in run.ts must cause
// this test to fail.
const { url: serverUrl, stop } = await startMockServer()

try {
// --attach routes through the SDK event stream; promptResult stays
// Promise.resolve() so the promptResult rejection handler (the pre-existing
// emit site) cannot fire here — only the in-loop handler can.
const proc = Bun.spawn(
[
"bun",
"run",
CLI_ENTRY,
"run",
"--format",
"json",
"--attach",
serverUrl,
"test prompt",
],
{
cwd: process.cwd(),
env: {
...process.env,
ANTHROPIC_API_KEY: "",
OPENAI_API_KEY: "",
},
stdout: "pipe",
stderr: "pipe",
},
)

const timeout = setTimeout(() => {
proc.kill()
}, 15_000)

const [stdout] = await Promise.all([
new Response(proc.stdout).text(),
proc.exited,
])
clearTimeout(timeout)

// Parse JSON lines from stdout, skip non-JSON diagnostic lines
const events: Array<Record<string, unknown>> = []
for (const line of stdout.split("\n")) {
const trimmed = line.trim()
if (!trimmed) continue
try {
events.push(JSON.parse(trimmed))
} catch {
// ignore non-JSON diagnostic output
}
}

// The in-loop session.error handler must have emitted session_error
const sessionErrorEvents = events.filter((e) => e.type === "session_error")
expect(sessionErrorEvents.length).toBeGreaterThan(0)

// The event must carry a classified reason from classifySessionError
const VALID_REASONS = ["rate_limit", "auth", "timeout", "oom", "provider", "unknown"]
for (const ev of sessionErrorEvents) {
expect(typeof ev.reason).toBe("string")
expect(VALID_REASONS).toContain(ev.reason as string)
expect(typeof ev.message).toBe("string")
expect((ev.message as string).length).toBeGreaterThan(0)
}

// At most one session_error per run — validates the sessionErrorEmitted guard
expect(sessionErrorEvents.length).toBe(1)

// Process must have exited non-zero (the session failed)
expect(await proc.exited).not.toBe(0)
} finally {
stop()
}
},
20_000,
)
})