From 99693722253c3927cdb0a9f2567645165a3915a8 Mon Sep 17 00:00:00 2001
From: Bulat Yapparov <by4pparov@yandex.ru>
Date: Sat, 13 Jun 2026 14:02:16 +0100
Subject: [PATCH 1/3] fix(run): emit session_error for session failures

---
 packages/cli/src/cli/cmd/run.errors.ts        | 10 +++++--
 packages/cli/src/cli/cmd/run.ts               |  6 +++++
 .../test/cli/classify-session-error.test.ts   | 26 +++++++++++++++++++
 packages/cli/test/cli/run-schema-v1.test.ts   | 10 +++++++
 4 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/packages/cli/src/cli/cmd/run.errors.ts b/packages/cli/src/cli/cmd/run.errors.ts
index adb031e..229c061 100644
--- a/packages/cli/src/cli/cmd/run.errors.ts
+++ b/packages/cli/src/cli/cmd/run.errors.ts
@@ -15,6 +15,7 @@ export function classifySessionError(err: unknown): ClassifiedSessionError {
 
   if (status === 429) return { reason: "rate_limit", code: "429", message }
   if (status === 401 || status === 403) return { reason: "auth", code: String(status), message }
+  if (name === "ProviderAuthError") return { reason: "auth", code: status ? String(status) : undefined, message }
   if (name === "AbortError" || /timeout/i.test(message)) {
     return { reason: "timeout", code: status ? String(status) : undefined, message }
   }
@@ -31,13 +32,18 @@ function extractMessage(err: unknown): string {
   if (err instanceof Error) return err.message
   if (typeof err === "string") return err
   if (err && typeof err === "object" && "message" in err) return String((err as { message: unknown }).message)
+  if (err && typeof err === "object" && "data" in err) {
+    const data = (err as { data: unknown }).data
+    if (data && typeof data === "object" && "message" in data) return String((data as { message: unknown }).message)
+  }
   return String(err)
 }
 
 function extractStatus(err: unknown): number | undefined {
   if (err && typeof err === "object") {
-    const e = err as { status?: unknown; statusCode?: unknown; response?: { status?: unknown } }
-    const raw = e.status ?? e.statusCode ?? e.response?.status
+    const e = err as { status?: unknown; statusCode?: unknown; response?: { status?: unknown }; data?: unknown }
+    const data = e.data && typeof e.data === "object" ? (e.data as { status?: unknown; statusCode?: unknown }) : undefined
+    const raw = e.status ?? e.statusCode ?? e.response?.status ?? data?.status ?? data?.statusCode
     if (typeof raw === "number") return raw
     if (typeof raw === "string" && /^\d+$/.test(raw)) return Number(raw)
   }
diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts
index a4cfb2b..f237a1c 100644
--- a/packages/cli/src/cli/cmd/run.ts
+++ b/packages/cli/src/cli/cmd/run.ts
@@ -571,6 +571,12 @@ export const RunCommand = cmd({
               // spuriously-green job. process.exitCode (not process.exit) lets the
               // loop drain to session.status idle and emit session_complete first.
               process.exitCode = 1
+              const classified = classifySessionError(props.error)
+              emit("session_error", {
+                reason: classified.reason,
+                code: classified.code,
+                message: classified.message,
+              })
             }
             if (emit("error", { error: props.error, sourceSessionID: props.sessionID })) continue
             UI.error(err)
diff --git a/packages/cli/test/cli/classify-session-error.test.ts b/packages/cli/test/cli/classify-session-error.test.ts
index b4a3d1d..f88c532 100644
--- a/packages/cli/test/cli/classify-session-error.test.ts
+++ b/packages/cli/test/cli/classify-session-error.test.ts
@@ -15,6 +15,18 @@ describe("classifySessionError (#63)", () => {
     expect(res.code).toBe("401")
   })
 
+  test("stored ProviderAuthError → auth", () => {
+    const res = classifySessionError({
+      name: "ProviderAuthError",
+      data: {
+        providerID: "openai",
+        message: "Invalid API key",
+      },
+    })
+    expect(res.reason).toBe("auth")
+    expect(res.message).toBe("Invalid API key")
+  })
+
   test("AbortError → timeout", () => {
     const err = new Error("aborted")
     err.name = "AbortError"
@@ -34,4 +46,18 @@ describe("classifySessionError (#63)", () => {
     const res = classifySessionError({ status: 500, message: "internal" })
     expect(res.reason).toBe("provider")
   })
+
+  test("stored APIError statusCode → provider", () => {
+    const res = classifySessionError({
+      name: "APIError",
+      data: {
+        message: "internal",
+        statusCode: 500,
+        isRetryable: true,
+      },
+    })
+    expect(res.reason).toBe("provider")
+    expect(res.code).toBe("500")
+    expect(res.message).toBe("internal")
+  })
 })
diff --git a/packages/cli/test/cli/run-schema-v1.test.ts b/packages/cli/test/cli/run-schema-v1.test.ts
index 51a7b87..ecdd0f1 100644
--- a/packages/cli/test/cli/run-schema-v1.test.ts
+++ b/packages/cli/test/cli/run-schema-v1.test.ts
@@ -42,6 +42,16 @@ describe("run.ts v1 schema emissions (#63)", () => {
     expect(errIdx).toBeLessThan(source.lastIndexOf('emit("session_complete"'))
   })
 
+  test("primary session.error events are surfaced as session_error", async () => {
+    const source = await Bun.file(RUN_SRC).text()
+    const idx = source.indexOf('if (event.type === "session.error")')
+    expect(idx).toBeGreaterThan(-1)
+    const block = source.slice(idx, idx + 1400)
+    expect(block).toContain("classifySessionError(props.error)")
+    expect(block).toContain('emit("session_error"')
+    expect(block).toContain('emit("error"')
+  })
+
   test("text / reasoning / tool_use include sequenceNum", async () => {
     const source = await Bun.file(RUN_SRC).text()
     expect(source).toContain("sequenceNum")

From 58e48e420afdcc7df2828319899aad1523730079 Mon Sep 17 00:00:00 2001
From: Bulat Yapparov <by4pparov@yandex.ru>
Date: Mon, 22 Jun 2026 08:00:47 +0100
Subject: [PATCH 2/3] fix(run): address code-review findings from PR #81

- Add behavioral test (run-session-error-emit.test.ts): spawns the CLI
  with --format json against a failing model and asserts the session_error
  JSON event is emitted with a classified reason/message. Closes the
  source-text-grep gap flagged by the bot.
- Fix brittle 1400-char window in run-schema-v1.test.ts: anchor the
  assertion window to the next event handler boundary instead of a magic
  character count, matching the pattern used in run-session-error-exit.test.ts.
- Add clarifying comment in run.ts session.error handler explaining that
  session_error (structured/classified) and error (raw/legacy) are
  intentionally distinct channels serving different consumers, not a
  double-count bug.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_0187MsfK1upr6K2BKVbmaebQ
---
 packages/cli/src/cli/cmd/run.ts               |  5 ++
 packages/cli/test/cli/run-schema-v1.test.ts   |  6 +-
 .../test/cli/run-session-error-emit.test.ts   | 84 +++++++++++++++++++
 3 files changed, 94 insertions(+), 1 deletion(-)
 create mode 100644 packages/cli/test/cli/run-session-error-emit.test.ts

diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts
index f237a1c..c82d2f8 100644
--- a/packages/cli/src/cli/cmd/run.ts
+++ b/packages/cli/src/cli/cmd/run.ts
@@ -572,6 +572,11 @@ export const RunCommand = cmd({
               // loop drain to session.status idle and emit session_complete first.
               process.exitCode = 1
               const classified = classifySessionError(props.error)
+              // Structured session_error (classified reason/code/message) is emitted for the
+              // primary session only. The generic "error" event below fires for both primary
+              // and child-session failures and carries the raw error object — these are
+              // intentionally distinct channels: session_error is for structured telemetry/CI
+              // consumers, "error" is the legacy observable for raw error pass-through.
               emit("session_error", {
                 reason: classified.reason,
                 code: classified.code,
diff --git a/packages/cli/test/cli/run-schema-v1.test.ts b/packages/cli/test/cli/run-schema-v1.test.ts
index ecdd0f1..a26252f 100644
--- a/packages/cli/test/cli/run-schema-v1.test.ts
+++ b/packages/cli/test/cli/run-schema-v1.test.ts
@@ -46,7 +46,11 @@ describe("run.ts v1 schema emissions (#63)", () => {
     const source = await Bun.file(RUN_SRC).text()
     const idx = source.indexOf('if (event.type === "session.error")')
     expect(idx).toBeGreaterThan(-1)
-    const block = source.slice(idx, idx + 1400)
+    // Bound the window to the *next* event handler so the assertions don't
+    // depend on a magic character count that must be bumped as the block grows.
+    const after = source.slice(idx + 1)
+    const nextHandlerOffset = after.indexOf('if (event.type === "')
+    const block = nextHandlerOffset === -1 ? after : after.slice(0, nextHandlerOffset)
     expect(block).toContain("classifySessionError(props.error)")
     expect(block).toContain('emit("session_error"')
     expect(block).toContain('emit("error"')
diff --git a/packages/cli/test/cli/run-session-error-emit.test.ts b/packages/cli/test/cli/run-session-error-emit.test.ts
new file mode 100644
index 0000000..1829e05
--- /dev/null
+++ b/packages/cli/test/cli/run-session-error-emit.test.ts
@@ -0,0 +1,84 @@
+import path from "path"
+import { describe, expect, test } from "bun:test"
+import { fileURLToPath } from "url"
+
+// Behavioral regression test for aictrl-dev/cli#81:
+//   Verify that `session_error` is actually emitted as a JSON event when the
+//   session fails, not just that the source text contains the right substrings.
+//   The source-text assertions in run-schema-v1.test.ts cannot catch a mistyped
+//   event key, a missing import, or broken control flow — this test drives a
+//   real session failure and observes the emitted payload.
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url))
+const CLI_ENTRY = path.resolve(__dirname, "../../src/index.ts")
+
+describe("run --format json session_error emission (#81)", () => {
+  test("session_error event is emitted with a classified reason on session failure", async () => {
+    // Run with a nonexistent model to trigger a session failure.
+    // The CLI must emit a { type: "session_error", reason: string } JSON line
+    // to stdout before exiting non-zero.
+    const proc = Bun.spawn(
+      [
+        "bun",
+        "run",
+        "--conditions=browser",
+        CLI_ENTRY,
+        "run",
+        "--format",
+        "json",
+        "test prompt",
+        "--model",
+        "nonexistent-provider/nonexistent-model",
+      ],
+      {
+        cwd: process.cwd(),
+        env: {
+          ...process.env,
+          ANTHROPIC_API_KEY: "",
+          OPENAI_API_KEY: "",
+        },
+        stdout: "pipe",
+        stderr: "pipe",
+      },
+    )
+
+    const timeout = setTimeout(() => {
+      proc.kill()
+    }, 15_000)
+
+    const [stdout] = await Promise.all([
+      new Response(proc.stdout).text(),
+      proc.exited,
+    ])
+    clearTimeout(timeout)
+
+    // Parse JSON lines from stdout, skip non-JSON lines defensively
+    const events: Array<Record<string, unknown>> = []
+    for (const line of stdout.split("\n")) {
+      const trimmed = line.trim()
+      if (!trimmed) continue
+      try {
+        events.push(JSON.parse(trimmed))
+      } catch {
+        // ignore non-JSON diagnostic output
+      }
+    }
+
+    // At least one session_error event must have been emitted
+    const sessionErrorEvents = events.filter((e) => e.type === "session_error")
+    expect(sessionErrorEvents.length).toBeGreaterThan(0)
+
+    // The emitted event must carry a classified reason (one of the known values)
+    const VALID_REASONS = ["rate_limit", "auth", "timeout", "oom", "provider", "unknown"]
+    for (const ev of sessionErrorEvents) {
+      expect(typeof ev.reason).toBe("string")
+      expect(VALID_REASONS).toContain(ev.reason as string)
+      // message must be present and non-empty
+      expect(typeof ev.message).toBe("string")
+      expect((ev.message as string).length).toBeGreaterThan(0)
+    }
+
+    // Process must have exited non-zero (the session failed)
+    expect(await proc.exited).not.toBe(0)
+  }, 20_000)
+})

From b09fec9d35deafd2b551258031042ca5a40d17f4 Mon Sep 17 00:00:00 2001
From: Bulat Yapparov <by4pparov@yandex.ru>
Date: Mon, 22 Jun 2026 10:40:21 +0100
Subject: [PATCH 3/3] fix(run): address round-2 review findings for PR #81
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Finding 1 (TRUE/FIX): replace subprocess test that used nonexistent-model
(exercised pre-existing promptResult rejection handler, not the new in-loop
emit) with a mock-server --attach test that injects a session.error event
mid-stream. Test now fails if the new in-loop emit("session_error",…) is removed.

Finding 2 (TRUE/FIX): drop --conditions=browser from the test subprocess.
The flag was cargo-culted from the dev script; no packages in this repo export
browser-specific conditions, so it was harmless but unexplained and potentially
misleading. Removed without replacement.

Finding 3 (TRUE/FIX): add a shared `let sessionErrorEmitted = false` flag in
execute() scope. All three session_error emit sites (in-loop session.error
handler, loopDone.catch, promptResult rejection handler) now short-circuit on
the flag so at most one session_error is emitted per run regardless of which
failure paths fire. New test also asserts count === 1.
---
 packages/cli/src/cli/cmd/run.ts               |  56 ++--
 .../test/cli/run-session-error-emit.test.ts   | 255 +++++++++++++-----
 2 files changed, 218 insertions(+), 93 deletions(-)

diff --git a/packages/cli/src/cli/cmd/run.ts b/packages/cli/src/cli/cmd/run.ts
index c82d2f8..b65e0c7 100644
--- a/packages/cli/src/cli/cmd/run.ts
+++ b/packages/cli/src/cli/cmd/run.ts
@@ -446,6 +446,11 @@ export const RunCommand = cmd({
 
       const events = await sdk.event.subscribe()
       let error: string | undefined
+      // Guard: at most one session_error is emitted per run regardless of which
+      // failure path fires first (in-loop session.error handler, loopDone.catch,
+      // or promptResult rejection). Without this a mid-run session.error event
+      // can fire site 1 while promptResult/loop() also rejects and fires site 2/3.
+      let sessionErrorEmitted = false
       const startTime = Date.now()
       const childSessions = new Set<string>()
       const seqBySession = new Map<string, number>()
@@ -571,17 +576,20 @@ export const RunCommand = cmd({
               // spuriously-green job. process.exitCode (not process.exit) lets the
               // loop drain to session.status idle and emit session_complete first.
               process.exitCode = 1
-              const classified = classifySessionError(props.error)
-              // Structured session_error (classified reason/code/message) is emitted for the
-              // primary session only. The generic "error" event below fires for both primary
-              // and child-session failures and carries the raw error object — these are
-              // intentionally distinct channels: session_error is for structured telemetry/CI
-              // consumers, "error" is the legacy observable for raw error pass-through.
-              emit("session_error", {
-                reason: classified.reason,
-                code: classified.code,
-                message: classified.message,
-              })
+              if (!sessionErrorEmitted) {
+                sessionErrorEmitted = true
+                const classified = classifySessionError(props.error)
+                // Structured session_error (classified reason/code/message) is emitted for the
+                // primary session only. The generic "error" event below fires for both primary
+                // and child-session failures and carries the raw error object — these are
+                // intentionally distinct channels: session_error is for structured telemetry/CI
+                // consumers, "error" is the legacy observable for raw error pass-through.
+                emit("session_error", {
+                  reason: classified.reason,
+                  code: classified.code,
+                  message: classified.message,
+                })
+              }
             }
             if (emit("error", { error: props.error, sourceSessionID: props.sessionID })) continue
             UI.error(err)
@@ -724,11 +732,14 @@ export const RunCommand = cmd({
         })
         .catch((e) => {
           const classified = classifySessionError(e)
-          emit("session_error", {
-            reason: classified.reason,
-            code: classified.code,
-            message: classified.message,
-          })
+          if (!sessionErrorEmitted) {
+            sessionErrorEmitted = true
+            emit("session_error", {
+              reason: classified.reason,
+              code: classified.code,
+              message: classified.message,
+            })
+          }
           emit("session_complete", {
             durationMs: Date.now() - startTime,
             error: classified.message,
@@ -771,11 +782,14 @@ export const RunCommand = cmd({
           (e) => {
             const classified = classifySessionError(e)
             error = error ? error + EOL + classified.message : classified.message
-            emit("session_error", {
-              reason: classified.reason,
-              code: classified.code,
-              message: classified.message,
-            })
+            if (!sessionErrorEmitted) {
+              sessionErrorEmitted = true
+              emit("session_error", {
+                reason: classified.reason,
+                code: classified.code,
+                message: classified.message,
+              })
+            }
             emit("session_complete", {
               durationMs: Date.now() - startTime,
               error: classified.message,
diff --git a/packages/cli/test/cli/run-session-error-emit.test.ts b/packages/cli/test/cli/run-session-error-emit.test.ts
index 1829e05..1e4c60b 100644
--- a/packages/cli/test/cli/run-session-error-emit.test.ts
+++ b/packages/cli/test/cli/run-session-error-emit.test.ts
@@ -3,82 +3,193 @@ import { describe, expect, test } from "bun:test"
 import { fileURLToPath } from "url"
 
 // Behavioral regression test for aictrl-dev/cli#81:
-//   Verify that `session_error` is actually emitted as a JSON event when the
-//   session fails, not just that the source text contains the right substrings.
-//   The source-text assertions in run-schema-v1.test.ts cannot catch a mistyped
-//   event key, a missing import, or broken control flow — this test drives a
-//   real session failure and observes the emitted payload.
+//   Verify that `session_error` is emitted when a `session.error` event is
+//   observed DURING the event loop (the new in-loop handler), not merely that
+//   the source text contains the right substrings.
+//
+//   Round-1 test used --model nonexistent-provider/nonexistent-model which
+//   fails BEFORE the loop (model-resolution), exercising the pre-existing
+//   promptResult rejection handler at run.ts:~774 — NOT the new in-loop emit
+//   added in #81.  Removing the new emit left the round-1 test green.
+//
+//   This test uses --attach to a controlled mock server that emits a
+//   session.error event mid-stream, so the assertion fails if the new
+//   in-loop emit is removed.  The test also asserts at-most-one session_error
+//   per run, validating the sessionErrorEmitted guard added in round-2.
 
 const __dirname = path.dirname(fileURLToPath(import.meta.url))
 const CLI_ENTRY = path.resolve(__dirname, "../../src/index.ts")
 
-describe("run --format json session_error emission (#81)", () => {
-  test("session_error event is emitted with a classified reason on session failure", async () => {
-    // Run with a nonexistent model to trigger a session failure.
-    // The CLI must emit a { type: "session_error", reason: string } JSON line
-    // to stdout before exiting non-zero.
-    const proc = Bun.spawn(
-      [
-        "bun",
-        "run",
-        "--conditions=browser",
-        CLI_ENTRY,
-        "run",
-        "--format",
-        "json",
-        "test prompt",
-        "--model",
-        "nonexistent-provider/nonexistent-model",
-      ],
-      {
-        cwd: process.cwd(),
-        env: {
-          ...process.env,
-          ANTHROPIC_API_KEY: "",
-          OPENAI_API_KEY: "",
-        },
-        stdout: "pipe",
-        stderr: "pipe",
-      },
-    )
-
-    const timeout = setTimeout(() => {
-      proc.kill()
-    }, 15_000)
-
-    const [stdout] = await Promise.all([
-      new Response(proc.stdout).text(),
-      proc.exited,
-    ])
-    clearTimeout(timeout)
-
-    // Parse JSON lines from stdout, skip non-JSON lines defensively
-    const events: Array<Record<string, unknown>> = []
-    for (const line of stdout.split("\n")) {
-      const trimmed = line.trim()
-      if (!trimmed) continue
+const SESSION_ID = "sess-test-001"
+
+/**
+ * Minimal HTTP server that simulates an aictrl backend.
+ * Returns the server URL and a stop() function.
+ *
+ * The SDK wraps response bodies as { data: <body> } automatically, so the
+ * server returns the raw data shapes (without an extra "data" envelope).
+ * Event payloads are returned in SSE format; the SDK parses the JSON lines.
+ */
+async function startMockServer(): Promise<{ url: string; stop: () => void }> {
+  const server = Bun.serve({
+    port: 0, // OS-assigned free port
+    async fetch(req) {
+      const url = new URL(req.url)
+
+      // List sessions (used by --continue; we return empty)
+      if (req.method === "GET" && url.pathname === "/session") {
+        return new Response(JSON.stringify([]), {
+          headers: { "content-type": "application/json" },
+        })
+      }
+
+      // Create session
+      if (req.method === "POST" && url.pathname === "/session") {
+        return new Response(JSON.stringify({ id: SESSION_ID }), {
+          headers: { "content-type": "application/json" },
+        })
+      }
+
+      // Get config (used by share check — empty data means auto-share is off)
+      if (req.method === "GET" && url.pathname === "/config") {
+        return new Response(JSON.stringify({}), {
+          headers: { "content-type": "application/json" },
+        })
+      }
+
+      // Accept the prompt (session appears to start; any 2xx is fine)
+      if (req.method === "POST" && url.pathname.endsWith("/message")) {
+        return new Response(JSON.stringify({}), {
+          headers: { "content-type": "application/json" },
+        })
+      }
+
+      // SSE event stream: emit session.error then session.status idle.
+      // session.error triggers the NEW in-loop handler added in PR #81.
+      // session.status idle breaks the loop so the process exits cleanly.
+      if (req.method === "GET" && url.pathname === "/event") {
+        const sessionError = {
+          type: "session.error",
+          properties: {
+            sessionID: SESSION_ID,
+            error: {
+              name: "ProviderAuthError",
+              message: "mock auth failure injected by test server",
+            },
+          },
+        }
+        const sessionIdle = {
+          type: "session.status",
+          properties: {
+            sessionID: SESSION_ID,
+            status: { type: "idle" },
+          },
+        }
+        const body =
+          `data: ${JSON.stringify(sessionError)}\n\n` +
+          `data: ${JSON.stringify(sessionIdle)}\n\n`
+        return new Response(body, {
+          headers: {
+            "content-type": "text/event-stream",
+            "cache-control": "no-cache",
+          },
+        })
+      }
+
+      // Catch-all 404 (SDK surfaces as non-throwing error, not a panic)
+      return new Response(JSON.stringify({ error: "not found" }), {
+        status: 404,
+        headers: { "content-type": "application/json" },
+      })
+    },
+  })
+
+  const url = `http://localhost:${server.port}`
+  const stop = () => server.stop(true)
+  return { url, stop }
+}
+
+describe("run --format json session_error emission (#81 in-loop path)", () => {
+  test(
+    "session_error is emitted when session.error event fires during the loop (not just on pre-loop failure)",
+    async () => {
+      // Acceptance criterion: removing the emit("session_error", …) call inside
+      // the `if (event.type === "session.error")` block in run.ts must cause
+      // this test to fail.
+      const { url: serverUrl, stop } = await startMockServer()
+
       try {
-        events.push(JSON.parse(trimmed))
-      } catch {
-        // ignore non-JSON diagnostic output
+        // --attach routes through the SDK event stream; promptResult stays
+        // Promise.resolve() so the promptResult rejection handler (the pre-existing
+        // emit site) cannot fire here — only the in-loop handler can.
+        const proc = Bun.spawn(
+          [
+            "bun",
+            "run",
+            CLI_ENTRY,
+            "run",
+            "--format",
+            "json",
+            "--attach",
+            serverUrl,
+            "test prompt",
+          ],
+          {
+            cwd: process.cwd(),
+            env: {
+              ...process.env,
+              ANTHROPIC_API_KEY: "",
+              OPENAI_API_KEY: "",
+            },
+            stdout: "pipe",
+            stderr: "pipe",
+          },
+        )
+
+        const timeout = setTimeout(() => {
+          proc.kill()
+        }, 15_000)
+
+        const [stdout] = await Promise.all([
+          new Response(proc.stdout).text(),
+          proc.exited,
+        ])
+        clearTimeout(timeout)
+
+        // Parse JSON lines from stdout, skip non-JSON diagnostic lines
+        const events: Array<Record<string, unknown>> = []
+        for (const line of stdout.split("\n")) {
+          const trimmed = line.trim()
+          if (!trimmed) continue
+          try {
+            events.push(JSON.parse(trimmed))
+          } catch {
+            // ignore non-JSON diagnostic output
+          }
+        }
+
+        // The in-loop session.error handler must have emitted session_error
+        const sessionErrorEvents = events.filter((e) => e.type === "session_error")
+        expect(sessionErrorEvents.length).toBeGreaterThan(0)
+
+        // The event must carry a classified reason from classifySessionError
+        const VALID_REASONS = ["rate_limit", "auth", "timeout", "oom", "provider", "unknown"]
+        for (const ev of sessionErrorEvents) {
+          expect(typeof ev.reason).toBe("string")
+          expect(VALID_REASONS).toContain(ev.reason as string)
+          expect(typeof ev.message).toBe("string")
+          expect((ev.message as string).length).toBeGreaterThan(0)
+        }
+
+        // At most one session_error per run — validates the sessionErrorEmitted guard
+        expect(sessionErrorEvents.length).toBe(1)
+
+        // Process must have exited non-zero (the session failed)
+        expect(await proc.exited).not.toBe(0)
+      } finally {
+        stop()
       }
-    }
-
-    // At least one session_error event must have been emitted
-    const sessionErrorEvents = events.filter((e) => e.type === "session_error")
-    expect(sessionErrorEvents.length).toBeGreaterThan(0)
-
-    // The emitted event must carry a classified reason (one of the known values)
-    const VALID_REASONS = ["rate_limit", "auth", "timeout", "oom", "provider", "unknown"]
-    for (const ev of sessionErrorEvents) {
-      expect(typeof ev.reason).toBe("string")
-      expect(VALID_REASONS).toContain(ev.reason as string)
-      // message must be present and non-empty
-      expect(typeof ev.message).toBe("string")
-      expect((ev.message as string).length).toBeGreaterThan(0)
-    }
-
-    // Process must have exited non-zero (the session failed)
-    expect(await proc.exited).not.toBe(0)
-  }, 20_000)
+    },
+    20_000,
+  )
 })