From 959e93583c746f8644bc27c4e8bb171ed6dd0921 Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Wed, 17 Jun 2026 11:29:31 +0200
Subject: [PATCH 1/3] fix(results): handle replay row schema aliases

---
 apps/cli/src/commands/eval/artifact-writer.ts |  23 ++-
 apps/cli/src/commands/inspect/filter.ts       |   5 +-
 apps/cli/src/commands/inspect/search.ts       |   8 +-
 apps/cli/src/commands/inspect/utils.ts        |  10 +-
 apps/cli/src/commands/results/manifest.ts     |  23 ++-
 .../src/commands/results/result-row-schema.ts | 163 ++++++++++++++++++
 .../commands/eval/artifact-writer.test.ts     |  29 ++++
 apps/cli/test/commands/inspect/filter.test.ts |  26 +++
 apps/cli/test/commands/results/shared.test.ts |  27 +++
 .../fixtures/results/camel-replay/index.jsonl |   1 +
 10 files changed, 296 insertions(+), 19 deletions(-)
 create mode 100644 apps/cli/src/commands/results/result-row-schema.ts
 create mode 100644 apps/cli/test/fixtures/results/camel-replay/index.jsonl

diff --git a/apps/cli/src/commands/eval/artifact-writer.ts b/apps/cli/src/commands/eval/artifact-writer.ts
index f322a2cd3..e3d2137d4 100644
--- a/apps/cli/src/commands/eval/artifact-writer.ts
+++ b/apps/cli/src/commands/eval/artifact-writer.ts
@@ -15,6 +15,7 @@ import {
   toTraceEnvelopeWire,
   traceToTranscriptJsonLines,
 } from '@agentv/core';
+import { normalizeResultRow } from '../results/result-row-schema.js';
 import { RESULT_INDEX_FILENAME } from './result-layout.js';
 import {
   type MaterializedTaskBundlePaths,
@@ -1091,21 +1092,27 @@ function normalizeParsedResult(value: unknown): ParsedEvaluationResult | undefin
 export function parseJsonlResults(content: string): EvaluationResult[] {
   const results: EvaluationResult[] = [];
   const lines = content.split('\n');
-  for (const line of lines) {
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
     const trimmed = line.trim();
     if (trimmed.length === 0) {
       continue;
     }
+    let parsed: unknown;
     try {
-      const parsed = JSON.parse(trimmed);
-      // JSONL files from AgentV use snake_case; convert back to camelCase
-      const camelCased = toCamelCaseDeep(parsed);
-      const normalized = normalizeParsedResult(camelCased);
-      if (normalized) {
-        results.push(normalized);
-      }
+      parsed = JSON.parse(trimmed);
     } catch {
       // Skip malformed lines
+      continue;
+    }
+
+    // JSONL files from AgentV use snake_case; convert supported historical aliases
+    // to canonical snake_case before mapping into TypeScript internals.
+    const canonicalRow = normalizeResultRow(parsed, { lineNumber: i + 1 });
+    const camelCased = toCamelCaseDeep(canonicalRow);
+    const normalized = normalizeParsedResult(camelCased);
+    if (normalized) {
+      results.push(normalized);
     }
   }
   return results;
diff --git a/apps/cli/src/commands/inspect/filter.ts b/apps/cli/src/commands/inspect/filter.ts
index 89f81c77d..011723266 100644
--- a/apps/cli/src/commands/inspect/filter.ts
+++ b/apps/cli/src/commands/inspect/filter.ts
@@ -14,6 +14,7 @@
 import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs';
 import path from 'node:path';
 import { command, number, oneOf, option, optional, positional, string } from 'cmd-ts';
+import { normalizeResultRow } from '../results/result-row-schema.js';
 import { c, formatScore, padLeft, padRight } from './utils.js';
 
 /** A lightweight result record with fields needed for filtering. */
@@ -105,13 +106,20 @@ export function parseFilterableRecords(filePath: string): FilterableRecord[] {
-  const lines = content.split('\n').filter((line) => line.trim());
+  // Keep blank lines in the array so that `i + 1` below is the real line number
+  // in the file; blank lines are skipped inside the loop instead.
+  const lines = content.split('\n');
   const records: FilterableRecord[] = [];
 
-  for (const line of lines) {
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    if (!line.trim()) {
+      continue;
+    }
     let raw: Record<string, unknown>;
     try {
       raw = JSON.parse(line) as Record<string, unknown>;
     } catch {
       continue;
     }
+    raw = normalizeResultRow(raw, { lineNumber: i + 1, sourceLabel: filePath });
 
     // Determine experiment from record or from directory path
     let experiment = typeof raw.experiment === 'string' ? raw.experiment : undefined;
diff --git a/apps/cli/src/commands/inspect/search.ts b/apps/cli/src/commands/inspect/search.ts
index 7c551742c..480bc6025 100644
--- a/apps/cli/src/commands/inspect/search.ts
+++ b/apps/cli/src/commands/inspect/search.ts
@@ -110,9 +110,11 @@ export function searchJsonlFile(
     const testId =
       typeof record.test_id === 'string'
         ? record.test_id
-        : typeof record.source === 'object' && record.source !== null
-          ? ((record.source as Record<string, unknown>).session_id as string | undefined)
-          : undefined;
+        : typeof record.testId === 'string'
+          ? record.testId
+          : typeof record.source === 'object' && record.source !== null
+            ? ((record.source as Record<string, unknown>).session_id as string | undefined)
+            : undefined;
 
     // Apply metadata filters before regex search
     if (targetFilter && target !== targetFilter) continue;
diff --git a/apps/cli/src/commands/inspect/utils.ts b/apps/cli/src/commands/inspect/utils.ts
index 25399ed20..ae2e34ec9 100644
--- a/apps/cli/src/commands/inspect/utils.ts
+++ b/apps/cli/src/commands/inspect/utils.ts
@@ -9,6 +9,7 @@ import {
   resolveWorkspaceOrFilePath,
 } from '../eval/result-layout.js';
 import { loadManifestResults } from '../results/manifest.js';
+import { normalizeResultRow } from '../results/result-row-schema.js';
 
 // ANSI color codes (no dependency needed)
 const colors = {
@@ -133,11 +134,10 @@ function loadJsonlRecords(filePath: string): RawResult[] {
     .filter((line) => line.trim());
 
   return lines.map((line, i) => {
-    const record = JSON.parse(line) as RawResult;
-    if (typeof record.score !== 'number') {
-      throw new Error(`Missing or invalid score in result at line ${i + 1}: ${line.slice(0, 100)}`);
-    }
-    return record;
+    return normalizeResultRow(JSON.parse(line), {
+      lineNumber: i + 1,
+      sourceLabel: filePath,
+    }) as unknown as RawResult;
   });
 }
 
diff --git a/apps/cli/src/commands/results/manifest.ts b/apps/cli/src/commands/results/manifest.ts
index 99dd71993..65044552e 100644
--- a/apps/cli/src/commands/results/manifest.ts
+++ b/apps/cli/src/commands/results/manifest.ts
@@ -3,8 +3,10 @@ import path from 'node:path';
 
 import {
   type EvaluationResult,
+  type TraceSummary,
   type TranscriptJsonLine,
   buildTraceFromMessages,
+  toCamelCaseDeep,
   traceFromTranscriptJsonLines,
 } from '@agentv/core';
 
@@ -14,6 +16,7 @@ import {
   isDirectoryPath,
   resolveRunManifestPath,
 } from '../eval/result-layout.js';
+import { normalizeResultRow } from './result-row-schema.js';
 
 export interface ResultManifestRecord {
   readonly timestamp?: string;
@@ -33,6 +36,7 @@ export interface ResultManifestRecord {
     readonly output?: number;
     readonly reasoning?: number;
   };
+  readonly trace?: Record<string, unknown>;
   readonly grading_path?: string;
   readonly timing_path?: string;
   readonly input_path?: string;
@@ -57,6 +61,20 @@ function parseJsonlLines<T>(content: string): T[] {
     .map((line) => JSON.parse(line) as T);
 }
 
+function parseResultRows(content: string, sourceLabel?: string): ResultManifestRecord[] {
+  return content
+    .split(/\r?\n/) // accepts both LF and CRLF line endings
+    .map((line, index) => ({ line: line.trim(), lineNumber: index + 1 }))
+    .filter(({ line }) => line.length > 0) // dropped after numbering so lineNumber matches the file
+    .map(
+      ({ line, lineNumber }) =>
+        normalizeResultRow(JSON.parse(line), {
+          lineNumber,
+          sourceLabel,
+        }) as unknown as ResultManifestRecord,
+    );
+}
+
 function parseMarkdownMessages(content: string): { role: string; content: string }[] {
   const trimmed = content.trim();
   if (!trimmed.startsWith('@[')) {
@@ -138,6 +156,7 @@ function hydrateTrace(baseDir: string, record: ResultManifestRecord): Evaluation
   return buildTraceFromMessages({
     input: hydrateInput(baseDir, record),
     output: output ? [{ role: 'assistant', content: output }] : [],
+    summary: record.trace ? (toCamelCaseDeep(record.trace) as TraceSummary) : undefined,
     finalOutput: output,
     target: record.target,
     testId: record.test_id,
@@ -205,7 +224,7 @@ function hydrateManifestRecord(baseDir: string, record: ResultManifestRecord): E
 }
 
 export function parseResultManifest(content: string): ResultManifestRecord[] {
-  return parseJsonlLines<ResultManifestRecord>(content);
+  return parseResultRows(content);
 }
 
 export function resolveResultSourcePath(source: string, cwd?: string): string {
@@ -219,7 +238,7 @@ export function resolveResultSourcePath(source: string, cwd?: string): string {
 export function loadManifestResults(sourceFile: string): EvaluationResult[] {
   const resolvedSourceFile = resolveRunManifestPath(sourceFile);
   const content = readFileSync(resolvedSourceFile, 'utf8');
-  const records = parseResultManifest(content);
+  const records = parseResultRows(content, resolvedSourceFile);
   const baseDir = path.dirname(resolvedSourceFile);
   return records.map((record) => hydrateManifestRecord(baseDir, record));
 }
diff --git a/apps/cli/src/commands/results/result-row-schema.ts b/apps/cli/src/commands/results/result-row-schema.ts
new file mode 100644
index 000000000..7ff85cdac
--- /dev/null
+++ b/apps/cli/src/commands/results/result-row-schema.ts
@@ -0,0 +1,163 @@
+/**
+ * Result JSONL row schema used by CLI result readers.
+ *
+ * Canonical AgentV run manifests are `index.jsonl` files with snake_case keys
+ * and a numeric `score`. Historical rows produced from TypeScript
+ * `EvaluationResult` objects may contain a small set of camelCase aliases.
+ * Normalize those aliases only at this file/CLI boundary; callers should work
+ * with the canonical snake_case row shape or convert once into TypeScript
+ * internals.
+ *
+ * Eval case rows are not result rows: they usually contain `id`/`prompt` but no
+ * `score`. Reject them with migration guidance instead of treating them as
+ * failed results with an unknown test ID.
+ */
+
+export class ResultRowSchemaError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = 'ResultRowSchemaError';
+  }
+}
+
+const MIGRATION_GUIDANCE =
+  'Expected an AgentV result row with a numeric score. Eval-case JSONL is input data, not a results artifact. Run `agentv eval <eval-file> --output <run-dir>` and pass the run workspace or its index.jsonl manifest.';
+
+const RESULT_ROW_ALIASES = {
+  answerPath: 'answer_path',
+  artifactDir: 'artifact_dir',
+  conversationId: 'conversation_id',
+  costUsd: 'cost_usd',
+  durationMs: 'duration_ms',
+  endTime: 'end_time',
+  evalPath: 'eval_path',
+  executionStatus: 'execution_status',
+  failureReasonCode: 'failure_reason_code',
+  failureStage: 'failure_stage',
+  filesPath: 'files_path',
+  gradersPath: 'graders_path',
+  gradingPath: 'grading_path',
+  inputPath: 'input_path',
+  outputPath: 'output_path',
+  responsePath: 'response_path',
+  startTime: 'start_time',
+  targetsPath: 'targets_path',
+  taskDir: 'task_dir',
+  testId: 'test_id',
+  timingPath: 'timing_path',
+  tokenUsage: 'token_usage',
+  transcriptPath: 'transcript_path',
+  workspacePath: 'workspace_path',
+} as const;
+
+const TRACE_SUMMARY_ALIASES = {
+  costUsd: 'cost_usd',
+  durationMs: 'duration_ms',
+  errorCount: 'error_count',
+  eventCount: 'event_count',
+  llmCallCount: 'llm_call_count',
+  tokenUsage: 'token_usage',
+  toolCalls: 'tool_calls',
+  toolDurations: 'tool_durations',
+} as const;
+
+const MESSAGE_ALIASES = {
+  durationMs: 'duration_ms',
+  endTime: 'end_time',
+  startTime: 'start_time',
+  tokenUsage: 'token_usage',
+  toolCalls: 'tool_calls',
+} as const;
+
+const TOOL_CALL_ALIASES = {
+  durationMs: 'duration_ms',
+  endTime: 'end_time',
+  startTime: 'start_time',
+} as const;
+
+type AliasMap = Readonly<Record<string, string>>;
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+function normalizeKnownAliases(value: Record<string, unknown>, aliases: AliasMap) {
+  const normalized = { ...value }; // shallow copy: the caller's object is not mutated
+  for (const [camelKey, snakeKey] of Object.entries(aliases)) {
+    if (normalized[snakeKey] === undefined && normalized[camelKey] !== undefined) {
+      normalized[snakeKey] = normalized[camelKey]; // a defined snake_case value wins
+    }
+    if (camelKey !== snakeKey) {
+      delete normalized[camelKey]; // alias is dropped even when its value was not copied
+    }
+  }
+  return normalized;
+}
+
+function normalizeToolCall(value: unknown): unknown {
+  if (!isRecord(value)) {
+    return value;
+  }
+  return normalizeKnownAliases(value, TOOL_CALL_ALIASES);
+}
+
+function normalizeMessage(value: unknown): unknown {
+  if (!isRecord(value)) {
+    return value;
+  }
+
+  const normalized = normalizeKnownAliases(value, MESSAGE_ALIASES);
+  if (Array.isArray(normalized.tool_calls)) {
+    normalized.tool_calls = normalized.tool_calls.map(normalizeToolCall);
+  }
+  return normalized;
+}
+
+function normalizeTraceSummary(value: unknown): unknown {
+  if (!isRecord(value)) {
+    return value;
+  }
+
+  const normalized = normalizeKnownAliases(value, TRACE_SUMMARY_ALIASES);
+  if (Array.isArray(normalized.messages)) {
+    normalized.messages = normalized.messages.map(normalizeMessage);
+  }
+  return normalized;
+}
+
+function normalizeOutput(value: unknown): unknown {
+  if (!Array.isArray(value)) {
+    return value;
+  }
+  return value.map(normalizeMessage);
+}
+
+function buildSchemaError(context: {
+  lineNumber?: number;
+  sourceLabel?: string;
+}): ResultRowSchemaError {
+  const location = [
+    context.sourceLabel ? ` in ${context.sourceLabel}` : '',
+    context.lineNumber !== undefined ? ` at line ${context.lineNumber}` : '',
+  ].join('');
+  return new ResultRowSchemaError(`Unsupported result row${location}. ${MIGRATION_GUIDANCE}`);
+}
+
+export function normalizeResultRow(
+  value: unknown,
+  context: { lineNumber?: number; sourceLabel?: string } = {},
+): Record<string, unknown> {
+  if (!isRecord(value)) {
+    throw buildSchemaError(context);
+  }
+
+  const normalized = normalizeKnownAliases(value, RESULT_ROW_ALIASES);
+  normalized.trace = normalizeTraceSummary(normalized.trace);
+  normalized.output = normalizeOutput(normalized.output);
+
+  if (typeof normalized.score !== 'number' || !Number.isFinite(normalized.score)) {
+    throw buildSchemaError(context);
+  }
+
+  return normalized;
+}
diff --git a/apps/cli/test/commands/eval/artifact-writer.test.ts b/apps/cli/test/commands/eval/artifact-writer.test.ts
index 8926f2c6f..c86f29ed8 100644
--- a/apps/cli/test/commands/eval/artifact-writer.test.ts
+++ b/apps/cli/test/commands/eval/artifact-writer.test.ts
@@ -562,6 +562,29 @@ describe('parseJsonlResults', () => {
     expect(results[1].testId).toBe('b');
   });
 
+  it('normalizes historical camelCase result row aliases', () => {
+    const content = `${JSON.stringify({
+      testId: 'wtg-replay-fail',
+      target: 'codex',
+      score: 0.4,
+      executionStatus: 'quality_failure',
+      durationMs: 1234,
+      tokenUsage: { input: 10, output: 5 },
+      costUsd: 0.012,
+      trace: { eventCount: 1, toolCalls: { rg: 1 }, errorCount: 0 },
+    })}\n`;
+
+    const results = parseJsonlResults(content);
+
+    expect(results).toHaveLength(1);
+    expect(results[0].testId).toBe('wtg-replay-fail');
+    expect(results[0].executionStatus).toBe('quality_failure');
+    expect(results[0].durationMs).toBe(1234);
+    expect(results[0].tokenUsage).toEqual({ input: 10, output: 5 });
+    expect(results[0].costUsd).toBe(0.012);
+    expect(results[0].trace.toolCalls).toEqual({ rg: 1 });
+  });
+
   it('handles empty content', () => {
     expect(parseJsonlResults('')).toHaveLength(0);
   });
@@ -577,6 +600,12 @@ describe('parseJsonlResults', () => {
     const content = `${good}\nnot json\n`;
     expect(parseJsonlResults(content)).toHaveLength(1);
   });
+
+  it('rejects eval-case-only rows with migration guidance', () => {
+    const content = `${JSON.stringify({ id: 'case-a', prompt: 'What is 2 + 2?' })}\n`;
+
+    expect(() => parseJsonlResults(content)).toThrow(/Eval-case JSONL is input data/);
+  });
 });
 
 // ---------------------------------------------------------------------------
diff --git a/apps/cli/test/commands/inspect/filter.test.ts b/apps/cli/test/commands/inspect/filter.test.ts
index 7bbb56c03..3a06e2d42 100644
--- a/apps/cli/test/commands/inspect/filter.test.ts
+++ b/apps/cli/test/commands/inspect/filter.test.ts
@@ -109,6 +109,25 @@ describe('inspect filter', () => {
       expect(records[0].tool_names).toContain('read_file');
     });
 
+    it('normalizes historical camelCase trace tool summaries', () => {
+      const record = JSON.stringify({
+        testId: 'wtg-replay-fail',
+        target: 'codex',
+        score: 0.4,
+        executionStatus: 'quality_failure',
+        trace: { toolCalls: { rg: 1 } },
+      });
+      const filePath = path.join(tempDir, 'index.jsonl');
+      writeFileSync(filePath, `${record}\n`);
+
+      const records = parseFilterableRecords(filePath);
+
+      expect(records).toHaveLength(1);
+      expect(records[0].test_id).toBe('wtg-replay-fail');
+      expect(records[0].execution_status).toBe('quality_failure');
+      expect(records[0].tool_names).toContain('rg');
+    });
+
     it('returns empty array for unreadable files', () => {
       const records = parseFilterableRecords(path.join(tempDir, 'nonexistent.jsonl'));
 
@@ -154,6 +173,13 @@ describe('inspect filter', () => {
       expect(records).toHaveLength(1);
       expect(records[0].test_id).toBe('unknown');
     });
+
+    it('rejects eval-case-only rows with migration guidance', () => {
+      const filePath = path.join(tempDir, 'index.jsonl');
+      writeFileSync(filePath, '{"id":"case-a","prompt":"What is 2 + 2?"}\n');
+
+      expect(() => parseFilterableRecords(filePath)).toThrow(/Eval-case JSONL is input data/);
+    });
   });
 
   describe('buildFilterPredicate', () => {
diff --git a/apps/cli/test/commands/results/shared.test.ts b/apps/cli/test/commands/results/shared.test.ts
index 2cd110fc3..ee32aa5eb 100644
--- a/apps/cli/test/commands/results/shared.test.ts
+++ b/apps/cli/test/commands/results/shared.test.ts
@@ -4,6 +4,7 @@ import { tmpdir } from 'node:os';
 import path from 'node:path';
 
 import { resolveRunManifestPath } from '../../../src/commands/eval/result-layout.js';
+import { loadManifestResults } from '../../../src/commands/results/manifest.js';
 import { resolveSourceFile } from '../../../src/commands/results/shared.js';
 
 describe('results shared source resolution', () => {
@@ -60,4 +61,30 @@ describe('results shared source resolution', () => {
       'Expected a run workspace directory or index.jsonl manifest',
     );
   });
+
+  it('normalizes historical camelCase replay rows when loading manifests', () => {
+    const fixturePath = path.join(
+      process.cwd(),
+      'apps/cli/test/fixtures/results/camel-replay/index.jsonl',
+    );
+
+    const results = loadManifestResults(fixturePath);
+
+    expect(results).toHaveLength(1);
+    expect(results[0].testId).toBe('wtg-replay-fail');
+    expect(results[0].executionStatus).toBe('quality_failure');
+    expect(results[0].durationMs).toBe(1234);
+    expect(results[0].tokenUsage).toEqual({ input: 10, output: 5 });
+    expect(results[0].costUsd).toBe(0.012);
+    expect(results[0].trace.toolCalls).toEqual({ rg: 1 });
+  });
+
+  it('rejects eval-case-only rows with migration guidance', () => {
+    const runDir = path.join(tempDir, '.agentv', 'results', 'runs', '2026-03-25T10-00-00-000Z');
+    mkdirSync(runDir, { recursive: true });
+    const indexPath = path.join(runDir, 'index.jsonl');
+    writeFileSync(indexPath, '{"id":"case-a","prompt":"What is 2 + 2?"}\n');
+
+    expect(() => loadManifestResults(indexPath)).toThrow(/Eval-case JSONL is input data/);
+  });
 });
diff --git a/apps/cli/test/fixtures/results/camel-replay/index.jsonl b/apps/cli/test/fixtures/results/camel-replay/index.jsonl
new file mode 100644
index 000000000..c4f57237e
--- /dev/null
+++ b/apps/cli/test/fixtures/results/camel-replay/index.jsonl
@@ -0,0 +1 @@
+{"testId":"wtg-replay-fail","target":"codex","score":0.4,"executionStatus":"quality_failure","durationMs":1234,"tokenUsage":{"input":10,"output":5},"costUsd":0.012,"trace":{"eventCount":1,"toolCalls":{"rg":1},"errorCount":0}}

From c234fd045a549f4f85f4e9d07a5b70a3e13b1e6d Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Wed, 17 Jun 2026 12:47:07 +0200
Subject: [PATCH 2/3] fix(inspect): preserve trace invalid score errors

---
 apps/cli/src/commands/inspect/utils.ts | 38 ++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/apps/cli/src/commands/inspect/utils.ts b/apps/cli/src/commands/inspect/utils.ts
index ae2e34ec9..6ea549678 100644
--- a/apps/cli/src/commands/inspect/utils.ts
+++ b/apps/cli/src/commands/inspect/utils.ts
@@ -9,7 +9,7 @@ import {
   resolveWorkspaceOrFilePath,
 } from '../eval/result-layout.js';
 import { loadManifestResults } from '../results/manifest.js';
-import { normalizeResultRow } from '../results/result-row-schema.js';
+import { ResultRowSchemaError, normalizeResultRow } from '../results/result-row-schema.js';
 
 // ANSI color codes (no dependency needed)
 const colors = {
@@ -126,6 +126,24 @@ function resolveTraceResultPath(filePath: string): string {
   return resolveWorkspaceOrFilePath(filePath);
 }
 
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+function shouldUseTraceScoreError(value: unknown): boolean {
+  if (!isRecord(value)) {
+    return false;
+  }
+
+  return (
+    typeof value.test_id === 'string' ||
+    typeof value.testId === 'string' || // checked on the raw parsed row, before alias normalization
+    Object.hasOwn(value, 'score') || // key present, but normalizeResultRow rejected its value
+    Object.hasOwn(value, 'trace') ||
+    Object.hasOwn(value, 'spans')
+  );
+}
+
 function loadJsonlRecords(filePath: string): RawResult[] {
   const content = readFileSync(filePath, 'utf8');
   const lines = content
@@ -134,10 +152,20 @@ function loadJsonlRecords(filePath: string): RawResult[] {
     .filter((line) => line.trim());
 
   return lines.map((line, i) => {
-    return normalizeResultRow(JSON.parse(line), {
-      lineNumber: i + 1,
-      sourceLabel: filePath,
-    }) as unknown as RawResult;
+    const parsed = JSON.parse(line) as unknown;
+    try {
+      return normalizeResultRow(parsed, {
+        lineNumber: i + 1,
+        sourceLabel: filePath,
+      }) as unknown as RawResult;
+    } catch (error) {
+      if (error instanceof ResultRowSchemaError && shouldUseTraceScoreError(parsed)) {
+        throw new Error(
+          `Missing or invalid score in result at line ${i + 1}: ${line.slice(0, 100)}`,
+        );
+      }
+      throw error;
+    }
   });
 }
 

From 239ab033bb2d8ad1c9594f048a695247e2147ffc Mon Sep 17 00:00:00 2001
From: Christopher Tso <christso@gmail.com>
Date: Wed, 17 Jun 2026 12:55:39 +0200
Subject: [PATCH 3/3] fix(results): preserve result-score validation errors

---
 .../src/commands/results/result-row-schema.ts | 34 +++++++++++++++++--
 apps/cli/test/commands/results/shared.test.ts |  8 ++---
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/apps/cli/src/commands/results/result-row-schema.ts b/apps/cli/src/commands/results/result-row-schema.ts
index 7ff85cdac..cb3d40356 100644
--- a/apps/cli/src/commands/results/result-row-schema.ts
+++ b/apps/cli/src/commands/results/result-row-schema.ts
@@ -143,6 +143,29 @@ function buildSchemaError(context: {
   return new ResultRowSchemaError(`Unsupported result row${location}. ${MIGRATION_GUIDANCE}`);
 }
 
+function buildInvalidScoreError(context: {
+  lineNumber?: number;
+  sourceLabel?: string;
+}): ResultRowSchemaError {
+  const location = [
+    context.sourceLabel ? ` in ${context.sourceLabel}` : '',
+    context.lineNumber !== undefined ? ` at line ${context.lineNumber}` : '',
+  ].join('');
+  return new ResultRowSchemaError(`Missing or invalid score in result row${location}.`);
+}
+
+function looksLikeResultRow(value: Record<string, unknown>): boolean {
+  return (
+    typeof value.test_id === 'string' ||
+    Object.hasOwn(value, 'score') ||
+    Object.hasOwn(value, 'trace') ||
+    Object.hasOwn(value, 'spans') ||
+    Object.hasOwn(value, 'target') ||
+    Object.hasOwn(value, 'grading_path') ||
+    Object.hasOwn(value, 'timing_path')
+  );
+}
+
 export function normalizeResultRow(
   value: unknown,
   context: { lineNumber?: number; sourceLabel?: string } = {},
@@ -152,10 +175,17 @@ export function normalizeResultRow(
   }
 
   const normalized = normalizeKnownAliases(value, RESULT_ROW_ALIASES);
-  normalized.trace = normalizeTraceSummary(normalized.trace);
-  normalized.output = normalizeOutput(normalized.output);
+  if (normalized.trace !== undefined) {
+    normalized.trace = normalizeTraceSummary(normalized.trace);
+  }
+  if (normalized.output !== undefined) {
+    normalized.output = normalizeOutput(normalized.output);
+  }
 
   if (typeof normalized.score !== 'number' || !Number.isFinite(normalized.score)) {
+    if (looksLikeResultRow(normalized)) {
+      throw buildInvalidScoreError(context);
+    }
     throw buildSchemaError(context);
   }
 
diff --git a/apps/cli/test/commands/results/shared.test.ts b/apps/cli/test/commands/results/shared.test.ts
index ee32aa5eb..12f64f61e 100644
--- a/apps/cli/test/commands/results/shared.test.ts
+++ b/apps/cli/test/commands/results/shared.test.ts
@@ -2,11 +2,14 @@ import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
 import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
 import { tmpdir } from 'node:os';
 import path from 'node:path';
+import { fileURLToPath } from 'node:url';
 
 import { resolveRunManifestPath } from '../../../src/commands/eval/result-layout.js';
 import { loadManifestResults } from '../../../src/commands/results/manifest.js';
 import { resolveSourceFile } from '../../../src/commands/results/shared.js';
 
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+
 describe('results shared source resolution', () => {
   let tempDir: string;
 
@@ -63,10 +66,7 @@ describe('results shared source resolution', () => {
   });
 
   it('normalizes historical camelCase replay rows when loading manifests', () => {
-    const fixturePath = path.join(
-      process.cwd(),
-      'apps/cli/test/fixtures/results/camel-replay/index.jsonl',
-    );
+    const fixturePath = path.join(__dirname, '../../fixtures/results/camel-replay/index.jsonl');
 
     const results = loadManifestResults(fixturePath);