diff --git a/apps/dashboard/src/components/EvalSuiteLabel.tsx b/apps/dashboard/src/components/EvalSuiteLabel.tsx
new file mode 100644
index 00000000..d8529081
--- /dev/null
+++ b/apps/dashboard/src/components/EvalSuiteLabel.tsx
@@ -0,0 +1,20 @@
+import { formatSuiteDisplay } from '~/lib/run-detail-context';
+
+interface EvalSuiteLabelProps { // Props accepted by EvalSuiteLabel.
+ suite?: string; // Raw suite value handed to formatSuiteDisplay; may be omitted.
+ className?: string; // Optional; EvalSuiteLabel defaults it to '' when destructuring its props.
+}
+
+export function EvalSuiteLabel({ suite, className = '' }: EvalSuiteLabelProps) { // Renders the label that formatSuiteDisplay computes for `suite`.
+ const display = formatSuiteDisplay(suite);
+ if (!display) return null; // Render nothing when formatSuiteDisplay yields no display value.
+
+ return (
+
+ {display.label}
+
+ ); // NOTE(review): the element wrapping display.label is not visible in this view, so how className is applied cannot be confirmed here.
+}
diff --git a/apps/dashboard/src/components/RunDetail.tsx b/apps/dashboard/src/components/RunDetail.tsx
index 538b212b..78574ad9 100644
--- a/apps/dashboard/src/components/RunDetail.tsx
+++ b/apps/dashboard/src/components/RunDetail.tsx
@@ -23,8 +23,9 @@ import type { EvalResult } from '~/lib/types';
import { isPassing, useRunLog, useStudioConfig } from '~/lib/api';
import { isExecutionError, summarizeQuality } from '~/lib/result-summary';
-import { formatCategoryDisplay } from '~/lib/run-detail-context';
+import { formatCategoryDisplay, shouldShowSuiteLabels } from '~/lib/run-detail-context';
+import { EvalSuiteLabel } from './EvalSuiteLabel';
import { PassRatePill } from './PassRatePill';
import { StatsCards } from './StatsCards';
@@ -118,6 +119,7 @@ export function RunDetail({ results, runId, projectId }: RunDetailProps) {
const totalCost = results.reduce((sum, r) => sum + (r.costUsd ?? 0), 0);
const categories = buildCategoryGroups(results, passThreshold);
+ const showSuiteLabels = shouldShowSuiteLabels(results);
if (total === 0) {
return (
@@ -268,25 +270,30 @@ export function RunDetail({ results, runId, projectId }: RunDetailProps) {
)}
- {projectId ? (
-
- {result.testId}
-
- ) : (
-
- {result.testId}
-
- )}
+
+ {projectId ? (
+
+ {result.testId}
+
+ ) : (
+
+ {result.testId}
+
+ )}
+ {showSuiteLabels ? (
+
+ ) : null}
+
|
wrapper. Handles mobile overlay and desktop static placement. */
function SidebarShell({ children }: { children: ReactNode }) {
@@ -98,6 +101,32 @@ function SidebarRunText({ display }: { display: ReturnType
+
+ {passed ? '\u2713' : '\u2717'}
+
+
+ {result.testId}
+ {showSuiteLabel ? (
+
+ ) : null}
+
+ >
+ );
+}
+
type ProjectTabId = 'runs' | 'experiments' | 'analytics' | 'targets';
const projectNavItems: { id: ProjectTabId; label: string; description: string }[] = [
@@ -381,6 +410,7 @@ function EvalSidebar({ runId, currentEvalId }: { runId: string; currentEvalId: s
const { data } = useRunDetail(runId);
const { data: config } = useStudioConfig();
const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8;
+ const showSuiteLabels = shouldShowSuiteLabels(data?.results ?? []);
return (
@@ -405,23 +435,23 @@ function EvalSidebar({ runId, currentEvalId }: { runId: string; currentEvalId: s
{data?.results.map((result) => {
const isActive = result.testId === currentEvalId;
- const passed = isPassing(result.score, passThreshold);
return (
-
- {passed ? '\u2713' : '\u2717'}
-
- {result.testId}
+
);
})}
@@ -580,6 +610,7 @@ function ProjectEvalSidebar({
const { data } = useProjectRunDetail(projectId, runId);
const { data: config } = useStudioConfig(projectId);
const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8;
+ const showSuiteLabels = shouldShowSuiteLabels(data?.results ?? []);
return (
@@ -602,22 +633,22 @@ function ProjectEvalSidebar({
{data?.results.map((result) => {
const isActive = result.testId === currentEvalId;
- const passed = isPassing(result.score, passThreshold);
return (
-
- {passed ? '\u2713' : '\u2717'}
-
- {result.testId}
+
);
})}
diff --git a/apps/dashboard/src/lib/run-detail-context.test.ts b/apps/dashboard/src/lib/run-detail-context.test.ts
index 29aa9f23..cc54b726 100644
--- a/apps/dashboard/src/lib/run-detail-context.test.ts
+++ b/apps/dashboard/src/lib/run-detail-context.test.ts
@@ -2,7 +2,12 @@ import { describe, expect, it } from 'bun:test';
import type { EvalResult } from './types';
-import { buildRunDetailHeader, formatCategoryDisplay } from './run-detail-context';
+import {
+ buildRunDetailHeader,
+ formatCategoryDisplay,
+ formatSuiteDisplay,
+ shouldShowSuiteLabels,
+} from './run-detail-context';
const remoteRunDetailFixture = {
runId: 'remote::smoke-wtg-2026-06-04T02-19-00Z',
@@ -75,3 +80,33 @@ describe('formatCategoryDisplay', () => {
expect(formatCategoryDisplay('examples/showcase')).toEqual({ label: 'examples/showcase' });
});
});
+
+describe('formatSuiteDisplay', () => { // Pins the label/title pair returned by formatSuiteDisplay.
+ it('uses compact file labels for path-like eval suites', () => { // Path input: label is the base name without `.eval.yaml`, title keeps the full path.
+ expect(formatSuiteDisplay('evals/github-actions.eval.yaml')).toEqual({
+ label: 'github-actions',
+ title: 'evals/github-actions.eval.yaml',
+ });
+ });
+
+ it('leaves named suites intact', () => { // Input without a path separator: label and title both equal the input.
+ expect(formatSuiteDisplay('wtg-smoke')).toEqual({
+ label: 'wtg-smoke',
+ title: 'wtg-smoke',
+ });
+ });
+});
+
+describe('shouldShowSuiteLabels', () => { // Pins when per-result suite labels are shown.
+ it('shows labels for mixed-suite runs', () => { // Two results with different suite values -> true.
+ expect(
+ shouldShowSuiteLabels([{ suite: 'evals/a.eval.yaml' }, { suite: 'evals/b.eval.yaml' }]),
+ ).toBe(true);
+ });
+
+ it('suppresses repeated labels for single-suite runs', () => { // Two results sharing one suite value -> false.
+ expect(
+ shouldShowSuiteLabels([{ suite: 'evals/a.eval.yaml' }, { suite: 'evals/a.eval.yaml' }]),
+ ).toBe(false);
+ });
+});
diff --git a/apps/dashboard/src/lib/run-detail-context.ts b/apps/dashboard/src/lib/run-detail-context.ts
index 1425dbef..e88764a7 100644
--- a/apps/dashboard/src/lib/run-detail-context.ts
+++ b/apps/dashboard/src/lib/run-detail-context.ts
@@ -5,6 +5,10 @@
* runs carry extra source identity (`source_label`, results repo). Keep that
* presentation logic here so route components stay thin and tests can pin
* the remote-context contract without rendering React.
+ *
+ * Suite labels are displayed only when a run mixes suites or has partial suite
+ * metadata. Keep the table/sidebar dense by suppressing repeated labels for
+ * single-suite runs.
*/
import type { EvalResult, RunDetailResponse } from './types';
@@ -12,6 +16,7 @@ import type { EvalResult, RunDetailResponse } from './types';
type RunSource = RunDetailResponse['source'];
type HeaderResult = Pick;
+type SuiteLabelResult = Pick;
export interface RunDetailHeaderInput {
runId: string;
@@ -40,6 +45,11 @@ export interface CategoryDisplay {
mutedLabel?: string;
}
+export interface SuiteDisplay { // Shape returned by formatSuiteDisplay.
+ label: string; // Text to display; for path-like suites this is the last path segment with its eval/data extension removed.
+ title: string; // The suite value as returned by cleanOptional, before any shortening.
+}
+
function nonDefaultExperiment(experiment: string | undefined): string | undefined {
return experiment && experiment !== 'default' ? experiment : undefined;
}
@@ -127,3 +137,34 @@ export function formatCategoryDisplay(category: string | undefined): CategoryDis
mutedLabel: raw,
};
}
+
+function stripEvalFileExtension(fileName: string): string { // Removes a trailing `.eval.<ext>` and then a trailing `.<ext>` (yaml, yml, json, jsonl; case-insensitive). NOTE(review): both replaces always run, so `a.json.eval.yaml` becomes `a`, not `a.json` — confirm this is intended.
+ return fileName.replace(/\.eval\.(ya?ml|json|jsonl)$/i, '').replace(/\.(ya?ml|json|jsonl)$/i, '');
+}
+
+export function formatSuiteDisplay(suite: string | undefined): SuiteDisplay | undefined { // Builds the label/title pair for a suite value, or returns undefined when there is nothing to show.
+ const raw = cleanOptional(suite); // presumably trims and maps blank input to undefined — confirm against cleanOptional
+ if (!raw || raw === 'Uncategorized') { // The literal 'Uncategorized' is handled the same way as a missing suite.
+ return undefined;
+ }
+
+ const normalized = raw.replace(/\\/g, '/');
+ const basename = // Last non-empty segment once backslashes are converted to '/'; falls back to raw when no segment remains.
+ normalized
+ .split('/')
+ .filter((part) => part.length > 0)
+ .at(-1) ?? raw;
+ const label = normalized.includes('/') ? stripEvalFileExtension(basename) : raw; // Only values containing a separator are shortened; plain names are kept whole, extension included.
+
+ return {
+ label: label || raw, // Fall back to raw if stripping the extension left an empty string.
+ title: raw,
+ };
+}
+
+export function shouldShowSuiteLabels(results: readonly SuiteLabelResult[]): boolean { // True when at least one result has a real suite value and the results do not all share a single suite value.
+ const normalizedSuites = results.map((result) => cleanOptional(result.suite) ?? ''); // A suite that cleanOptional reports as absent becomes '' so it still counts as a value of its own.
+ const meaningfulSuites = normalizedSuites.filter((suite) => suite && suite !== 'Uncategorized'); // Keeps only non-empty values other than the literal 'Uncategorized'.
+
+ return meaningfulSuites.length > 0 && new Set(normalizedSuites).size > 1; // The distinct count includes '' and 'Uncategorized', so runs where only some results carry a suite also return true.
+}
|