From 2aa062c7ef070b120c863d654e1d158ca47e8069 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Wed, 17 Jun 2026 11:04:25 +0200 Subject: [PATCH] fix(dashboard): show eval suite labels --- .../src/components/EvalSuiteLabel.tsx | 20 +++++++ apps/dashboard/src/components/RunDetail.tsx | 47 +++++++++------- apps/dashboard/src/components/Sidebar.tsx | 55 +++++++++++++++---- .../src/lib/run-detail-context.test.ts | 37 ++++++++++++- apps/dashboard/src/lib/run-detail-context.ts | 41 ++++++++++++++ 5 files changed, 167 insertions(+), 33 deletions(-) create mode 100644 apps/dashboard/src/components/EvalSuiteLabel.tsx diff --git a/apps/dashboard/src/components/EvalSuiteLabel.tsx b/apps/dashboard/src/components/EvalSuiteLabel.tsx new file mode 100644 index 000000000..d85290812 --- /dev/null +++ b/apps/dashboard/src/components/EvalSuiteLabel.tsx @@ -0,0 +1,20 @@ +import { formatSuiteDisplay } from '~/lib/run-detail-context'; + +interface EvalSuiteLabelProps { + suite?: string; + className?: string; +} + +export function EvalSuiteLabel({ suite, className = '' }: EvalSuiteLabelProps) { + const display = formatSuiteDisplay(suite); + if (!display) return null; + + return ( + + {display.label} + + ); +} diff --git a/apps/dashboard/src/components/RunDetail.tsx b/apps/dashboard/src/components/RunDetail.tsx index 538b212b4..78574ad9e 100644 --- a/apps/dashboard/src/components/RunDetail.tsx +++ b/apps/dashboard/src/components/RunDetail.tsx @@ -23,8 +23,9 @@ import type { EvalResult } from '~/lib/types'; import { isPassing, useRunLog, useStudioConfig } from '~/lib/api'; import { isExecutionError, summarizeQuality } from '~/lib/result-summary'; -import { formatCategoryDisplay } from '~/lib/run-detail-context'; +import { formatCategoryDisplay, shouldShowSuiteLabels } from '~/lib/run-detail-context'; +import { EvalSuiteLabel } from './EvalSuiteLabel'; import { PassRatePill } from './PassRatePill'; import { StatsCards } from './StatsCards'; @@ -118,6 +119,7 @@ export function RunDetail({ 
results, runId, projectId }: RunDetailProps) { const totalCost = results.reduce((sum, r) => sum + (r.costUsd ?? 0), 0); const categories = buildCategoryGroups(results, passThreshold); + const showSuiteLabels = shouldShowSuiteLabels(results); if (total === 0) { return ( @@ -268,25 +270,30 @@ export function RunDetail({ results, runId, projectId }: RunDetailProps) { )} - {projectId ? ( - - {result.testId} - - ) : ( - - {result.testId} - - )} +
+ {projectId ? ( + + {result.testId} + + ) : ( + + {result.testId} + + )} + {showSuiteLabels ? ( + + ) : null} +
wrapper. Handles mobile overlay and desktop static placement. */ function SidebarShell({ children }: { children: ReactNode }) { @@ -98,6 +101,32 @@ function SidebarRunText({ display }: { display: ReturnType + + {passed ? '\u2713' : '\u2717'} + + + {result.testId} + {showSuiteLabel ? ( + + ) : null} + + + ); +} + type ProjectTabId = 'runs' | 'experiments' | 'analytics' | 'targets'; const projectNavItems: { id: ProjectTabId; label: string; description: string }[] = [ @@ -381,6 +410,7 @@ function EvalSidebar({ runId, currentEvalId }: { runId: string; currentEvalId: s const { data } = useRunDetail(runId); const { data: config } = useStudioConfig(); const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8; + const showSuiteLabels = shouldShowSuiteLabels(data?.results ?? []); return ( @@ -405,23 +435,23 @@ function EvalSidebar({ runId, currentEvalId }: { runId: string; currentEvalId: s {data?.results.map((result) => { const isActive = result.testId === currentEvalId; - const passed = isPassing(result.score, passThreshold); return ( - - {passed ? '\u2713' : '\u2717'} - - {result.testId} + ); })} @@ -580,6 +610,7 @@ function ProjectEvalSidebar({ const { data } = useProjectRunDetail(projectId, runId); const { data: config } = useStudioConfig(projectId); const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8; + const showSuiteLabels = shouldShowSuiteLabels(data?.results ?? []); return ( @@ -602,22 +633,22 @@ function ProjectEvalSidebar({ {data?.results.map((result) => { const isActive = result.testId === currentEvalId; - const passed = isPassing(result.score, passThreshold); return ( - - {passed ? 
'\u2713' : '\u2717'} - - {result.testId} + ); })} diff --git a/apps/dashboard/src/lib/run-detail-context.test.ts b/apps/dashboard/src/lib/run-detail-context.test.ts index 29aa9f23d..cc54b7261 100644 --- a/apps/dashboard/src/lib/run-detail-context.test.ts +++ b/apps/dashboard/src/lib/run-detail-context.test.ts @@ -2,7 +2,12 @@ import { describe, expect, it } from 'bun:test'; import type { EvalResult } from './types'; -import { buildRunDetailHeader, formatCategoryDisplay } from './run-detail-context'; +import { + buildRunDetailHeader, + formatCategoryDisplay, + formatSuiteDisplay, + shouldShowSuiteLabels, +} from './run-detail-context'; const remoteRunDetailFixture = { runId: 'remote::smoke-wtg-2026-06-04T02-19-00Z', @@ -75,3 +80,33 @@ describe('formatCategoryDisplay', () => { expect(formatCategoryDisplay('examples/showcase')).toEqual({ label: 'examples/showcase' }); }); }); + +describe('formatSuiteDisplay', () => { + it('uses compact file labels for path-like eval suites', () => { + expect(formatSuiteDisplay('evals/github-actions.eval.yaml')).toEqual({ + label: 'github-actions', + title: 'evals/github-actions.eval.yaml', + }); + }); + + it('leaves named suites intact', () => { + expect(formatSuiteDisplay('wtg-smoke')).toEqual({ + label: 'wtg-smoke', + title: 'wtg-smoke', + }); + }); +}); + +describe('shouldShowSuiteLabels', () => { + it('shows labels for mixed-suite runs', () => { + expect( + shouldShowSuiteLabels([{ suite: 'evals/a.eval.yaml' }, { suite: 'evals/b.eval.yaml' }]), + ).toBe(true); + }); + + it('suppresses repeated labels for single-suite runs', () => { + expect( + shouldShowSuiteLabels([{ suite: 'evals/a.eval.yaml' }, { suite: 'evals/a.eval.yaml' }]), + ).toBe(false); + }); +}); diff --git a/apps/dashboard/src/lib/run-detail-context.ts b/apps/dashboard/src/lib/run-detail-context.ts index 1425dbef3..e88764a75 100644 --- a/apps/dashboard/src/lib/run-detail-context.ts +++ b/apps/dashboard/src/lib/run-detail-context.ts @@ -5,6 +5,10 @@ * runs carry extra 
source identity (`source_label`, results repo). Keep that
  * presentation logic here so route components stay thin and tests can pin
  * the remote-context contract without rendering React.
+ *
+ * Suite labels are displayed only when a run mixes suites or has partial suite
+ * metadata. Keep the table/sidebar dense by suppressing repeated labels for
+ * single-suite runs.
  */
 
 import type { EvalResult, RunDetailResponse } from './types';
@@ -12,6 +16,8 @@ import type { EvalResult, RunDetailResponse } from './types';
 
 type RunSource = RunDetailResponse['source'];
 type HeaderResult = Pick;
+/** The only result field the suite-label helpers read. */
+type SuiteLabelResult = Pick<EvalResult, 'suite'>;
 
 export interface RunDetailHeaderInput {
   runId: string;
@@ -40,6 +46,14 @@ export interface CategoryDisplay {
   mutedLabel?: string;
 }
 
+/** Compact label plus the unshortened value for an eval suite. */
+export interface SuiteDisplay {
+  /** Compact text to display. */
+  label: string;
+  /** Unshortened suite value as returned by `cleanOptional`. */
+  title: string;
+}
+
 function nonDefaultExperiment(experiment: string | undefined): string | undefined {
   return experiment && experiment !== 'default' ? experiment : undefined;
 }
@@ -127,3 +141,63 @@ export function formatCategoryDisplay(category: string | undefined): CategoryDis
     mutedLabel: raw,
   };
 }
+
+/**
+ * Removes a trailing eval-file extension from a file name.
+ *
+ * `.eval.yaml`, `.eval.yml`, `.eval.json` and `.eval.jsonl` are stripped first;
+ * a remaining bare `.yaml`/`.yml`/`.json`/`.jsonl` suffix is then stripped as
+ * well. Matching is case-insensitive.
+ */
+function stripEvalFileExtension(fileName: string): string {
+  return fileName.replace(/\.eval\.(ya?ml|json|jsonl)$/i, '').replace(/\.(ya?ml|json|jsonl)$/i, '');
+}
+
+/**
+ * Builds the compact label and unshortened title for an eval suite.
+ *
+ * Path-like suites (containing `/` or `\`) are reduced to their last non-empty
+ * segment with the eval-file extension removed; suite names without a
+ * separator are kept as-is. `title` always carries the `cleanOptional` value.
+ *
+ * @param suite - Raw suite value from an eval result; may be missing.
+ * @returns The display pair, or `undefined` when `cleanOptional` yields nothing
+ *   or the suite is the literal `Uncategorized`.
+ */
+export function formatSuiteDisplay(suite: string | undefined): SuiteDisplay | undefined {
+  const raw = cleanOptional(suite);
+  if (!raw || raw === 'Uncategorized') {
+    return undefined;
+  }
+
+  // Treat backslash separators like forward slashes before splitting.
+  const normalized = raw.replace(/\\/g, '/');
+  const basename =
+    normalized
+      .split('/')
+      .filter((part) => part.length > 0)
+      .at(-1) ?? raw;
+  const label = normalized.includes('/') ? stripEvalFileExtension(basename) : raw;
+
+  return {
+    // Fall back to the raw value if stripping left nothing (e.g. `dir/.eval.yaml`).
+    label: label || raw,
+    title: raw,
+  };
+}
+
+/**
+ * Decides whether per-result suite labels are worth rendering for a run.
+ *
+ * Labels are shown only when at least one result has a meaningful suite
+ * (non-empty and not `Uncategorized`) and the results do not all share one
+ * suite value. Results without a suite count as `''`, so a run with partial
+ * suite metadata also shows labels.
+ *
+ * @param results - Results of a single run; only `suite` is read.
+ */
+export function shouldShowSuiteLabels(results: readonly SuiteLabelResult[]): boolean {
+  const normalizedSuites = results.map((result) => cleanOptional(result.suite) ?? '');
+  const meaningfulSuites = normalizedSuites.filter((suite) => suite && suite !== 'Uncategorized');
+
+  return meaningfulSuites.length > 0 && new Set(normalizedSuites).size > 1;
+}