Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions apps/dashboard/src/components/EvalSuiteLabel.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { formatSuiteDisplay } from '~/lib/run-detail-context';

/** Props accepted by the `EvalSuiteLabel` badge component. */
interface EvalSuiteLabelProps {
/** Raw suite identifier; handed to `formatSuiteDisplay` to derive the badge text. */
suite?: string;
/** Extra CSS classes appended after the badge's built-in classes. Defaults to `''`. */
className?: string;
}

/**
 * Small cyan badge naming the eval suite a result belongs to.
 *
 * Returns `null` (renders nothing) when `formatSuiteDisplay` produces no
 * display for `suite`. Otherwise the badge shows `label`, truncated to the
 * available width, and carries `title` as the element's `title` attribute.
 */
export function EvalSuiteLabel({ suite, className = '' }: EvalSuiteLabelProps) {
  const suiteDisplay = formatSuiteDisplay(suite);

  if (suiteDisplay) {
    const { label, title } = suiteDisplay;
    // Caller-supplied classes come last so they can refine the base badge styling.
    const badgeClasses = `inline-flex max-w-full shrink-0 items-center rounded-md border border-cyan-900/60 bg-cyan-950/30 px-2 py-0.5 text-xs font-medium text-cyan-300 ${className}`;

    return (
      <span className={badgeClasses} title={title}>
        <span className="truncate">{label}</span>
      </span>
    );
  }

  return null;
}
47 changes: 27 additions & 20 deletions apps/dashboard/src/components/RunDetail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ import type { EvalResult } from '~/lib/types';

import { isPassing, useRunLog, useStudioConfig } from '~/lib/api';
import { isExecutionError, summarizeQuality } from '~/lib/result-summary';
import { formatCategoryDisplay } from '~/lib/run-detail-context';
import { formatCategoryDisplay, shouldShowSuiteLabels } from '~/lib/run-detail-context';

import { EvalSuiteLabel } from './EvalSuiteLabel';
import { PassRatePill } from './PassRatePill';
import { StatsCards } from './StatsCards';

Expand Down Expand Up @@ -118,6 +119,7 @@ export function RunDetail({ results, runId, projectId }: RunDetailProps) {
const totalCost = results.reduce((sum, r) => sum + (r.costUsd ?? 0), 0);

const categories = buildCategoryGroups(results, passThreshold);
const showSuiteLabels = shouldShowSuiteLabels(results);

if (total === 0) {
return (
Expand Down Expand Up @@ -268,25 +270,30 @@ export function RunDetail({ results, runId, projectId }: RunDetailProps) {
)}
</td>
<td className="w-[24rem] max-w-[24rem] px-4 py-3">
{projectId ? (
<Link
to="/projects/$projectId/evals/$runId/$evalId"
params={{ projectId, runId, evalId: result.testId }}
className="block truncate font-medium text-cyan-400 hover:text-cyan-300 hover:underline"
title={result.testId}
>
{result.testId}
</Link>
) : (
<Link
to="/evals/$runId/$evalId"
params={{ runId, evalId: result.testId }}
className="block truncate font-medium text-cyan-400 hover:text-cyan-300 hover:underline"
title={result.testId}
>
{result.testId}
</Link>
)}
<div className="flex min-w-0 items-center gap-2">
{projectId ? (
<Link
to="/projects/$projectId/evals/$runId/$evalId"
params={{ projectId, runId, evalId: result.testId }}
className="min-w-0 flex-1 truncate font-medium text-cyan-400 hover:text-cyan-300 hover:underline"
title={result.testId}
>
{result.testId}
</Link>
) : (
<Link
to="/evals/$runId/$evalId"
params={{ runId, evalId: result.testId }}
className="min-w-0 flex-1 truncate font-medium text-cyan-400 hover:text-cyan-300 hover:underline"
title={result.testId}
>
{result.testId}
</Link>
)}
{showSuiteLabels ? (
<EvalSuiteLabel suite={result.suite} className="max-w-[10rem]" />
) : null}
</div>
</td>
<td
className="w-[12rem] max-w-[12rem] truncate px-4 py-3 text-gray-400"
Expand Down
55 changes: 43 additions & 12 deletions apps/dashboard/src/components/Sidebar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,13 @@ import {
useRunList,
useStudioConfig,
} from '~/lib/api';
import { shouldShowSuiteLabels } from '~/lib/run-detail-context';
import { formatRunDisplay } from '~/lib/run-label';
import { useSidebarContext } from '~/lib/sidebar-context';
import type { EvalResult } from '~/lib/types';

import { BrandName } from './BrandName';
import { EvalSuiteLabel } from './EvalSuiteLabel';

/** Responsive <aside> wrapper. Handles mobile overlay and desktop static placement. */
function SidebarShell({ children }: { children: ReactNode }) {
Expand Down Expand Up @@ -98,6 +101,32 @@ function SidebarRunText({ display }: { display: ReturnType<typeof formatRunDispl
);
}

/**
 * Inner content of a single eval entry in the sidebar: a pass/fail glyph
 * followed by the test id and, when requested, the suite badge beneath it.
 *
 * Pass/fail is decided by `isPassing(result.score, passThreshold)`.
 */
function EvalSidebarItemContent({
  result,
  passThreshold,
  showSuiteLabel,
}: {
  result: EvalResult;
  passThreshold: number;
  showSuiteLabel: boolean;
}) {
  const didPass = isPassing(result.score, passThreshold);
  const statusColor = didPass ? 'text-emerald-400' : 'text-red-400';
  // U+2713 (check mark) for a pass, U+2717 (ballot X) for a failure.
  const statusGlyph = didPass ? '\u2713' : '\u2717';
  const suiteBadge = showSuiteLabel ? (
    <EvalSuiteLabel suite={result.suite} className="mt-1 max-w-full text-[11px] leading-4" />
  ) : null;

  return (
    <>
      <span className={`mt-0.5 shrink-0 text-xs ${statusColor}`}>{statusGlyph}</span>
      <span className="min-w-0 flex-1">
        <span className="block truncate">{result.testId}</span>
        {suiteBadge}
      </span>
    </>
  );
}

type ProjectTabId = 'runs' | 'experiments' | 'analytics' | 'targets';

const projectNavItems: { id: ProjectTabId; label: string; description: string }[] = [
Expand Down Expand Up @@ -381,6 +410,7 @@ function EvalSidebar({ runId, currentEvalId }: { runId: string; currentEvalId: s
const { data } = useRunDetail(runId);
const { data: config } = useStudioConfig();
const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8;
const showSuiteLabels = shouldShowSuiteLabels(data?.results ?? []);

return (
<SidebarShell>
Expand All @@ -405,23 +435,23 @@ function EvalSidebar({ runId, currentEvalId }: { runId: string; currentEvalId: s

{data?.results.map((result) => {
const isActive = result.testId === currentEvalId;
const passed = isPassing(result.score, passThreshold);

return (
<Link
key={result.testId}
to="/evals/$runId/$evalId"
params={{ runId, evalId: result.testId }}
className={`mb-0.5 flex items-center gap-2 rounded-md px-2 py-1.5 text-sm transition-colors ${
className={`mb-0.5 flex items-start gap-2 rounded-md px-2 py-1.5 text-sm transition-colors ${
isActive
? 'bg-gray-800 text-cyan-400'
: 'text-gray-400 hover:bg-gray-800/50 hover:text-gray-200'
}`}
>
<span className={`text-xs ${passed ? 'text-emerald-400' : 'text-red-400'}`}>
{passed ? '\u2713' : '\u2717'}
</span>
<span className="truncate">{result.testId}</span>
<EvalSidebarItemContent
result={result}
passThreshold={passThreshold}
showSuiteLabel={showSuiteLabels}
/>
</Link>
);
})}
Expand Down Expand Up @@ -580,6 +610,7 @@ function ProjectEvalSidebar({
const { data } = useProjectRunDetail(projectId, runId);
const { data: config } = useStudioConfig(projectId);
const passThreshold = config?.threshold ?? config?.pass_threshold ?? 0.8;
const showSuiteLabels = shouldShowSuiteLabels(data?.results ?? []);

return (
<SidebarShell>
Expand All @@ -602,22 +633,22 @@ function ProjectEvalSidebar({
</div>
{data?.results.map((result) => {
const isActive = result.testId === currentEvalId;
const passed = isPassing(result.score, passThreshold);
return (
<Link
key={result.testId}
to="/projects/$projectId/evals/$runId/$evalId"
params={{ projectId, runId, evalId: result.testId }}
className={`mb-0.5 flex items-center gap-2 rounded-md px-2 py-1.5 text-sm transition-colors ${
className={`mb-0.5 flex items-start gap-2 rounded-md px-2 py-1.5 text-sm transition-colors ${
isActive
? 'bg-gray-800 text-cyan-400'
: 'text-gray-400 hover:bg-gray-800/50 hover:text-gray-200'
}`}
>
<span className={`text-xs ${passed ? 'text-emerald-400' : 'text-red-400'}`}>
{passed ? '\u2713' : '\u2717'}
</span>
<span className="truncate">{result.testId}</span>
<EvalSidebarItemContent
result={result}
passThreshold={passThreshold}
showSuiteLabel={showSuiteLabels}
/>
</Link>
);
})}
Expand Down
37 changes: 36 additions & 1 deletion apps/dashboard/src/lib/run-detail-context.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@ import { describe, expect, it } from 'bun:test';

import type { EvalResult } from './types';

import { buildRunDetailHeader, formatCategoryDisplay } from './run-detail-context';
import {
buildRunDetailHeader,
formatCategoryDisplay,
formatSuiteDisplay,
shouldShowSuiteLabels,
} from './run-detail-context';

const remoteRunDetailFixture = {
runId: 'remote::smoke-wtg-2026-06-04T02-19-00Z',
Expand Down Expand Up @@ -75,3 +80,33 @@ describe('formatCategoryDisplay', () => {
expect(formatCategoryDisplay('examples/showcase')).toEqual({ label: 'examples/showcase' });
});
});

describe('formatSuiteDisplay', () => {
  it('uses compact file labels for path-like eval suites', () => {
    // A path-like suite keeps its full value as the title but shows only the file stem.
    const suitePath = 'evals/github-actions.eval.yaml';

    const display = formatSuiteDisplay(suitePath);

    expect(display).toEqual({
      label: 'github-actions',
      title: 'evals/github-actions.eval.yaml',
    });
  });

  it('leaves named suites intact', () => {
    // A suite without any path separator is used verbatim for both fields.
    const suiteName = 'wtg-smoke';

    const display = formatSuiteDisplay(suiteName);

    expect(display).toEqual({
      label: 'wtg-smoke',
      title: 'wtg-smoke',
    });
  });
});

// Pins when per-result suite badges are shown: only for runs whose results do
// not all share the same suite value and that contain at least one real suite.
describe('shouldShowSuiteLabels', () => {
  it('shows labels for mixed-suite runs', () => {
    expect(
      shouldShowSuiteLabels([{ suite: 'evals/a.eval.yaml' }, { suite: 'evals/b.eval.yaml' }]),
    ).toBe(true);
  });

  it('suppresses repeated labels for single-suite runs', () => {
    expect(
      shouldShowSuiteLabels([{ suite: 'evals/a.eval.yaml' }, { suite: 'evals/a.eval.yaml' }]),
    ).toBe(false);
  });

  it('shows labels when only some results carry suite metadata', () => {
    // Partial metadata: a missing suite counts as its own distinct value, so
    // the run is treated as mixed.
    expect(shouldShowSuiteLabels([{ suite: 'evals/a.eval.yaml' }, { suite: undefined }])).toBe(
      true,
    );
  });

  it('suppresses labels when no result carries suite metadata', () => {
    expect(shouldShowSuiteLabels([{ suite: undefined }, { suite: undefined }])).toBe(false);
  });

  it('suppresses labels for empty runs', () => {
    expect(shouldShowSuiteLabels([])).toBe(false);
  });
});
41 changes: 41 additions & 0 deletions apps/dashboard/src/lib/run-detail-context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,18 @@
* runs carry extra source identity (`source_label`, results repo). Keep that
* presentation logic here so route components stay thin and tests can pin
* the remote-context contract without rendering React.
*
* Suite labels are displayed only when a run mixes suites or has partial suite
* metadata. Keep the table/sidebar dense by suppressing repeated labels for
* single-suite runs.
*/

import type { EvalResult, RunDetailResponse } from './types';

type RunSource = RunDetailResponse['source'];

type HeaderResult = Pick<EvalResult, 'experiment' | 'target' | 'timestamp'>;
/** The only `EvalResult` field that suite-label visibility depends on. */
type SuiteLabelResult = Pick<EvalResult, 'suite'>;

export interface RunDetailHeaderInput {
runId: string;
Expand Down Expand Up @@ -40,6 +45,11 @@ export interface CategoryDisplay {
mutedLabel?: string;
}

/** Presentation data for a suite, as returned by `formatSuiteDisplay`. */
export interface SuiteDisplay {
/** Short text to show; for path-like suites, the last path segment with its eval/data file extension stripped. */
label: string;
/** The suite value as returned by `cleanOptional`, not shortened. */
title: string;
}

function nonDefaultExperiment(experiment: string | undefined): string | undefined {
return experiment && experiment !== 'default' ? experiment : undefined;
}
Expand Down Expand Up @@ -127,3 +137,34 @@ export function formatCategoryDisplay(category: string | undefined): CategoryDis
mutedLabel: raw,
};
}

/**
 * Drops eval/data file suffixes from the end of a file name.
 *
 * First removes a trailing `.eval.<ext>`, then removes a trailing `.<ext>`
 * from whatever remains, where `<ext>` is one of `yaml`, `yml`, `json` or
 * `jsonl` (case-insensitive). Names without such a suffix are returned as-is.
 */
function stripEvalFileExtension(fileName: string): string {
  const withoutEvalSuffix = fileName.replace(/\.eval\.(ya?ml|json|jsonl)$/i, '');
  const withoutDataSuffix = withoutEvalSuffix.replace(/\.(ya?ml|json|jsonl)$/i, '');

  return withoutDataSuffix;
}

/**
 * Builds the badge label and tooltip title for an eval suite.
 *
 * @param suite Raw suite value from a result; may be missing.
 * @returns `undefined` when the cleaned suite is empty or the literal
 *   `'Uncategorized'`. Otherwise `title` is the cleaned suite and `label` is
 *   either that same string or, for values containing a path separator, the
 *   last non-empty path segment with its eval/data file extension removed.
 */
export function formatSuiteDisplay(suite: string | undefined): SuiteDisplay | undefined {
  const raw = cleanOptional(suite);
  if (!raw || raw === 'Uncategorized') {
    return undefined;
  }

  // Treat Windows-style separators like POSIX ones before looking for a path.
  const forwardSlashed = raw.replace(/\\/g, '/');

  let label: string = raw;
  if (forwardSlashed.includes('/')) {
    const segments = forwardSlashed.split('/').filter((segment) => segment.length > 0);
    // A value made only of separators has no segments; fall back to the raw text.
    const lastSegment = segments.at(-1) ?? raw;
    label = stripEvalFileExtension(lastSegment);
  }

  // Stripping can leave an empty string (e.g. a file named just ".eval.yaml").
  return { label: label || raw, title: raw };
}

/**
 * Decides whether per-result suite badges should be rendered for a run.
 *
 * Each result's suite is cleaned with `cleanOptional`, with a missing value
 * counted as `''`. Badges are shown only when at least one result has a
 * meaningful suite (non-empty and not `'Uncategorized'`) and the results do
 * not all share the same cleaned value.
 */
export function shouldShowSuiteLabels(results: readonly SuiteLabelResult[]): boolean {
  const distinctSuites = new Set<string>();
  let hasMeaningfulSuite = false;

  for (const result of results) {
    const suite = cleanOptional(result.suite) ?? '';
    distinctSuites.add(suite);

    if (suite && suite !== 'Uncategorized') {
      hasMeaningfulSuite = true;
    }
  }

  return hasMeaningfulSuite && distinctSuites.size > 1;
}
Loading