mrviduus · mrviduus · Jun 18, 2026 · Jun 18, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,12 @@
 
 ## [Unreleased]
 
+### Phase 12 — DriftDetectionWorker + Drift tab — closes Phase 12 (AI-080, slice 5b) (2026-06-18)
+
+Input-distribution drift detection — the last Phase-12 DoD ("drift alert fires on a synthetic regression"). `DriftDetectionWorker` (Api host `BackgroundService`, mirrors 5a: startup delay + hourly check, own Postgres advisory lock released with `CancellationToken.None`, never crashes the host, **OFF by default** `Drift:Enabled=false`) once/day per configured feature: samples the most-recent ≤`MaxSampleSize` (50) prompts from `llm_traces` (last 24h, first user message), embeds them via `IEmbeddingService`, **mean-pools a daily centroid**, and measures **cosine drift** (`1 - cosine`) against a **rolling baseline** of the prior good days' centroids. A consecutive-day state machine (`DriftCalculator`, pure) escalates `ok → warning → alerting` and emails the admin **once per streak** when drift ≥ `Threshold` (0.15) for ≥`ConsecutiveDays` (2); cold start seeds a `baseline` row (never alerts), a thin sample writes `insufficient`. Persisted to a new **`drift_centroids`** table (idempotent unique `(feature, day)`, `vector(1536)` centroid, never exposed raw). New `GET /admin/ai-quality/drift` + a read-only **Drift tab** on the admin AI-quality page: per-feature drift charted against the 0.15 threshold line + alert-state badges + the scheduled-eval-score trend (from 5a). Cost-bounded by the capped sample (no `ISpendTracker` hook — `IEmbeddingService` exposes no cost). architect → backend + frontend (parallel) → adversarial QA (verdict SHIP; **P1 fixed**: the rolling baseline now excludes breaching days so a sustained drift can't poison its own baseline and the streak correctly re-arms after recovery). Migration `AddDriftCentroids`. 808 unit tests green (incl. a non-tautological synthetic-regression DoD test over a faithful in-memory DbContext); admin tsc + build clean.
+
+**Phase 12 (RLOps) complete** — shadow routing, admin visibility, table-driven routing + one-click promote/rollback, cost-aware routing + daily budgets, continuous eval, drift detection. Full MLOps for the LLM stack, in C#.
+
 ### Phase 12 — ContinuousEvalWorker (AI-079, slice 5a) (2026-06-18)
 
 Automates the eval suite on a cadence so quality regressions on prod are caught without an admin clicking "run evals". `ContinuousEvalWorker` (Api host `BackgroundService`, ~10min startup delay + hourly check) runs `EvalSuiteRunner` for the configured features when due (no `run_type='scheduled'` row newer than `Eval:Scheduled:IntervalHours`, default 24h), persists with the new **`eval_runs.run_type`** column (`scheduled`/`manual`), and emails the admin (via `ResendEmailService`, no-op if unset) when a feature's score drops ≥ `RegressionDrop` (default 0.5 on the 1-5 scale) vs the **prior scheduled** run (`EvalRegressionDetector`, pure). **OFF by default** (`Eval:Scheduled:Enabled=false`) — it spends judge $ when on, so it also respects an optional `eval.judge` daily cap (fail-open). Concurrency-safe: the in-process overlap guard the admin trigger used is extracted into a shared singleton `IEvalRunGate` (so a scheduled run + an admin run can't collide), plus a **Postgres advisory lock** for multi-replica. The worker never crashes the host (every tick wrapped); the advisory lock is released with `CancellationToken.None` so host shutdown can't leak it onto a pooled connection (QA P1); the admin trigger releases the gate even on a synchronous setup failure (QA P2). New `GET /admin/ai-quality/drift/eval-trend` exposes the scheduled-only score trend (consumed by the Drift tab in slice 5b). Migration `AddEvalRunType` (backfills existing rows to `manual`). 780 unit tests green. Slice 5b (DriftDetectionWorker + Drift tab) follows.

diff --git a/apps/admin/src/api/client.ts b/apps/admin/src/api/client.ts
@@ -613,6 +613,23 @@ export interface ShadowSamplesPage {
   total: number
   items: ShadowSample[]
 }
+// Drift detection (RLOps)
+export type DriftAlertState = 'baseline' | 'ok' | 'warning' | 'alerting' | 'insufficient' // per feature-day state: ok → warning → alerting is the escalation path; 'baseline' = cold-start seed row, 'insufficient' = sample too thin
+export interface DriftPoint { // one row returned by GET /admin/ai-quality/drift: a single feature on a single day
+  feature: string
+  day: string // date "YYYY-MM-DD"
+  driftScore: number | null // cosine drift (1 - cosine) vs the rolling baseline; the UI treats null as "no score" (presumably baseline / insufficient days — confirm against the API)
+  sampleSize: number // presumably the number of prompts sampled for that day — confirm against the API
+  alertState: DriftAlertState
+}
+export interface ScheduledEvalPoint { // one row returned by GET /admin/ai-quality/drift/eval-trend (scheduled eval runs only)
+  feature: string
+  modelId: string
+  score: number // eval score on the 1–5 scale
+  n: number // presumably the number of eval cases behind the score — confirm against the API
+  gitSha: string // the UI shows the first 7 characters and falls back to "—" when empty
+  createdAt: string // timestamp string; the UI parses it with `new Date(...)`
+}
 // Model registry
 export interface ModelRegistration {
   id: string
@@ -1299,6 +1316,22 @@ export const adminApi = {
     return fetchJson<ShadowSamplesPage>(`/admin/ai-quality/shadow/samples${qs ? `?${qs}` : ''}`)
   },
 
+  // Loads daily drift points from GET /admin/ai-quality/drift.
+  // Both filters are optional: a falsy value (undefined, '', 0) is left off the query string entirely.
+  getDrift: async (params?: { feature?: string; days?: number }): Promise<DriftPoint[]> => {
+    const { feature, days } = params ?? {}
+    const search = new URLSearchParams()
+    if (feature) search.set('feature', feature)
+    if (days) search.set('days', String(days))
+    const suffix = search.toString()
+    return fetchJson<DriftPoint[]>(suffix ? `/admin/ai-quality/drift?${suffix}` : '/admin/ai-quality/drift')
+  },
+
+  // Loads the scheduled-eval score trend from GET /admin/ai-quality/drift/eval-trend.
+  // Both filters are optional: a falsy value (undefined, '', 0) is left off the query string entirely.
+  getEvalTrend: async (params?: { feature?: string; limit?: number }): Promise<ScheduledEvalPoint[]> => {
+    const { feature, limit } = params ?? {}
+    const search = new URLSearchParams()
+    if (feature) search.set('feature', feature)
+    if (limit) search.set('limit', String(limit))
+    const suffix = search.toString()
+    return fetchJson<ScheduledEvalPoint[]>(
+      suffix ? `/admin/ai-quality/drift/eval-trend?${suffix}` : '/admin/ai-quality/drift/eval-trend',
+    )
+  },
+
   getModels: async (): Promise<ModelsRegistry> => {
     return fetchJson<ModelsRegistry>('/admin/ai-quality/models')
   },

diff --git a/apps/admin/src/pages/AiQualityPage.tsx b/apps/admin/src/pages/AiQualityPage.tsx
@@ -17,9 +17,12 @@ import {
   ShadowSample,
   ModelRegistration,
   ModelPromotionResult,
+  DriftPoint,
+  DriftAlertState,
+  ScheduledEvalPoint,
 } from '../api/client'
 
-type Tab = 'summary' | 'traces' | 'transcripts' | 'evals' | 'shadow' | 'models'
+type Tab = 'summary' | 'traces' | 'transcripts' | 'evals' | 'shadow' | 'models' | 'drift'
 
 const KNOWN_FEATURES = ['explain', 'translate', 'distractor', 'bookmeta', 'tagsuggestion', 'eval.judge']
 
@@ -29,7 +32,7 @@ export function AiQualityPage() {
     <div className="dashboard-page">
       <h1>AI Quality</h1>
       <div style={{ display: 'flex', gap: 4, borderBottom: '1px solid #e5e7eb', margin: '12px 0 16px' }}>
-        {(['summary', 'traces', 'transcripts', 'evals', 'shadow', 'models'] as Tab[]).map((t) => (
+        {(['summary', 'traces', 'transcripts', 'evals', 'shadow', 'models', 'drift'] as Tab[]).map((t) => (
           <button
             key={t}
             onClick={() => setTab(t)}
@@ -54,6 +57,7 @@ export function AiQualityPage() {
       {tab === 'evals' && <EvalsTab />}
       {tab === 'shadow' && <ShadowTab />}
       {tab === 'models' && <ModelsTab />}
+      {tab === 'drift' && <DriftTab />}
     </div>
   )
 }
@@ -1413,6 +1417,302 @@ function ModelConfirmDialog({
   )
 }
 
+// ─────────────────────────── Drift ───────────────────────────
+
+const DRIFT_THRESHOLD = 0.15 // alert line drawn on the chart and used to flag table rows; hard-coded here, not read from the API, so it only mirrors the backend's default drift Threshold (0.15)
+
+// Colour for a drift alert state: red / amber / green by severity; every other state
+// (baseline, insufficient) falls back to a muted grey.
+function driftStateColor(state: DriftAlertState): string {
+  switch (state) {
+    case 'alerting':
+      return '#dc2626' // red
+    case 'warning':
+      return '#d97706' // amber
+    case 'ok':
+      return '#059669' // green
+    default:
+      return '#9ca3af' // baseline / insufficient — grey/muted
+  }
+}
+
+// Small uppercase chip that shows a drift alert state.
+function DriftBadge({ state }: { state: DriftAlertState }) {
+  // baseline / insufficient are the muted states: a quiet outlined chip instead of a solid
+  // block in the state's colour.
+  const quiet = state === 'baseline' || state === 'insufficient'
+  const tone = quiet
+    ? { color: '#6b7280', background: '#f3f4f6', border: '1px solid #e5e7eb' }
+    : { color: '#fff', background: driftStateColor(state), border: 'none' }
+  return (
+    <span
+      style={{
+        fontSize: 11,
+        fontWeight: 600,
+        textTransform: 'uppercase',
+        borderRadius: 4,
+        padding: '2px 8px',
+        ...tone,
+      }}
+    >
+      {state}
+    </span>
+  )
+}
+
+// Inline drift-over-time chart: daily driftScore per feature against the 0.15 alert threshold.
+//
+// `points` may mix several features (the "All features" view) and its order is not relied on:
+// the x axis is the sorted set of distinct days and every feature gets its own polyline, so
+// unrelated features are never joined into one line and time always runs left → right.
+// Days without a score (driftScore == null) keep their slot on the x axis but get no marker;
+// the feature's line bridges the gap.
+// Renders a short hint instead of a chart when fewer than two points carry a score.
+function DriftChart({ points }: { points: DriftPoint[] }) {
+  const w = 640
+  const h = 120
+  const padX = 8
+  const padY = 8
+  const scored = points.filter((p) => p.driftScore != null)
+  if (scored.length < 2) {
+    return (
+      <p style={{ fontSize: 12, color: '#9ca3af', margin: '8px 0 16px' }}>
+        Not enough scored days to chart yet (need ≥ 2). See the table below.
+      </p>
+    )
+  }
+  // Scale from 0 to the largest score, but never below 1.2 × threshold so the alert line stays in view.
+  const maxVal = Math.max(...scored.map((p) => p.driftScore as number), DRIFT_THRESHOLD * 1.2)
+  // "YYYY-MM-DD" strings sort chronologically as plain text.
+  const days = [...new Set(points.map((p) => p.day))].sort()
+  const step = days.length > 1 ? (w - padX * 2) / (days.length - 1) : 0
+  const x = (day: string) => padX + days.indexOf(day) * step
+  const y = (v: number) => h - padY - (v / maxVal) * (h - padY * 2)
+  const thresholdY = y(DRIFT_THRESHOLD)
+
+  // One series per feature, each in day order.
+  const series = new Map<string, DriftPoint[]>()
+  for (const p of scored) {
+    const list = series.get(p.feature)
+    if (list) list.push(p)
+    else series.set(p.feature, [p])
+  }
+  for (const list of series.values()) {
+    list.sort((a, b) => (a.day < b.day ? -1 : a.day > b.day ? 1 : 0))
+  }
+
+  // Marker keys use feature + day: day alone repeats as soon as two features are charted together.
+  return (
+    <svg width="100%" height={h} viewBox={`0 0 ${w} ${h}`} preserveAspectRatio="none" style={{ margin: '8px 0 16px' }}>
+      {/* threshold line at 0.15 — the alert line, bold red dashed */}
+      <line x1={padX} y1={thresholdY} x2={w - padX} y2={thresholdY} stroke="#dc2626" strokeWidth="1.5" strokeDasharray="5 4" />
+      {[...series].map(([feature, list]) => (
+        <polyline
+          key={feature}
+          points={list.map((p) => `${x(p.day).toFixed(1)},${y(p.driftScore as number).toFixed(1)}`).join(' ')}
+          fill="none"
+          stroke="#2563eb"
+          strokeWidth="2"
+        />
+      ))}
+      {scored.map((p) => (
+        <circle
+          key={`${p.feature}|${p.day}`}
+          cx={x(p.day)}
+          cy={y(p.driftScore as number)}
+          r={p.alertState === 'alerting' ? 4 : 3}
+          fill={driftStateColor(p.alertState)}
+        >
+          <title>{`${p.feature} · ${p.day}: ${(p.driftScore as number).toFixed(3)} (${p.alertState}, n=${p.sampleSize})`}</title>
+        </circle>
+      ))}
+    </svg>
+  )
+}
+
+// Drift tab: daily input-drift scores (chart + table) for the selected feature and day range,
+// followed by the scheduled-eval score trend for the same feature filter.
+function DriftTab() {
+  const [points, setPoints] = useState<DriftPoint[]>([])
+  const [feature, setFeature] = useState('')
+  const [days, setDays] = useState(30)
+  const [loading, setLoading] = useState(true)
+  const [error, setError] = useState<string | null>(null)
+
+  useEffect(() => {
+    // The filters can change while a request is still in flight. Only the latest request may
+    // touch state, otherwise a slow older response could overwrite the newer one.
+    let stale = false
+    setLoading(true)
+    adminApi
+      .getDrift({ feature: feature || undefined, days })
+      .then((d) => {
+        if (stale) return
+        setPoints(d)
+        setError(null)
+      })
+      .catch((e) => {
+        if (stale) return
+        // Drop rows loaded for the previous filter so the table never contradicts the controls.
+        setPoints([])
+        setError(e instanceof Error ? e.message : 'Failed to load')
+      })
+      .finally(() => {
+        if (!stale) setLoading(false)
+      })
+    return () => {
+      stale = true
+    }
+  }, [feature, days])
+
+  // Feature options: union of KNOWN_FEATURES, any features in the response, and the current
+  // selection (so the <select> always has an option matching its value, even on an empty result).
+  const featureOptions = [
+    ...new Set([...KNOWN_FEATURES, ...points.map((p) => p.feature), ...(feature ? [feature] : [])]),
+  ].sort()
+  // Counts rows (one per feature and day) whose state is `alerting`.
+  const alertingDays = points.filter((p) => p.alertState === 'alerting').length
+  // A failed load already shows the error banner; do not also claim that detection is switched off.
+  const emptyHint = error ? null : (
+    <p className="dashboard-empty" style={{ marginTop: 24 }}>
+      Drift detection is OFF by default — enable Drift:Enabled to start sampling.
+    </p>
+  )
+
+  return (
+    <>
+      <div style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center', flexWrap: 'wrap', gap: 8 }}>
+        <div style={{ display: 'flex', gap: 8, alignItems: 'center', flexWrap: 'wrap' }}>
+          <select value={feature} onChange={(e) => setFeature(e.target.value)} style={input}>
+            <option value="">All features</option>
+            {featureOptions.map((f) => (
+              <option key={f} value={f}>
+                {f}
+              </option>
+            ))}
+          </select>
+        </div>
+        <div style={{ display: 'flex', gap: 4 }}>
+          {RANGES.map((r) => (
+            <button key={r.days} onClick={() => setDays(r.days)} style={rangeBtn(days === r.days)}>
+              {r.label}
+            </button>
+          ))}
+        </div>
+      </div>
+
+      <p className="dashboard-page__subtitle" style={{ margin: '12px 0 0' }}>
+        Daily input-drift score per feature vs a 0.15 alert threshold. Higher = more divergence from the rolling baseline.
+      </p>
+
+      {error && <Banner text={error} />}
+
+      {loading ? (
+        <p className="dashboard-page__subtitle" style={{ marginTop: 12 }}>Loading…</p>
+      ) : points.length === 0 ? (
+        emptyHint
+      ) : (
+        <>
+          {alertingDays > 0 && (
+            <div
+              style={{
+                background: '#fef2f2',
+                color: '#b91c1c',
+                padding: '8px 12px',
+                borderRadius: 8,
+                margin: '12px 0',
+                fontSize: 13,
+                fontWeight: 600,
+              }}
+            >
+              ⚠ {alertingDays} day{alertingDays === 1 ? '' : 's'} over the 0.15 drift threshold.
+            </div>
+          )}
+          <DriftChart points={points} />
+          <table style={{ width: '100%', borderCollapse: 'collapse', fontSize: 13 }}>
+            <thead>
+              <tr style={{ textAlign: 'left', color: '#6b7280', borderBottom: '1px solid #e5e7eb' }}>
+                <th style={th}>Day</th>
+                {feature === '' && <th style={th}>Feature</th>}
+                <th style={th}>Drift score</th>
+                <th style={th}>Sample size</th>
+                <th style={th}>State</th>
+              </tr>
+            </thead>
+            <tbody>
+              {points.map((p) => {
+                const over = p.driftScore != null && p.driftScore >= DRIFT_THRESHOLD
+                return (
+                  <tr
+                    key={`${p.feature}|${p.day}`}
+                    style={{ borderBottom: '1px solid #f3f4f6', background: p.alertState === 'alerting' ? '#fef2f2' : undefined }}
+                  >
+                    <td style={td}>{p.day}</td>
+                    {feature === '' && <td style={td}>{p.feature}</td>}
+                    <td style={{ ...td, fontWeight: 600, color: over ? '#dc2626' : '#111827' }}>
+                      {p.driftScore != null ? p.driftScore.toFixed(3) : '—'}
+                      {over && <span> ▲</span>}
+                    </td>
+                    <td style={td}>{p.sampleSize}</td>
+                    <td style={td}>
+                      <DriftBadge state={p.alertState} />
+                    </td>
+                  </tr>
+                )
+              })}
+            </tbody>
+          </table>
+        </>
+      )}
+
+      <DriftEvalTrend feature={feature} />
+    </>
+  )
+}
+
+// Scheduled-eval score trend shown under the drift table: a sparkline plus a table of the
+// scheduled eval runs for the given feature filter ('' = all features).
+function DriftEvalTrend({ feature }: { feature: string }) {
+  const [points, setPoints] = useState<ScheduledEvalPoint[]>([])
+  const [loading, setLoading] = useState(true)
+  const [error, setError] = useState<string | null>(null)
+
+  useEffect(() => {
+    // The feature filter can change while a request is still in flight. Only the latest request
+    // may touch state, otherwise a slow older response could overwrite the newer one.
+    let stale = false
+    setLoading(true)
+    adminApi
+      .getEvalTrend({ feature: feature || undefined })
+      .then((d) => {
+        if (stale) return
+        setPoints(d)
+        setError(null)
+      })
+      .catch((e) => {
+        if (stale) return
+        // Drop rows loaded for the previous filter so the table never contradicts the controls.
+        setPoints([])
+        setError(e instanceof Error ? e.message : 'Failed to load')
+      })
+      .finally(() => {
+        if (!stale) setLoading(false)
+      })
+    return () => {
+      stale = true
+    }
+  }, [feature])
+
+  // A failed load already shows the error banner; do not also claim there are no runs.
+  const emptyHint = error ? null : (
+    <p className="dashboard-empty">No scheduled eval runs yet — enable Eval:Scheduled:Enabled.</p>
+  )
+
+  return (
+    <div style={{ marginTop: 32 }}>
+      <h2 style={{ margin: '0 0 4px', fontSize: 16 }}>Scheduled eval trend</h2>
+      <p className="dashboard-page__subtitle" style={{ margin: '0 0 12px' }}>
+        Latest scheduled eval scores (1–5) over time — the quality signal drift is meant to catch early.
+      </p>
+
+      {error && <Banner text={error} />}
+
+      {loading ? (
+        <p className="dashboard-page__subtitle">Loading…</p>
+      ) : points.length === 0 ? (
+        emptyHint
+      ) : (
+        <>
+          <EvalSparkline points={points} />
+          <table style={{ width: '100%', borderCollapse: 'collapse', fontSize: 13 }}>
+            <thead>
+              <tr style={{ textAlign: 'left', color: '#6b7280', borderBottom: '1px solid #e5e7eb' }}>
+                <th style={th}>When</th>
+                {feature === '' && <th style={th}>Feature</th>}
+                <th style={th}>Score</th>
+                <th style={th}>Model</th>
+                <th style={th}>N</th>
+                <th style={th}>Git SHA</th>
+              </tr>
+            </thead>
+            <tbody>
+              {points.map((p, i) => {
+                // Rows arrive newest-first, so the next later row of the same feature is that
+                // feature's prior run — even when other features' rows sit in between.
+                const prev = points.slice(i + 1).find((q) => q.feature === p.feature)
+                // Score of the prior run when this run dropped by more than 0.1, otherwise null.
+                const regressedFrom = prev !== undefined && p.score < prev.score - 0.1 ? prev.score : null
+                return (
+                  <tr key={`${p.feature}|${p.createdAt}|${i}`} style={{ borderBottom: '1px solid #f3f4f6' }}>
+                    <td style={td}>{timeAgo(p.createdAt)}</td>
+                    {feature === '' && <td style={td}>{p.feature}</td>}
+                    <td style={{ ...td, fontWeight: 600, color: regressedFrom !== null ? '#dc2626' : '#111827' }}>
+                      {p.score.toFixed(2)}
+                      {regressedFrom !== null && <span title={`down from ${regressedFrom.toFixed(2)}`}> ▼</span>}
+                    </td>
+                    <td style={td}>{p.modelId}</td>
+                    <td style={td}>{p.n}</td>
+                    <td style={{ ...td, fontFamily: 'monospace', fontSize: 12 }}>
+                      {p.gitSha ? p.gitSha.slice(0, 7) : '—'}
+                    </td>
+                  </tr>
+                )
+              })}
+            </tbody>
+          </table>
+        </>
+      )}
+    </div>
+  )
+}
+
+// Sparkline of eval scores on the fixed 1–5 scale (scores outside it are clamped for plotting).
+// The API returns newest-first, so the points are reversed to draw oldest → newest.
+// Renders nothing when there are fewer than two points.
+function EvalSparkline({ points }: { points: ScheduledEvalPoint[] }) {
+  if (points.length < 2) return null
+  const w = 640
+  const h = 80
+  const pad = 6
+  const lo = 1
+  const hi = 5
+  const step = (w - pad * 2) / (points.length - 1)
+  // Pre-compute every marker position once; the polyline and the circles share them.
+  const dots = [...points].reverse().map((p, i) => {
+    const clamped = Math.min(hi, Math.max(lo, p.score))
+    return {
+      p,
+      i,
+      cx: pad + i * step,
+      cy: h - pad - ((clamped - lo) / (hi - lo)) * (h - pad * 2),
+    }
+  })
+  const coords = dots.map((d) => `${d.cx.toFixed(1)},${d.cy.toFixed(1)}`).join(' ')
+  return (
+    <svg width="100%" height={h} viewBox={`0 0 ${w} ${h}`} preserveAspectRatio="none" style={{ margin: '4px 0 12px' }}>
+      <polyline points={coords} fill="none" stroke="#7c3aed" strokeWidth="2" />
+      {dots.map(({ p, i, cx, cy }) => (
+        <circle key={`${p.createdAt}|${i}`} cx={cx} cy={cy} r={3} fill="#7c3aed">
+          <title>{`${p.score.toFixed(2)} · ${p.feature} · ${new Date(p.createdAt).toLocaleString()}`}</title>
+        </circle>
+      ))}
+    </svg>
+  )
+}
+
 // ─────────────────────────── shared ───────────────────────────
 
 function Pager({