mrviduus · mrviduus · Jun 18, 2026 · Jun 18, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+### Phase 12 — ContinuousEvalWorker (AI-079, slice 5a) (2026-06-18)
+
+Automates the eval suite on a cadence so quality regressions on prod are caught without an admin clicking "run evals". `ContinuousEvalWorker` (Api host `BackgroundService`, ~10 min startup delay + hourly check) runs `EvalSuiteRunner` for the configured features when a run is due (i.e. no `run_type='scheduled'` row is newer than `Eval:Scheduled:IntervalHours`, default 24h), persists the results with the new **`eval_runs.run_type`** column (`scheduled`/`manual`), and emails the admin (via `ResendEmailService`, no-op if unset) when a feature's score drops ≥ `RegressionDrop` (default 0.5 on the 1-5 scale) vs the **prior scheduled** run (`EvalRegressionDetector`, pure). **OFF by default** (`Eval:Scheduled:Enabled=false`) — it incurs judge-model cost when enabled, so it also respects an optional `eval.judge` daily cap (fail-open). Concurrency-safe: the in-process overlap guard that the admin trigger used is extracted into a shared singleton `IEvalRunGate` (so a scheduled run and an admin run can't collide), plus a **Postgres advisory lock** for multi-replica deployments. The worker never crashes the host (every tick is wrapped); the advisory lock is released with `CancellationToken.None` so host shutdown can't leak it onto a pooled connection (QA P1); the admin trigger releases the gate even on a synchronous setup failure (QA P2). New `GET /admin/ai-quality/drift/eval-trend` exposes the scheduled-only score trend (consumed by the Drift tab in slice 5b). Migration `AddEvalRunType` (backfills existing rows to `manual`). 780 unit tests green. Slice 5b (DriftDetectionWorker + Drift tab) follows.
+
 ### Phase 12 — cost-aware routing + per-feature daily budget (AI-078, slice 4) (2026-06-18)
 
 Per-feature daily USD budgets with cost-aware enforcement — the DoD "cost-aware routing cuts spend" lever.

diff --git a/backend/src/Ai/TextStack.Ai.EvalSuite/EvalSuiteRunner.cs b/backend/src/Ai/TextStack.Ai.EvalSuite/EvalSuiteRunner.cs
@@ -31,7 +31,8 @@ public async Task<IReadOnlyList<EvalRunResult>> RunAsync(
         bool persist,
         IAppDbContext? db,
         string? gitSha,
-        CancellationToken ct)
+        CancellationToken ct,
+        string runType = "manual")
     {
         var defs = EvalDefinitions.Build(keys);
         var chatConfig = new ChatConfiguration(new LlmServiceChatClient(judgeClient, defaultFeatureTag: "eval.judge"));
@@ -85,6 +86,7 @@ public async Task<IReadOnlyList<EvalRunResult>> RunAsync(
                         N = summary.N,
                         BreakdownJson = Breakdown(entry.Rubric, summary),
                         GitSha = gitSha,
+                        RunType = runType,
                         CreatedAt = DateTimeOffset.UtcNow,
                     });
                 }

diff --git a/backend/src/Api/Endpoints/AdminAiQualityEndpoints.cs b/backend/src/Api/Endpoints/AdminAiQualityEndpoints.cs
@@ -29,6 +29,7 @@ public static void MapAdminAiQualityEndpoints(this WebApplication app)
         group.MapGet("/agent-runs", GetAgentRuns);
         group.MapGet("/agent-runs/{id:guid}", GetAgentRun);
         group.MapGet("/evals", GetEvals);
+        group.MapGet("/drift/eval-trend", GetEvalTrend);
         group.MapPost("/evals/run", RunEvals);
         group.MapGet("/evals/status", GetEvalStatus);
         group.MapPost("/evals/toolcalls/run", RunToolCallEval);
@@ -236,37 +237,51 @@ private static async Task<IResult> RunToolCallEval(
     }
 
     // In-app eval runner state (one run at a time). Triggered from the admin Evals tab.
+    // The single-slot guard is now the shared IEvalRunGate (slice 5a) so an admin run and a
+    // scheduled ContinuousEvalWorker run can't collide; these fields are only display state.
     private static volatile bool _evalRunning;
     private static DateTimeOffset? _evalStartedAt;
     private static string? _evalLastError;
-    private static readonly object _evalLock = new();
 
     private static IResult RunEvals(
         [FromBody] RunEvalsRequest? body,
         IServiceScopeFactory scopeFactory,
         IConfiguration config,
+        Application.Ai.IEvalRunGate gate,
         ILogger<Program> logger)
     {
-        lock (_evalLock)
+        if (!gate.TryEnter())
+            return Results.Conflict(new { error = "An eval run is already in progress" });
+
+        // The gate is now held; any synchronous failure before the background task's
+        // finally takes over MUST release it, else evals are blocked until restart (QA P2).
+        string judgeKey, judgeModelId;
+        string? gitSha;
+        IReadOnlyList<string>? features;
+        try
         {
-            if (_evalRunning)
-                return Results.Conflict(new { error = "An eval run is already in progress" });
             _evalRunning = true;
             _evalStartedAt = DateTimeOffset.UtcNow;
             _evalLastError = null;
-        }
 
-        // Judge defaults to local Ollama (free); generation always goes through the
-        // gateway (routes by FeatureTag → OpenAI/Ollama exactly like prod).
-        // The OpenAI judge runs the dedicated 'openai-judge' provider (Eval:JudgeModel,
-        // default gpt-4.1) — stronger + independent of the nano generation model.
-        var useOpenAiJudge = string.Equals(body?.Judge, "openai", StringComparison.OrdinalIgnoreCase);
-        var judgeKey = useOpenAiJudge ? "openai-judge" : "ollama";
-        var judgeModelId = useOpenAiJudge
-            ? config["Eval:JudgeModel"] ?? "gpt-4.1"
-            : config["Ollama:Model"] ?? "gemma4:e2b";
-        var gitSha = Environment.GetEnvironmentVariable("GIT_SHA");
-        var features = body?.Features;
+            // Judge defaults to local Ollama (free); generation always goes through the
+            // gateway (routes by FeatureTag → OpenAI/Ollama exactly like prod).
+            // The OpenAI judge runs the dedicated 'openai-judge' provider (Eval:JudgeModel,
+            // default gpt-4.1) — stronger + independent of the nano generation model.
+            var useOpenAiJudge = string.Equals(body?.Judge, "openai", StringComparison.OrdinalIgnoreCase);
+            judgeKey = useOpenAiJudge ? "openai-judge" : "ollama";
+            judgeModelId = useOpenAiJudge
+                ? config["Eval:JudgeModel"] ?? "gpt-4.1"
+                : config["Ollama:Model"] ?? "gemma4:e2b";
+            gitSha = Environment.GetEnvironmentVariable("GIT_SHA");
+            features = body?.Features;
+        }
+        catch
+        {
+            _evalRunning = false;
+            gate.Exit();
+            throw;
+        }
 
         _ = Task.Run(async () =>
         {
@@ -278,7 +293,7 @@ private static IResult RunEvals(
                 var gateway = sp.GetRequiredService<ILlmService>();
                 var judge = sp.GetRequiredKeyedService<ILlmService>(judgeKey);
                 var db = sp.GetRequiredService<IAppDbContext>();
-                await runner.RunAsync(_ => gateway, judge, judgeModelId, features, persist: true, db, gitSha, CancellationToken.None);
+                await runner.RunAsync(_ => gateway, judge, judgeModelId, features, persist: true, db, gitSha, CancellationToken.None, runType: "manual");
             }
             catch (Exception ex)
             {
@@ -288,6 +303,7 @@ private static IResult RunEvals(
             finally
             {
                 _evalRunning = false;
+                gate.Exit();
             }
         });
 
@@ -417,12 +433,38 @@ private static async Task<IResult> GetEvals(
             .OrderByDescending(r => r.CreatedAt)
             .Take(limit)
             .Select(r => new EvalRunDto(
-                r.Id, r.Feature, r.ModelId, r.JudgeModelId, r.Score, r.N, r.BreakdownJson, r.GitSha, r.CreatedAt))
+                r.Id, r.Feature, r.ModelId, r.JudgeModelId, r.Score, r.N, r.BreakdownJson, r.GitSha, r.RunType, r.CreatedAt))
             .ToListAsync(ct);
 
         return Results.Ok(runs);
     }
 
+    // Phase 12 RLOps slice 5a: GET /drift/eval-trend — scheduled-only eval trend for the Drift tab (slice 5b UI).
+    // Returns up to `limit` eval_runs rows with RunType == "scheduled" (optionally one feature), newest-first.
+    private static async Task<IResult> GetEvalTrend(
+        AppDbContext db,
+        [FromQuery] string? feature,
+        [FromQuery] int limit = 100,
+        CancellationToken ct = default)
+    {
+        limit = Math.Clamp(limit, 1, 1000); // force the caller-supplied limit into [1, 1000]
+        var query = db.EvalRuns.Where(r => r.RunType == "scheduled"); // manual runs are excluded from the trend
+        if (!string.IsNullOrWhiteSpace(feature))
+        {
+            var feat = feature.Trim(); // compare against the trimmed feature name
+            query = query.Where(r => r.Feature == feat);
+        }
+
+        var points = await query
+            .OrderByDescending(r => r.CreatedAt)
+            .Take(limit)
+            .Select(r => new ScheduledEvalPointDto(
+                r.Feature, r.ModelId, (double)r.Score, r.N, r.GitSha ?? "", r.CreatedAt)) // null GitSha is returned as ""
+            .ToListAsync(ct);
+
+        return Results.Ok((IReadOnlyList<ScheduledEvalPointDto>)points);
+    }
+
     private static async Task<IResult> GetSummary(
         AppDbContext db,
         [FromQuery] DateTimeOffset? from,

diff --git a/backend/src/Api/Program.cs b/backend/src/Api/Program.cs
@@ -81,6 +81,10 @@
 // Application layer
 builder.Services.AddApplication();
 builder.Services.AddSingleton<TextStack.Ai.EvalSuite.EvalSuiteRunner>();
+// Scheduled-eval support (Phase 12 RLOps slice 5a): shared single-slot overlap gate +
+// pure regression detector, consumed by both the admin trigger and ContinuousEvalWorker.
+builder.Services.AddSingleton<Application.Ai.IEvalRunGate, Application.Ai.EvalRunGate>();
+builder.Services.AddSingleton<Application.Ai.EvalRegressionDetector>();
 builder.Services.AddSingleton<TextStack.Ai.EvalSuite.RagEvalRunner>();
 builder.Services.AddSingleton<TextStack.Ai.EvalSuite.ToolCallEvalRunner>();
 builder.Services.AddSingleton<TextStack.Ai.EvalSuite.StudyBuddyEvalRunner>();
@@ -222,6 +226,8 @@
 
 // AI-058: weekly semantic concept clustering (groups vocab by meaning across all books)
 builder.Services.AddHostedService<ConceptClusteringWorker>();
+// Phase 12 RLOps slice 5a: scheduled continuous evals (OFF by default — Eval:Scheduled:Enabled).
+builder.Services.AddHostedService<ContinuousEvalWorker>();
 
 // Rate limiting
 builder.Services.AddRateLimiter(options =>