diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d714da3..f1bd4297 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,16 @@ ## [Unreleased] +### Phase 12 — cost-aware routing + per-feature daily budget (AI-078, slice 4) (2026-06-18) + +Per-feature daily USD budgets with cost-aware enforcement — the DoD "cost-aware routing cuts spend" lever. +- **Spend tracking**: new in-memory `RollingSpendTracker` (singleton, Core `ISpendTracker`) accumulates per-feature spend in **UTC-daily buckets** as lock-free `long` micro-dollars (`Interlocked.Add`, `Math.Round` away-from-zero — no truncation drift). Lazy day rollover via injected `TimeProvider`. On a feature's first touch each day it **seeds from `llm_traces`** — scaled by **1/sample-rate** (same `TracingOptions` the tracer uses; explain is sampled 0.1 in prod, so the raw trace sum is 10× low) — so a mid-day restart doesn't reset the budget to $0. Every-call recording happens in the gateway (unsampled, exactly once per `CompleteAsync`/`StreamAsync`), NOT the sampled tracer. +- **Enforcement** in `ModelGateway`: when today's spend ≥ a feature's `DailyUsd`, mode `fallback` reroutes to a cheaper provider key (e.g. free local `ollama`); mode `hardstop` throws `BudgetExceededException` → **429**. Budget logic can **never break a live call** — any tracker/config failure or unregistered fallback falls through to the true primary + logs. Shadow is unaffected (it still compares the TRUE primary, not the budget fallback); shadow spend does not count against the primary budget. +- **80%-of-budget admin alert**: edge-triggered (fires once on crossing 0.8×budget, not per call), deduped per (feature, day), fire-and-forget via `ResendEmailService` (no-op if `Resend:AdminAlertEmail` empty), refires next day. +- **Admin**: `GET /admin/ai-quality/budgets` + a "Daily budgets" section on the Summary tab — per-feature today-spend vs cap, color-coded % bar, mode, "in fallback" badge. 
Reads the tracker (unsampled, accurate) only for budgeted features (read-only endpoint doesn't seed unbudgeted ones). + +**Budgets OFF by default** (`Ai:Budgets` empty) → zero behavior change until configured per feature (`Features:{feature}:{DailyUsd,Fallback,Mode}`). No migration (in-memory + config). Multi-replica caveat: counters are per-replica (lazy DB seed bounds drift; periodic re-seed is a named follow-up); prod is single-replica. architect → backend + frontend (parallel) → adversarial QA (verdict SHIP; money-counting/edge-alert/never-break all verified, P2 fixed). 761 unit tests green; admin tsc + build clean. + ### Phase 12 — table-driven routing + one-click promote/rollback (AI-077, slice 3) (2026-06-18) The `models` registry is now a **routing input**, not just an audit log — and an admin can swap which model serves a feature in one click, no redeploy. diff --git a/apps/admin/src/api/client.ts b/apps/admin/src/api/client.ts index 9dfee12f..4e0b8381 100644 --- a/apps/admin/src/api/client.ts +++ b/apps/admin/src/api/client.ts @@ -454,6 +454,15 @@ export interface AiQualitySummary { totalCostUsd: number features: FeatureSummary[] } +export interface BudgetStatus { + featureTag: string + todaySpendUsd: number + dailyBudgetUsd: number | null + pctUsed: number + mode: 'off' | 'fallback' | 'hardstop' + fallbackKey: string | null + inFallback: boolean +} export interface TraceListItem { id: string featureTag: string @@ -1207,6 +1216,10 @@ export const adminApi = { return fetchJson(`/admin/ai-quality/summary${qs ? 
`?${qs}` : ''}`) }, + getBudgets: async (): Promise => { + return fetchJson('/admin/ai-quality/budgets') + }, + getAiTraces: async (params?: { feature?: string; q?: string; limit?: number; offset?: number }): Promise => { const query = new URLSearchParams() if (params?.feature) query.set('feature', params.feature) diff --git a/apps/admin/src/pages/AiQualityPage.tsx b/apps/admin/src/pages/AiQualityPage.tsx index 4647ae60..163b6961 100644 --- a/apps/admin/src/pages/AiQualityPage.tsx +++ b/apps/admin/src/pages/AiQualityPage.tsx @@ -2,6 +2,7 @@ import { useState, useEffect, CSSProperties } from 'react' import { adminApi, AiQualitySummary, + BudgetStatus, FeatureSummary, DailyCostPoint, TraceListItem, @@ -120,10 +121,115 @@ function SummaryTab() { )} + + ) } +// ─────────────────────────── Budgets ─────────────────────────── + +function budgetBarColor(pctUsed: number): string { + if (pctUsed >= 1) return '#dc2626' // red, over budget + if (pctUsed >= 0.8) return '#d97706' // amber, near budget + return '#059669' // green +} + +function BudgetsSection() { + const [budgets, setBudgets] = useState(null) + const [loading, setLoading] = useState(true) + const [error, setError] = useState(null) + + useEffect(() => { + setLoading(true) + adminApi + .getBudgets() + .then((d) => { + setBudgets(d) + setError(null) + }) + .catch((e) => setError(e instanceof Error ? e.message : 'Failed to load')) + .finally(() => setLoading(false)) + }, []) + + // Only show features that actually have a budget or are not "off" — the rest are noise. + const rows = (budgets ?? []).filter((b) => b.dailyBudgetUsd != null || b.mode !== 'off') + + return ( +
+

Daily budgets

+

+ Per-feature spend today vs the configured daily cap. +

+ + {error && } + + {loading ? ( +

Loading…

+ ) : rows.length === 0 ? ( +

+ No daily budgets configured. Set Ai:Budgets:Features:{feature} to cap per-feature spend (over-budget + routes to a cheaper fallback or 429). +

+ ) : ( + + + + + + + + + + + + + {rows.map((b) => { + const pct = Math.min(100, Math.max(0, b.pctUsed * 100)) + const color = budgetBarColor(b.pctUsed) + return ( + + + + + + + + + ) + })} + +
FeatureTodayBudget% usedModeStatus
{b.featureTag}${b.todaySpendUsd.toFixed(4)}{b.dailyBudgetUsd != null ? `$${b.dailyBudgetUsd.toFixed(4)}` : '—'} +
+
+
+
+ + {(b.pctUsed * 100).toFixed(0)}% + +
+
{b.mode} + {b.inFallback && ( + + in fallback{b.fallbackKey ? ` · ${b.fallbackKey}` : ''} + + )} +
+ )} +
+ ) +} + function Totals({ label, value }: { label: string; value: string }) { return (
diff --git a/backend/src/Ai/TextStack.Ai.Core/BudgetExceededException.cs b/backend/src/Ai/TextStack.Ai.Core/BudgetExceededException.cs new file mode 100644 index 00000000..7f29b38d --- /dev/null +++ b/backend/src/Ai/TextStack.Ai.Core/BudgetExceededException.cs @@ -0,0 +1,22 @@ +namespace TextStack.Ai.Core; + +/// +/// Thrown by the ModelGateway when a feature is over its per-feature daily budget AND its +/// budget mode is hardstop (Phase 12 RLOps). The API's ExceptionMiddleware maps this +/// to HTTP 429 (Too Many Requests). In fallback mode the gateway silently reroutes to +/// the cheaper fallback provider instead of throwing; this type is hardstop-only. +/// +public sealed class BudgetExceededException : Exception +{ + public string FeatureTag { get; } + public decimal DailyBudgetUsd { get; } + public decimal SpentTodayUsd { get; } + + public BudgetExceededException(string featureTag, decimal dailyBudgetUsd, decimal spentTodayUsd) + : base($"Daily budget exceeded for feature '{featureTag}': spent ${spentTodayUsd} of ${dailyBudgetUsd}.") + { + FeatureTag = featureTag; + DailyBudgetUsd = dailyBudgetUsd; + SpentTodayUsd = spentTodayUsd; + } +} diff --git a/backend/src/Ai/TextStack.Ai.Core/ISpendTracker.cs b/backend/src/Ai/TextStack.Ai.Core/ISpendTracker.cs new file mode 100644 index 00000000..1c13bc4d --- /dev/null +++ b/backend/src/Ai/TextStack.Ai.Core/ISpendTracker.cs @@ -0,0 +1,20 @@ +namespace TextStack.Ai.Core; + +/// +/// Tracks today's (UTC) LLM spend per FeatureTag so the ModelGateway can enforce a +/// per-feature daily budget (Phase 12 RLOps). Process-local, in-memory + lock-free: both +/// members are on the LLM hot path and the fire-and-forget completion path, so they MUST +/// NEVER throw. On any failure or unknown feature, returns 0 +/// (fail-open: a tracker glitch must never block a paid feature). Implementation lives in +/// Application (it lazily seeds from the sampled llm_traces table on a fresh scope). 
+/// +public interface ISpendTracker +{ + /// Approximate USD spent today (UTC) for the feature. 0 on any failure or + /// unknown feature. Hot path — never throws. + decimal SpentTodayUsd(string featureTag); + + /// Add the cost of one completed call to today's running total for the feature. + /// Fire-and-forget caller — never throws. + void Record(string featureTag, decimal costUsd); +} diff --git a/backend/src/Ai/TextStack.Ai.Llm/BudgetOptions.cs b/backend/src/Ai/TextStack.Ai.Llm/BudgetOptions.cs new file mode 100644 index 00000000..4db82fce --- /dev/null +++ b/backend/src/Ai/TextStack.Ai.Llm/BudgetOptions.cs @@ -0,0 +1,62 @@ +namespace TextStack.Ai.Llm; + +/// How the gateway reacts when a feature crosses its daily budget. +public enum BudgetMode +{ + /// No enforcement — route as normal even over budget (default). + Off, + + /// Reroute to the configured cheaper fallback provider (silent, no error). + Fallback, + + /// Reject the call with BudgetExceededException → HTTP 429. + HardStop, +} + +/// Per-feature budget policy for one feature (daily cap + over-budget behaviour). +public sealed record FeatureBudget(decimal? DailyUsd, string? Fallback, BudgetMode Mode); + +/// +/// Cost-aware routing policy (Phase 12 RLOps slice 4). For a given FeatureTag, +/// Features maps the feature → a daily USD cap + over-budget mode + an optional +/// cheaper fallback provider key. A feature with no entry inherits Default's mode +/// (and has no cap). Budgets are OFF by default (empty Features + Default mode Off), +/// so nothing is enforced until a feature is explicitly configured. Built from appsettings +/// (Ai:Budgets). Pure lookups — unit-testable, no DI. +/// +public sealed record BudgetOptions( + FeatureBudget Default, + IReadOnlyDictionary Features) +{ + /// An all-off policy (no features, Default mode Off). The DI-safe empty default. 
+ public static BudgetOptions Empty { get; } = + new(new FeatureBudget(null, null, BudgetMode.Off), new Dictionary()); + + /// The explicitly-configured feature tags (those with an Ai:Budgets:Features entry). + public IEnumerable ConfiguredFeatures => Features.Keys; + + /// Daily USD cap for a feature, or null if none is set (cap absent or ≤ 0 → disabled). + public decimal? DailyUsdFor(string? featureTag) + { + var b = Lookup(featureTag); + return b.DailyUsd is { } d && d > 0m ? d : null; + } + + /// Cheaper fallback provider key for a feature, or null if none is configured. + public string? FallbackKeyFor(string? featureTag) + { + var key = Lookup(featureTag).Fallback; + return string.IsNullOrWhiteSpace(key) ? null : key; + } + + /// Over-budget mode for a feature; a missing feature inherits Default's mode. + public BudgetMode ModeFor(string? featureTag) => Lookup(featureTag).Mode; + + private FeatureBudget Lookup(string? featureTag) + { + if (!string.IsNullOrWhiteSpace(featureTag) && Features.TryGetValue(featureTag, out var b)) + return b; + // Unknown feature → inherit the Default mode, but never a cap/fallback. 
+ return new FeatureBudget(null, null, Default.Mode); + } +} diff --git a/backend/src/Ai/TextStack.Ai.Llm/ModelGateway.cs b/backend/src/Ai/TextStack.Ai.Llm/ModelGateway.cs index fd897534..33e100d5 100644 --- a/backend/src/Ai/TextStack.Ai.Llm/ModelGateway.cs +++ b/backend/src/Ai/TextStack.Ai.Llm/ModelGateway.cs @@ -30,14 +30,17 @@ public sealed class ModelGateway( IServiceScopeFactory scopeFactory, ShadowOptions shadowOptions, IModelRouteProvider routeProvider, + ISpendTracker spendTracker, + BudgetOptions budgetOptions, ILogger logger) : ILlmService { public async Task CompleteAsync(LlmRequest request, CancellationToken ct) { var sw = Stopwatch.StartNew(); - var primary = await Route(request.FeatureTag).CompleteAsync(request, ct); + var primary = await BudgetAwareRoute(request.FeatureTag).CompleteAsync(request, ct); sw.Stop(); + RecordSpend(request.FeatureTag, primary); // Fire-and-forget — never awaited, so the primary returns instantly. MaybeShadow(request, primary, sw.ElapsedMilliseconds); return primary; @@ -57,7 +60,7 @@ public async IAsyncEnumerable StreamAsync( string? modelId = null; var traceId = Guid.NewGuid(); - await using var e = Route(request.FeatureTag).StreamAsync(request, ct).GetAsyncEnumerator(ct); + await using var e = BudgetAwareRoute(request.FeatureTag).StreamAsync(request, ct).GetAsyncEnumerator(ct); while (true) { if (!await e.MoveNextAsync()) @@ -75,6 +78,7 @@ public async IAsyncEnumerable StreamAsync( // Only reached on clean completion (an exception escapes the loop without // running this), so a primary that throws mid-stream is never shadowed. var primary = TracingDecorator.BuildStreamedResponse(text.ToString(), toolCalls, usage, modelId, traceId); + RecordSpend(request.FeatureTag, primary); MaybeShadow(request, primary, sw.ElapsedMilliseconds); } @@ -104,6 +108,84 @@ private ILlmService Route(string? featureTag) return svc; } + /// + /// Route resolution with cost-aware budget enforcement layered on top of the true primary + /// (). 
When a feature is at/over its daily budget and budget enforcement is + /// ON for it: fallback mode reroutes to the configured cheaper provider (if it resolves + /// to a registered keyed service; otherwise it logs + uses the true primary), and hardstop + /// mode throws BudgetExceededException. ANY other exception in the budget check (a + /// tracker glitch, an options bug) is swallowed → the true primary is used, so budget logic can + /// NEVER break a real LLM call. The shadow path is unaffected: it always compares the TRUE + /// primary, not the budget fallback. + /// + private ILlmService BudgetAwareRoute(string? featureTag) + { + var truePrimary = Route(featureTag); + try + { + if (string.IsNullOrWhiteSpace(featureTag)) + return truePrimary; + + var mode = budgetOptions.ModeFor(featureTag); + if (mode == BudgetMode.Off) + return truePrimary; + + if (budgetOptions.DailyUsdFor(featureTag) is not { } dailyUsd) + return truePrimary; + + var spent = spendTracker.SpentTodayUsd(featureTag); + if (spent < dailyUsd) + return truePrimary; + + if (mode == BudgetMode.HardStop) + throw new BudgetExceededException(featureTag, dailyUsd, spent); + + // Fallback mode: reroute to the cheaper provider if it's a registered keyed service. + var fallbackKey = budgetOptions.FallbackKeyFor(featureTag); + if (fallbackKey is not null + && serviceProvider.GetKeyedService(fallbackKey) is { } fallbackSvc) + { + logger.LogDebug( + "Feature '{Feature}' over budget (${Spent}/${Budget}); routing to fallback '{Key}'", + featureTag, spent, dailyUsd, fallbackKey); + return fallbackSvc; + } + + logger.LogWarning( + "Feature '{Feature}' over budget but fallback '{Key}' is not registered; using primary", + featureTag, fallbackKey); + return truePrimary; + } + catch (BudgetExceededException) + { + // Hard stop is a deliberate signal — let it surface (mapped to 429 by the API). + throw; + } + catch (Exception ex) + { + // Budget logic must NEVER break a call — fall back to the true primary. 
+ logger.LogDebug(ex, "Budget-aware routing failed for feature '{Feature}'; using primary", featureTag); + return truePrimary; + } + } + + /// Record one completed call's cost against the feature's running daily total, exactly + /// once per gateway call (regardless of whether the budget fallback served it). Shadow -raw + /// calls bypass the gateway entirely, so shadow spend is correctly NOT counted. Never throws. + private void RecordSpend(string? featureTag, LlmResponse primary) + { + if (string.IsNullOrWhiteSpace(featureTag)) + return; + try + { + spendTracker.Record(featureTag, primary.Usage.CostUsd); + } + catch (Exception ex) + { + logger.LogDebug(ex, "Spend record failed for feature '{Feature}'", featureTag); + } + } + /// Resolved PRIMARY provider key for a feature (registry → config → default), /// mirroring 's precedence WITHOUT touching DI. Used to skip a /// shadow that now points at the same provider as the primary. diff --git a/backend/src/Api/Endpoints/AdminAiQualityEndpoints.cs b/backend/src/Api/Endpoints/AdminAiQualityEndpoints.cs index 16e7865e..18499b6c 100644 --- a/backend/src/Api/Endpoints/AdminAiQualityEndpoints.cs +++ b/backend/src/Api/Endpoints/AdminAiQualityEndpoints.cs @@ -40,6 +40,7 @@ public static void MapAdminAiQualityEndpoints(this WebApplication app) group.MapGet("/models", GetModels); group.MapPost("/models/{id:guid}/promote", PromoteModel); group.MapPost("/models/{feature}/rollback", RollbackModel); + group.MapGet("/budgets", GetBudgets); } // Phase 7 DoD gate (AI-046): A/B the single-call baseline vs the full FieldCrew on the same brief+source over @@ -692,6 +693,48 @@ private static async Task RollbackModel( return Results.Ok(ToDto(result)); } + // Phase 12 (RLOps slice 4): per-feature daily budget status. Display reads the live in-memory + // ISpendTracker (NOT the sampled traces — those undercount). 
The feature set = every explicitly + // budgeted feature UNION every feature with a configured route (Ai:Routes), so the tab shows + // routed features even before they have a budget. Budgets are OFF by default → all rows show + // mode "off", $0 budget, 0% used. + private static IResult GetBudgets( + ISpendTracker tracker, + TextStack.Ai.Llm.BudgetOptions budgets, + IConfiguration config) + { + var features = new SortedSet(StringComparer.Ordinal); + foreach (var f in budgets.ConfiguredFeatures) + features.Add(f); + foreach (var route in config.GetSection("Ai:Routes").GetChildren()) + features.Add(route.Key); + + var rows = features.Select(f => + { + var dailyBudget = budgets.DailyUsdFor(f); + var mode = budgets.ModeFor(f); + // Only read the tracker (which lazily seeds a DB sum) for features actually under a + // budget — a read-only display endpoint must not seed/touch buckets for every routed + // feature on each page load (QA P2). Unbudgeted features report $0/off without a hit. + var enforced = mode != TextStack.Ai.Llm.BudgetMode.Off && dailyBudget is { } d && d > 0m; + var spend = enforced ? tracker.SpentTodayUsd(f) : 0m; + var pctUsed = dailyBudget is { } b && b > 0m ? 
(double)(spend / b) : 0d; + var inFallback = mode == TextStack.Ai.Llm.BudgetMode.Fallback + && dailyBudget is { } db && spend >= db; + + return new BudgetStatusDto( + FeatureTag: f, + TodaySpendUsd: spend, + DailyBudgetUsd: dailyBudget, + PctUsed: pctUsed, + Mode: mode.ToString().ToLowerInvariant(), + FallbackKey: budgets.FallbackKeyFor(f), + InFallback: inFallback); + }).ToList(); + + return Results.Ok((IReadOnlyList)rows); + } + private static ModelPromotionResultDto ToDto(Application.Ai.ModelPromotionResult r) => new( r.FeatureTag, diff --git a/backend/src/Api/Middleware/ExceptionMiddleware.cs b/backend/src/Api/Middleware/ExceptionMiddleware.cs index f288cd07..d391cfa5 100644 --- a/backend/src/Api/Middleware/ExceptionMiddleware.cs +++ b/backend/src/Api/Middleware/ExceptionMiddleware.cs @@ -1,5 +1,6 @@ using System.Text.Json; using Domain.Exceptions; +using TextStack.Ai.Core; namespace Api.Middleware; @@ -38,6 +39,10 @@ private async Task HandleExceptionAsync(HttpContext context, Exception exception DomainException ex => (StatusCodes.Status400BadRequest, new ErrorResponse(ex.Code, ex.Message)), + // Per-feature daily budget hit in hard-stop mode (Phase 12 RLOps slice 4). + BudgetExceededException ex => (StatusCodes.Status429TooManyRequests, + new ErrorResponse("BUDGET_EXCEEDED", ex.Message)), + _ => HandleUnexpectedException(exception) }; diff --git a/backend/src/Api/appsettings.json b/backend/src/Api/appsettings.json index 121c259b..b8ab039f 100644 --- a/backend/src/Api/appsettings.json +++ b/backend/src/Api/appsettings.json @@ -95,6 +95,13 @@ "Routes": {}, "SampleRates": {}, "TimeoutSeconds": 15 + }, + "_BudgetsComment": "Per-feature daily (UTC) USD budget. OFF by default (Default:Mode=off + empty Features). Enable per feature: Features:{feature}:{DailyUsd (>0), Fallback (cheaper provider key, e.g. 'ollama'), Mode ('fallback'|'hardstop')}. fallback reroutes over budget; hardstop returns HTTP 429. 
Spend is process-local + UTC-daily, lazy-seeded from sampled llm_traces on restart (approximate).", + "Budgets": { + "Default": { + "Mode": "off" + }, + "Features": {} } }, "FileUpload": { diff --git a/backend/src/Application/Ai/RollingSpendTracker.cs b/backend/src/Application/Ai/RollingSpendTracker.cs new file mode 100644 index 00000000..db9a6756 --- /dev/null +++ b/backend/src/Application/Ai/RollingSpendTracker.cs @@ -0,0 +1,202 @@ +using System.Collections.Concurrent; +using Application.Auth; +using Application.Common.Interfaces; +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using TextStack.Ai.Core; +using TextStack.Ai.Llm; + +namespace Application.Ai; + +/// +/// In-memory, process-local per-feature daily (UTC) spend tracker (Phase 12 RLOps slice 4). +/// Backs the ModelGateway's cost-aware routing + per-feature daily budget. Singleton. +/// +/// Storage is a lock-free ConcurrentDictionary of feature → a bucket +/// holding the UTC day + the day's spend as long micro-dollars (cost × 1e6, rounded), so +/// increments are a single Interlocked.Add with no decimal contention. +/// +/// Day rollover: both SpentTodayUsd and Record compare the bucket's UTC +/// date (via the injected TimeProvider) to today; a stale bucket is atomically swapped +/// for a fresh zeroed one before use, so a new day starts at 0. +/// +/// Lazy seed: the FIRST touch of a (feature, today) bucket queries llm_traces for today's +/// summed cost on a fresh DI scope and scales it by 1/sampleRate for that feature (traces are +/// sampled — e.g. explain at 0.1 → multiply by 10). This APPROXIMATELY recovers spend accrued before +/// a process restart; it is an estimate, not exact (sampling variance + the in-flight window between +/// the seed query and the first post-restart Record). Runs at most once per bucket and never throws. 
+/// +/// 80% alert: when an increment pushes a budgeted feature from below to ≥ 80% of its daily budget, +/// one admin alert email is fired (fire-and-forget, deduped per (feature, day)). All side effects are +/// wrapped in try/catch — this lives on the LLM hot path and the fire-and-forget completion path, so +/// it NEVER throws: fails open to 0. +/// +public sealed class RollingSpendTracker( + IServiceScopeFactory scopeFactory, + TimeProvider timeProvider, + BudgetOptions budgets, + TracingOptions tracing, + ILogger logger) : ISpendTracker +{ + private const decimal MicrosPerDollar = 1_000_000m; + + private sealed class Bucket(DateOnly utc) + { + public DateOnly Utc { get; } = utc; + public long Micros; + public int Seeded; // 0 = not yet seeded; set to 1 via Interlocked.Exchange. + } + + private readonly ConcurrentDictionary _buckets = new(); + // Dedup key for the 80%-budget alert: at most one alert per (feature, UTC day). + private readonly ConcurrentDictionary<(string Feature, DateOnly Day), bool> _alerted = new(); + + public decimal SpentTodayUsd(string featureTag) + { + try + { + if (string.IsNullOrWhiteSpace(featureTag)) + return 0m; + var bucket = GetTodayBucket(featureTag); + return Interlocked.Read(ref bucket.Micros) / MicrosPerDollar; + } + catch (Exception ex) + { + // Fail open: a tracker glitch must never block a paid feature. 
+ logger.LogDebug(ex, "SpentTodayUsd failed for feature '{Feature}'; treating as 0", featureTag); + return 0m; + } + } + + public void Record(string featureTag, decimal costUsd) + { + try + { + if (string.IsNullOrWhiteSpace(featureTag) || costUsd <= 0m) + return; + + var bucket = GetTodayBucket(featureTag); + var micros = (long)Math.Round(costUsd * MicrosPerDollar, MidpointRounding.AwayFromZero); + if (micros <= 0) + return; + + var before = Interlocked.Read(ref bucket.Micros); + var after = Interlocked.Add(ref bucket.Micros, micros); + + MaybeAlert(featureTag, bucket.Utc, before, after); + } + catch (Exception ex) + { + logger.LogDebug(ex, "Record failed for feature '{Feature}'", featureTag); + } + } + + /// Fetch the bucket for (feature, today), rolling a stale day over to a fresh zeroed + /// bucket and lazily seeding it from llm_traces exactly once. + private Bucket GetTodayBucket(string featureTag) + { + var today = DateOnly.FromDateTime(timeProvider.GetUtcNow().UtcDateTime); + + var bucket = _buckets.AddOrUpdate( + featureTag, + _ => new Bucket(today), + (_, existing) => existing.Utc == today ? existing : new Bucket(today)); + + // Seed at most once per bucket (the very first touch of a (feature, today) pair). + if (Interlocked.Exchange(ref bucket.Seeded, 1) == 0) + SeedFromTraces(featureTag, bucket); + + return bucket; + } + + /// Query today's summed trace cost and scale by 1/sampleRate to estimate true spend + /// (traces are sampled). Best-effort: any failure leaves the bucket at its current value. + private void SeedFromTraces(string featureTag, Bucket bucket) + { + try + { + var utcMidnight = new DateTimeOffset(bucket.Utc.ToDateTime(TimeOnly.MinValue), TimeSpan.Zero); + + decimal sampledCost; + using (var scope = scopeFactory.CreateScope()) + { + var db = scope.ServiceProvider.GetRequiredService(); + sampledCost = db.LlmTraces + .Where(t => t.FeatureTag == featureTag && t.CreatedAt >= utcMidnight) + .Sum(t => (decimal?)t.CostUsd) ?? 
0m; + } + + if (sampledCost <= 0m) + return; + + // Scale up by 1/sampleRate (sampled traces under-count). Guard a 0/negative/insane rate. + var rate = tracing.RateFor(featureTag); + if (rate <= 0d || rate > 1d) + rate = 1d; + var estimated = sampledCost / (decimal)rate; + + var micros = (long)Math.Round(estimated * MicrosPerDollar, MidpointRounding.AwayFromZero); + if (micros > 0) + Interlocked.Add(ref bucket.Micros, micros); + } + catch (Exception ex) + { + // Seed is an approximation; if it fails the bucket simply starts from live Records. + logger.LogDebug(ex, "Spend seed from traces failed for feature '{Feature}'", featureTag); + } + } + + /// If this increment pushed a budgeted feature across the 80% line (and we haven't + /// alerted yet today), fire one admin alert email. Fully best-effort. + private void MaybeAlert(string featureTag, DateOnly day, long beforeMicros, long afterMicros) + { + try + { + if (budgets.ModeFor(featureTag) == BudgetMode.Off) + return; + if (budgets.DailyUsdFor(featureTag) is not { } dailyUsd) + return; + + var thresholdMicros = (long)Math.Round(0.8m * dailyUsd * MicrosPerDollar, MidpointRounding.AwayFromZero); + // Only on the crossing edge: was below, now at/above. + if (beforeMicros >= thresholdMicros || afterMicros < thresholdMicros) + return; + + // Dedup: one alert per (feature, day). + if (!_alerted.TryAdd((featureTag, day), true)) + return; + + var spent = afterMicros / MicrosPerDollar; + FireAlert(featureTag, dailyUsd, spent); + } + catch (Exception ex) + { + logger.LogDebug(ex, "Budget alert check failed for feature '{Feature}'", featureTag); + } + } + + private void FireAlert(string featureTag, decimal dailyUsd, decimal spentUsd) + { + var pct = dailyUsd > 0m ? Math.Round(spentUsd / dailyUsd * 100m, 1) : 0m; + var subject = $"[TextStack AI] '{featureTag}' at {pct}% of daily budget"; + var html = + $"

Feature {featureTag} has spent ${spentUsd:0.######} of its " + + $"${dailyUsd:0.######} daily LLM budget ({pct}%).

" + + "

At 100% the feature's budget mode takes effect (fallback or hard-stop).

"; + + _ = Task.Run(async () => + { + try + { + using var scope = scopeFactory.CreateScope(); + var email = scope.ServiceProvider.GetRequiredService(); + await email.SendAdminAlertAsync(subject, html, CancellationToken.None); + } + catch (Exception ex) + { + logger.LogWarning(ex, "Failed to send 80% budget alert for feature '{Feature}'", featureTag); + } + }); + } +} diff --git a/backend/src/Application/DependencyInjection.cs b/backend/src/Application/DependencyInjection.cs index 39c43726..085c77ed 100644 --- a/backend/src/Application/DependencyInjection.cs +++ b/backend/src/Application/DependencyInjection.cs @@ -122,6 +122,38 @@ public static IServiceCollection AddApplication(this IServiceCollection services // Singleton (caches one snapshot of the `models` registry); hot-path safe + never throws. services.AddSingleton(); + // Cost-aware routing policy (Ai:Budgets, Phase 12 RLOps slice 4). OFF by default + // (empty Features + Default mode Off → no enforcement). A budgeted feature can hard-stop + // (→429) or reroute to a cheaper fallback provider once over its daily USD cap. + services.AddSingleton(sp => + { + var c = sp.GetRequiredService(); + + global::TextStack.Ai.Llm.BudgetMode ParseMode(string? raw) => + Enum.TryParse(raw, ignoreCase: true, out var m) + ? m + : global::TextStack.Ai.Llm.BudgetMode.Off; + + var defaultMode = ParseMode(c["Ai:Budgets:Default:Mode"]); + var def = new global::TextStack.Ai.Llm.FeatureBudget(null, null, defaultMode); + + var features = new Dictionary(); + foreach (var section in c.GetSection("Ai:Budgets:Features").GetChildren()) + { + var daily = section.GetValue("DailyUsd"); + var fallback = section["Fallback"]; + var mode = ParseMode(section["Mode"]); + features[section.Key] = new global::TextStack.Ai.Llm.FeatureBudget(daily, fallback, mode); + } + + return new global::TextStack.Ai.Llm.BudgetOptions(def, features); + }); + + // Per-feature daily spend tracker (in-memory, UTC rollover, lazy-seeds from llm_traces). 
+ // Singleton: the gateway reads/records spend on every call; lives behind the never-throw + // ISpendTracker contract. Registered BEFORE the gateway (the gateway depends on it). + services.AddSingleton(); + // One-click promote / rollback of the primary model for a feature. services.AddScoped(); @@ -134,6 +166,8 @@ public static IServiceCollection AddApplication(this IServiceCollection services sp.GetRequiredService(), sp.GetRequiredService(), sp.GetRequiredService(), + sp.GetRequiredService(), + sp.GetRequiredService(), sp.GetRequiredService>())); // Embeddings (Phase 4 RAG). Single OpenAI provider; resolved lazily so a keyless diff --git a/backend/src/Contracts/Admin/AiQualityDtos.cs b/backend/src/Contracts/Admin/AiQualityDtos.cs index 8fb6d4dc..ecabc30e 100644 --- a/backend/src/Contracts/Admin/AiQualityDtos.cs +++ b/backend/src/Contracts/Admin/AiQualityDtos.cs @@ -180,6 +180,20 @@ public record ModelRegistrationDto( /// The models registry payload (whole table; tiny). public record ModelsRegistryDto(IReadOnlyList Models); +/// Per-feature budget status for the Budgets tab (Phase 12 RLOps slice 4). Spend is read +/// from the live in-memory tracker (NOT the sampled traces — those undercount). PctUsed is +/// 0 when the feature has no budget. InFallback is true only when the mode is fallback AND +/// today's spend is at/over the budget (i.e. calls are currently being rerouted to the cheaper +/// provider). Mode is the lowercased string: "off" | "fallback" | "hardstop". +public record BudgetStatusDto( + string FeatureTag, + decimal TodaySpendUsd, + decimal? DailyBudgetUsd, + double PctUsed, + string Mode, + string? FallbackKey, + bool InFallback); + /// Result of a promote/rollback: the new Primary, the model demoted to Shadow /// (null if there was none), the audited action ("Promote"/"Rollback") + admin + time. 
public record ModelPromotionResultDto( diff --git a/tests/TextStack.IntegrationTests/AdminBudgetEndpointTests.cs b/tests/TextStack.IntegrationTests/AdminBudgetEndpointTests.cs new file mode 100644 index 00000000..42ebf64e --- /dev/null +++ b/tests/TextStack.IntegrationTests/AdminBudgetEndpointTests.cs @@ -0,0 +1,72 @@ +using System.Net; +using System.Text.Json; + +namespace TextStack.IntegrationTests; + +/// +/// Integration tests for the admin per-feature budget endpoint (Phase 12 RLOps slice 4), against +/// the live API on the admin host (textstack.dev). Budgets are OFF by default, so the regression +/// guarantee is that GET /admin/ai-quality/budgets returns a well-formed list of rows (one per +/// routed/configured feature) with mode "off" and a $0/0% budget — and that the endpoint is +/// admin-gated (401 without auth). Authed assertions need the fixture's test user to be admin; +/// otherwise AdminAuth → 401/403 and the test is skipped rather than false-passing. +/// +/// To run: `docker compose up` (API on :8080) with `ENABLE_TEST_AUTH=true`; runs in CI. 
+/// +public class AdminBudgetEndpointTests : IClassFixture +{ + private readonly AuthenticatedApiFixture _fixture; + + public AdminBudgetEndpointTests(AuthenticatedApiFixture fixture) => _fixture = fixture; + + [Fact] + public async Task GetBudgets_NoAuth_Unauthorized() + { + var request = new HttpRequestMessage(HttpMethod.Get, "/admin/ai-quality/budgets"); + request.Headers.Host = AuthenticatedApiFixture.AdminHost; + + var response = await _fixture.Client.SendAsync(request, TestContext.Current.CancellationToken); + + Assert.SkipWhen(response.StatusCode is HttpStatusCode.NotFound, "endpoint not deployed"); + Assert.Equal(HttpStatusCode.Unauthorized, response.StatusCode); + } + + [Fact] + public async Task GetBudgets_Authed_BudgetsOffByDefault_ReturnsWellFormedRows() + { + Assert.SkipUnless(_fixture.IsAuthenticated, "auth unavailable"); + + var request = _fixture.CreateAdminRequest(HttpMethod.Get, "/admin/ai-quality/budgets"); + var response = await _fixture.Client.SendAsync(request, TestContext.Current.CancellationToken); + + Assert.SkipWhen(IntegrationSkip.Unavailable(response), "endpoint not deployed"); + Assert.SkipWhen( + response.StatusCode is HttpStatusCode.Unauthorized or HttpStatusCode.Forbidden, + "test user is not admin"); + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + + using var doc = JsonDocument.Parse( + await response.Content.ReadAsStringAsync(TestContext.Current.CancellationToken)); + var root = doc.RootElement; + Assert.Equal(JsonValueKind.Array, root.ValueKind); + + // Budgets are OFF by default → every row reports mode "off", no budget, 0% used, + // not in fallback. The list itself is well-formed (one row per routed feature). 
+ foreach (var row in root.EnumerateArray()) + { + Assert.True(row.TryGetProperty("featureTag", out var feature)); + Assert.Equal(JsonValueKind.String, feature.ValueKind); + + Assert.True(row.TryGetProperty("mode", out var mode)); + Assert.Equal("off", mode.GetString()); + + Assert.True(row.TryGetProperty("todaySpendUsd", out _)); + + Assert.True(row.TryGetProperty("pctUsed", out var pct)); + Assert.Equal(0d, pct.GetDouble()); + + Assert.True(row.TryGetProperty("inFallback", out var inFallback)); + Assert.False(inFallback.GetBoolean()); + } + } +} diff --git a/tests/TextStack.UnitTests/BudgetOptionsTests.cs b/tests/TextStack.UnitTests/BudgetOptionsTests.cs new file mode 100644 index 00000000..48446bae --- /dev/null +++ b/tests/TextStack.UnitTests/BudgetOptionsTests.cs @@ -0,0 +1,90 @@ +using TextStack.Ai.Llm; + +namespace TextStack.UnitTests; + +/// +/// Pure lookup behaviour for the cost-aware routing policy (Phase 12 RLOps slice 4): +/// feature → default → off precedence, DailyUsd absent/≤0 disables, mode resolution. +/// +public class BudgetOptionsTests +{ + private static BudgetOptions Build(BudgetMode defaultMode, params (string Feature, decimal? Daily, string? 
Fallback, BudgetMode Mode)[] features) + { + var dict = features.ToDictionary( + f => f.Feature, + f => new FeatureBudget(f.Daily, f.Fallback, f.Mode)); + return new BudgetOptions(new FeatureBudget(null, null, defaultMode), dict); + } + + [Fact] + public void Empty_AllOff_NoCaps() + { + var b = BudgetOptions.Empty; + Assert.Equal(BudgetMode.Off, b.ModeFor("explain")); + Assert.Null(b.DailyUsdFor("explain")); + Assert.Null(b.FallbackKeyFor("explain")); + } + + [Fact] + public void ModeFor_KnownFeature_ReturnsFeatureMode() + { + var b = Build(BudgetMode.Off, ("explain", 5m, "ollama", BudgetMode.Fallback)); + Assert.Equal(BudgetMode.Fallback, b.ModeFor("explain")); + } + + [Fact] + public void ModeFor_UnknownFeature_InheritsDefaultMode() + { + var b = Build(BudgetMode.HardStop, ("explain", 5m, null, BudgetMode.Fallback)); + Assert.Equal(BudgetMode.HardStop, b.ModeFor("translate")); // unknown → default + Assert.Equal(BudgetMode.HardStop, b.ModeFor(null)); + } + + [Fact] + public void DailyUsdFor_PositiveCap_ReturnsCap() + { + var b = Build(BudgetMode.Off, ("explain", 12.50m, null, BudgetMode.HardStop)); + Assert.Equal(12.50m, b.DailyUsdFor("explain")); + } + + [Theory] + [InlineData(null)] + [InlineData(0.0)] + [InlineData(-3.0)] + public void DailyUsdFor_AbsentOrNonPositive_Disabled(double? raw) + { + decimal? daily = raw is null ? 
null : (decimal)raw.Value; + var b = Build(BudgetMode.Off, ("explain", daily, null, BudgetMode.HardStop)); + Assert.Null(b.DailyUsdFor("explain")); + } + + [Fact] + public void DailyUsdFor_UnknownFeature_Null() + { + var b = Build(BudgetMode.Fallback, ("explain", 5m, null, BudgetMode.Fallback)); + Assert.Null(b.DailyUsdFor("translate")); // unknown never has a cap + } + + [Fact] + public void FallbackKeyFor_Whitespace_Null() + { + var b = Build(BudgetMode.Off, ("explain", 5m, " ", BudgetMode.Fallback)); + Assert.Null(b.FallbackKeyFor("explain")); + } + + [Fact] + public void FallbackKeyFor_Configured_ReturnsKey() + { + var b = Build(BudgetMode.Off, ("explain", 5m, "ollama", BudgetMode.Fallback)); + Assert.Equal("ollama", b.FallbackKeyFor("explain")); + } + + [Fact] + public void ConfiguredFeatures_ReturnsExplicitKeys() + { + var b = Build(BudgetMode.Off, + ("explain", 5m, null, BudgetMode.HardStop), + ("translate", 2m, "ollama", BudgetMode.Fallback)); + Assert.Equal(new[] { "explain", "translate" }, b.ConfiguredFeatures.OrderBy(x => x).ToArray()); + } +} diff --git a/tests/TextStack.UnitTests/ModelGatewayBudgetTests.cs b/tests/TextStack.UnitTests/ModelGatewayBudgetTests.cs new file mode 100644 index 00000000..d683af2f --- /dev/null +++ b/tests/TextStack.UnitTests/ModelGatewayBudgetTests.cs @@ -0,0 +1,278 @@ +using System.Runtime.CompilerServices; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging.Abstractions; +using TextStack.Ai.Core; +using TextStack.Ai.Llm; + +namespace TextStack.UnitTests; + +/// +/// Cost-aware routing + per-feature daily budget enforcement in ModelGateway (Phase 12 RLOps +/// slice 4). Under budget → true primary; over + fallback (registered) → fallback key; over + +/// fallback (unregistered) → primary (no break); over + hardstop → BudgetExceededException; mode +/// off → primary regardless; tracker throws → primary (no break). 
Record fires exactly once per +/// call; the shadow self-skip still compares the TRUE primary even when a budget redirect happens. +/// +public class ModelGatewayBudgetTests +{ + private static LlmRequest Req(string feature = "explain") => + new("sys", Array.Empty(), 10, FeatureTag: feature); + + [Fact] + public async Task UnderBudget_RoutesToTruePrimary() + { + var (gateway, _) = Build(spent: 1.0m, dailyUsd: 5m, mode: BudgetMode.Fallback, fallbackRegistered: true); + var resp = await gateway.CompleteAsync(Req(), CancellationToken.None); + Assert.Equal("primary", resp.ModelId); + } + + [Fact] + public async Task OverBudget_FallbackMode_RegisteredFallback_RoutesToFallback() + { + var (gateway, _) = Build(spent: 9.0m, dailyUsd: 5m, mode: BudgetMode.Fallback, fallbackRegistered: true); + var resp = await gateway.CompleteAsync(Req(), CancellationToken.None); + Assert.Equal("fallback", resp.ModelId); + } + + [Fact] + public async Task OverBudget_FallbackMode_UnregisteredFallback_RoutesToPrimary_NoBreak() + { + var (gateway, _) = Build(spent: 9.0m, dailyUsd: 5m, mode: BudgetMode.Fallback, fallbackRegistered: false); + var resp = await gateway.CompleteAsync(Req(), CancellationToken.None); + Assert.Equal("primary", resp.ModelId); + } + + [Fact] + public async Task OverBudget_HardStop_Throws() + { + var (gateway, _) = Build(spent: 9.0m, dailyUsd: 5m, mode: BudgetMode.HardStop, fallbackRegistered: false); + var ex = await Assert.ThrowsAsync( + () => gateway.CompleteAsync(Req(), CancellationToken.None)); + Assert.Equal("explain", ex.FeatureTag); + Assert.Equal(5m, ex.DailyBudgetUsd); + } + + [Fact] + public async Task ModeOff_OverBudget_RoutesToPrimary() + { + var (gateway, _) = Build(spent: 999m, dailyUsd: 5m, mode: BudgetMode.Off, fallbackRegistered: true); + var resp = await gateway.CompleteAsync(Req(), CancellationToken.None); + Assert.Equal("primary", resp.ModelId); + } + + [Fact] + public async Task TrackerThrows_RoutesToPrimary_NoBreak() + { + var (gateway, _) = Build( 
+ spent: 9m, dailyUsd: 5m, mode: BudgetMode.HardStop, fallbackRegistered: true, + tracker: new ThrowingSpendTracker()); + // Tracker throws on read → budget logic swallows → true primary, NO BudgetExceededException. + var resp = await gateway.CompleteAsync(Req(), CancellationToken.None); + Assert.Equal("primary", resp.ModelId); + } + + [Fact] + public async Task Record_CalledExactlyOnce_PerCompleteAsync() + { + var (gateway, tracker) = Build(spent: 0m, dailyUsd: 5m, mode: BudgetMode.Fallback, fallbackRegistered: true); + await gateway.CompleteAsync(Req(), CancellationToken.None); + Assert.Equal(1, tracker.RecordCalls); + Assert.Equal("explain", tracker.LastRecordedFeature); + } + + [Fact] + public async Task Record_CalledOnce_EvenWhenBudgetRedirects() + { + var (gateway, tracker) = Build(spent: 9m, dailyUsd: 5m, mode: BudgetMode.Fallback, fallbackRegistered: true); + var resp = await gateway.CompleteAsync(Req(), CancellationToken.None); + Assert.Equal("fallback", resp.ModelId); // served by fallback + Assert.Equal(1, tracker.RecordCalls); // still recorded exactly once + } + + [Fact] + public async Task Record_CalledExactlyOnce_PerStreamAsync() + { + var (gateway, tracker) = Build(spent: 0m, dailyUsd: 5m, mode: BudgetMode.Fallback, fallbackRegistered: true); + await foreach (var _ in gateway.StreamAsync(Req(), CancellationToken.None)) { } + Assert.Equal(1, tracker.RecordCalls); + Assert.Equal("explain", tracker.LastRecordedFeature); + // Cost recorded is the streamed FinalUsage cost (0.001), not zero. 
+ Assert.Equal(0.001m, tracker.LastRecordedCost); + } + + [Fact] + public async Task Record_NotCalled_WhenStreamThrowsMidStream() + { + var cfg = new ConfigurationBuilder() + .AddInMemoryCollection(new Dictionary + { + ["Ai:DefaultProvider"] = "openai", + ["Ai:Routes:explain"] = "openai", + }) + .Build(); + + var services = new ServiceCollection(); + services.AddKeyedSingleton("openai", new ThrowingStreamLlm()); + var sp = services.BuildServiceProvider(); + + var shadow = new ShadowOptions(0.0, new Dictionary(), null, 15); + var budgets = new BudgetOptions( + new FeatureBudget(null, null, BudgetMode.Off), + new Dictionary { ["explain"] = new(5m, "ollama", BudgetMode.Fallback) }); + var tracker = new StubSpendTracker(0m); + var gateway = new ModelGateway( + sp, cfg, sp.GetRequiredService(), shadow, + new StubRouteProvider(), tracker, budgets, NullLogger.Instance); + + await Assert.ThrowsAsync(async () => + { + await foreach (var _ in gateway.StreamAsync(Req(), CancellationToken.None)) { } + }); + + Assert.Equal(0, tracker.RecordCalls); // mid-stream throw → spend NOT recorded + } + + // ShadowKey == TRUE primary key → self-skip must hold even though the budget redirected the + // serving route to the fallback. The shadow compares the TRUE primary, not the budget fallback. + [Fact] + public async Task BudgetRedirect_DoesNotAffectShadowSelfSkip() + { + // Registry/config primary for "explain" = "openai". Shadow route = "openai" (same as primary) + // → self-shadow skip. Budget pushes the SERVING route to "fallback", but shadow still compares + // against the true primary "openai" and skips. 
+ var cfg = new ConfigurationBuilder() + .AddInMemoryCollection(new Dictionary + { + ["Ai:DefaultProvider"] = "openai", + ["Ai:Routes:explain"] = "openai", + }) + .Build(); + + var writer = new CountingShadowWriter(); + var services = new ServiceCollection(); + services.AddKeyedSingleton("openai", new KeyEcho("primary")); + services.AddKeyedSingleton("openai-raw", new KeyEcho("primary-raw")); + services.AddKeyedSingleton("fallback", new KeyEcho("fallback")); + services.AddScoped(_ => writer); + var sp = services.BuildServiceProvider(); + + // Shadow route points at "openai" == true primary → must self-skip. + var shadow = new ShadowOptions(1.0, + new Dictionary { ["explain"] = "openai" }, null, 15); + var budgets = new BudgetOptions( + new FeatureBudget(null, null, BudgetMode.Off), + new Dictionary + { + ["explain"] = new(5m, "fallback", BudgetMode.Fallback), + }); + var tracker = new StubSpendTracker(9m); // over budget → redirect to fallback + + var gateway = new ModelGateway( + sp, cfg, sp.GetRequiredService(), shadow, + new StubRouteProvider(), tracker, budgets, NullLogger.Instance); + + var resp = await gateway.CompleteAsync(Req(), CancellationToken.None); + Assert.Equal("fallback", resp.ModelId); // budget redirected the serving route + + await Task.Delay(50, TestContext.Current.CancellationToken); + Assert.Equal(0, writer.Count); // self-shadow skipped (shadow key == TRUE primary) + } + + // ---- harness ---- + + private static (ModelGateway Gateway, StubSpendTracker Tracker) Build( + decimal spent, decimal dailyUsd, BudgetMode mode, bool fallbackRegistered, + ISpendTracker? 
tracker = null) + { + var cfg = new ConfigurationBuilder() + .AddInMemoryCollection(new Dictionary + { + ["Ai:DefaultProvider"] = "openai", + ["Ai:Routes:explain"] = "openai", + }) + .Build(); + + var services = new ServiceCollection(); + services.AddKeyedSingleton("openai", new KeyEcho("primary")); + if (fallbackRegistered) + services.AddKeyedSingleton("ollama", new KeyEcho("fallback")); + var sp = services.BuildServiceProvider(); + + var shadow = new ShadowOptions(0.0, new Dictionary(), null, 15); + var budgets = new BudgetOptions( + new FeatureBudget(null, null, BudgetMode.Off), + new Dictionary + { + ["explain"] = new(dailyUsd, "ollama", mode), + }); + + var stub = new StubSpendTracker(spent); + var gateway = new ModelGateway( + sp, cfg, sp.GetRequiredService(), shadow, + new StubRouteProvider(), tracker ?? stub, budgets, NullLogger.Instance); + return (gateway, stub); + } + + private sealed class StubRouteProvider : IModelRouteProvider + { + public string? PrimaryProviderKey(string featureTag) => null; + public void Invalidate() { } + } + + private sealed class StubSpendTracker(decimal spent) : ISpendTracker + { + public int RecordCalls { get; private set; } + public string? 
LastRecordedFeature { get; private set; } + public decimal LastRecordedCost { get; private set; } + public decimal SpentTodayUsd(string featureTag) => spent; + public void Record(string featureTag, decimal costUsd) + { + RecordCalls++; + LastRecordedFeature = featureTag; + LastRecordedCost = costUsd; + } + } + + private sealed class ThrowingStreamLlm : ILlmService + { + public Task CompleteAsync(LlmRequest request, CancellationToken ct) => + throw new NotSupportedException(); + + public async IAsyncEnumerable StreamAsync(LlmRequest request, [EnumeratorCancellation] CancellationToken ct) + { + yield return new LlmDelta(TextDelta: "partial"); + await Task.Yield(); + throw new InvalidOperationException("stream boom"); + } + } + + private sealed class ThrowingSpendTracker : ISpendTracker + { + public decimal SpentTodayUsd(string featureTag) => throw new InvalidOperationException("boom"); + public void Record(string featureTag, decimal costUsd) { } + } + + private sealed class KeyEcho(string id) : ILlmService + { + public Task CompleteAsync(LlmRequest request, CancellationToken ct) => + Task.FromResult(new LlmResponse("", Array.Empty(), new LlmUsage(1, 1, 0.001m), id, Guid.NewGuid())); + + public async IAsyncEnumerable StreamAsync(LlmRequest request, [EnumeratorCancellation] CancellationToken ct) + { + await Task.CompletedTask; + yield return new LlmDelta(FinalUsage: new LlmUsage(1, 1, 0.001m), ModelId: id); + } + } + + private sealed class CountingShadowWriter : IShadowRunWriter + { + private int _count; + public int Count => Volatile.Read(ref _count); + public Task WriteAsync(ShadowRun run, CancellationToken ct) + { + Interlocked.Increment(ref _count); + return Task.CompletedTask; + } + } +} diff --git a/tests/TextStack.UnitTests/ModelGatewayShadowTests.cs b/tests/TextStack.UnitTests/ModelGatewayShadowTests.cs index 4d892e9a..22367a3f 100644 --- a/tests/TextStack.UnitTests/ModelGatewayShadowTests.cs +++ b/tests/TextStack.UnitTests/ModelGatewayShadowTests.cs @@ -257,7 
+257,8 @@ public async Task MaybeShadow_ShadowKeyEqualsPrimaryKey_SkipsShadow() TimeoutSeconds: 15); var routes = new StubRouteProvider(new() { ["explain"] = "openai-explain" }); - var gateway = new ModelGateway(spy, cfg, spy.ScopeFactory, opts, routes, NullLogger.Instance); + var gateway = new ModelGateway(spy, cfg, spy.ScopeFactory, opts, routes, + new NoopSpendTracker(), BudgetOptions.Empty, NullLogger.Instance); var result = await gateway.CompleteAsync(Req(), CancellationToken.None); Assert.Equal("primary", result.Text); @@ -374,7 +375,8 @@ private ModelGateway Build( TimeoutSeconds: timeoutSeconds); var routes = routeProvider ?? new StubRouteProvider(); - return new ModelGateway(spy, cfg, spy.ScopeFactory, opts, routes, NullLogger.Instance); + return new ModelGateway(spy, cfg, spy.ScopeFactory, opts, routes, + new NoopSpendTracker(), BudgetOptions.Empty, NullLogger.Instance); } /// Registry route provider returning a fixed map (empty = no registry hit). @@ -386,6 +388,13 @@ private sealed class StubRouteProvider(Dictionary? routes = null public void Invalidate() { } } + /// No-op tracker: budgets are off for the shadow tests. + private sealed class NoopSpendTracker : ISpendTracker + { + public decimal SpentTodayUsd(string featureTag) => 0m; + public void Record(string featureTag, decimal costUsd) { } + } + // ---- fakes ---- private sealed class FakeShadowRunWriter(bool blockUntilReleased = false) : IShadowRunWriter diff --git a/tests/TextStack.UnitTests/ModelGatewayTests.cs b/tests/TextStack.UnitTests/ModelGatewayTests.cs index a798ef4c..e65a1ab5 100644 --- a/tests/TextStack.UnitTests/ModelGatewayTests.cs +++ b/tests/TextStack.UnitTests/ModelGatewayTests.cs @@ -33,8 +33,17 @@ private static ModelGateway BuildGateway() var shadow = new ShadowOptions(0.0, new Dictionary(), null, 15); // No registry routes → falls through to the config route (these tests cover config routing). 
var routes = new StubRouteProvider(); + // Budgets OFF → routing unaffected (budget routing covered in ModelGatewayBudgetTests). return new ModelGateway( - sp, cfg, sp.GetRequiredService(), shadow, routes, NullLogger.Instance); + sp, cfg, sp.GetRequiredService(), shadow, routes, + new NoopSpendTracker(), BudgetOptions.Empty, NullLogger.Instance); + } + + /// No-op tracker: budgets are off for these routing-precedence tests. + private sealed class NoopSpendTracker : ISpendTracker + { + public decimal SpentTodayUsd(string featureTag) => 0m; + public void Record(string featureTag, decimal costUsd) { } } /// Registry route provider returning a fixed map (empty = no registry hit). @@ -83,7 +92,8 @@ private static ModelGateway BuildGateway(IModelRouteProvider routes) var shadow = new ShadowOptions(0.0, new Dictionary(), null, 15); return new ModelGateway( - sp, cfg, sp.GetRequiredService(), shadow, routes, NullLogger.Instance); + sp, cfg, sp.GetRequiredService(), shadow, routes, + new NoopSpendTracker(), BudgetOptions.Empty, NullLogger.Instance); } [Fact] diff --git a/tests/TextStack.UnitTests/RollingSpendTrackerTests.cs b/tests/TextStack.UnitTests/RollingSpendTrackerTests.cs new file mode 100644 index 00000000..b81b0950 --- /dev/null +++ b/tests/TextStack.UnitTests/RollingSpendTrackerTests.cs @@ -0,0 +1,328 @@ +using Application.Ai; +using Application.Auth; +using Application.Common.Interfaces; +using Domain.Entities; +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging.Abstractions; +using Moq; +using TextStack.Ai.Llm; + +namespace TextStack.UnitTests; + +/// +/// In-memory per-feature daily spend tracking (Phase 12 RLOps slice 4): Interlocked increments, +/// UTC day rollover via TimeProvider, lazy seed scaled by 1/sampleRate, the 80% alert (once on +/// crossing + dedup same day + refires next day), and best-effort failure swallowing. 
+/// +public class RollingSpendTrackerTests +{ + private static readonly DateTimeOffset Day1 = new(2026, 6, 18, 10, 0, 0, TimeSpan.Zero); + private static readonly DateTimeOffset Day2 = new(2026, 6, 19, 10, 0, 0, TimeSpan.Zero); + + // ---- Record + SpentTodayUsd ---- + + [Fact] + public void Record_Increments_SpentTodaySums() + { + var t = Build(out _, out _); + t.Record("explain", 0.5m); + t.Record("explain", 0.25m); + Assert.Equal(0.75m, t.SpentTodayUsd("explain")); + } + + [Fact] + public void SpentTodayUsd_UnknownFeature_Zero() + { + var t = Build(out _, out _); + Assert.Equal(0m, t.SpentTodayUsd("never-touched")); + } + + [Fact] + public void Record_Concurrent_SumsExactly_ViaInterlocked() + { + var t = Build(out _, out _); + Parallel.For(0, 1000, _ => t.Record("explain", 0.001m)); + Assert.Equal(1.0m, t.SpentTodayUsd("explain")); + } + + [Fact] + public void Record_NanoCost_MicroDollarRoundTrip_Precise() + { + var t = Build(out _, out _); + // 0.000001 USD = exactly 1 micro-dollar; 1000 of them = 0.001 USD. + for (var i = 0; i < 1000; i++) + t.Record("explain", 0.000001m); + Assert.Equal(0.001m, t.SpentTodayUsd("explain")); + } + + // ---- Day rollover ---- + + [Fact] + public void DayRollover_ResetsSpend() + { + var clock = new FakeClock(Day1); + var t = Build(out _, out _, clock: clock); + + t.Record("explain", 2.0m); + Assert.Equal(2.0m, t.SpentTodayUsd("explain")); + + clock.Now = Day2; // new UTC day + Assert.Equal(0m, t.SpentTodayUsd("explain")); // fresh bucket + t.Record("explain", 0.3m); + Assert.Equal(0.3m, t.SpentTodayUsd("explain")); + } + + // ---- Lazy seed ---- + + [Fact] + public void LazySeed_ScalesBy_InverseSampleRate() + { + // explain sampled at 0.1 → seed = tracedSum * 10. 
+ var traces = new List + { + Trace("explain", 0.05m, Day1), + Trace("explain", 0.05m, Day1), + }; + var tracing = new TracingOptions(1.0, new Dictionary { ["explain"] = 0.1 }); + var t = Build(out _, out _, clock: new FakeClock(Day1), traces: traces, tracing: tracing); + + // 0.10 traced * (1/0.1) = 1.00 estimated. + Assert.Equal(1.0m, t.SpentTodayUsd("explain")); + } + + [Fact] + public void LazySeed_DefaultRate_NoScale() + { + var traces = new List { Trace("translate", 0.4m, Day1) }; + var t = Build(out _, out _, clock: new FakeClock(Day1), traces: traces); + Assert.Equal(0.4m, t.SpentTodayUsd("translate")); // default rate 1.0 → unscaled + } + + [Fact] + public void LazySeed_RunsOnce_ThenLiveRecordsAddOnTop() + { + var traces = new List { Trace("explain", 0.05m, Day1) }; + var tracing = new TracingOptions(1.0, new Dictionary { ["explain"] = 0.1 }); + var t = Build(out var scopeFactory, out _, clock: new FakeClock(Day1), traces: traces, tracing: tracing); + + Assert.Equal(0.5m, t.SpentTodayUsd("explain")); // 0.05 * 10 (seeded) + t.Record("explain", 1.0m); + Assert.Equal(1.5m, t.SpentTodayUsd("explain")); // seed not re-applied + Assert.Equal(1, scopeFactory.ScopesCreated); // seed queried DB exactly once + } + + // ---- 80% alert ---- + + [Fact] + public void Alert_FiresOnce_OnCrossing_80Percent() + { + var email = new Mock(); + var t = Build(out _, out email, budgets: Budget("explain", daily: 10m, BudgetMode.HardStop), clock: new FakeClock(Day1)); + + t.Record("explain", 7.0m); // 70% — below threshold, no alert + t.Record("explain", 1.5m); // crosses to 85% — ONE alert + + Eventually(() => email.Verify(e => e.SendAdminAlertAsync( + It.IsAny(), It.IsAny(), It.IsAny()), Times.Once)); + } + + [Fact] + public void Alert_ManyCallsOverThreshold_ExactlyOneEmail_NoStorm() + { + // Edge-trigger + dedup: once over 80%, hammering with 100 more over-budget Records + // must NOT spam — exactly one email total. 
+ var t = Build(out _, out var email, budgets: Budget("explain", 10m, BudgetMode.HardStop), clock: new FakeClock(Day1)); + + t.Record("explain", 8.5m); // crosses 80% → the single alert + for (var i = 0; i < 100; i++) + t.Record("explain", 1.0m); // all well over 80%/100% — must not re-alert + + // Let any wrongly-scheduled alerts run, then assert exactly one. + Thread.Sleep(100); + email.Verify(e => e.SendAdminAlertAsync( + It.IsAny(), It.IsAny(), It.IsAny()), Times.Once); + } + + [Fact] + public void Alert_ConcurrentCrossing_ExactlyOneEmail() + { + // 200 parallel Records that together cross 80% — dedup must still yield exactly one email. + var t = Build(out _, out var email, budgets: Budget("explain", 10m, BudgetMode.Fallback), clock: new FakeClock(Day1)); + Parallel.For(0, 200, _ => t.Record("explain", 0.1m)); // sums to $20, well over 80% + + Thread.Sleep(100); + email.Verify(e => e.SendAdminAlertAsync( + It.IsAny(), It.IsAny(), It.IsAny()), Times.Once); + } + + [Fact] + public void Alert_DedupsSameDay() + { + var t = Build(out _, out var email, budgets: Budget("explain", 10m, BudgetMode.Fallback), clock: new FakeClock(Day1)); + + t.Record("explain", 8.5m); // crosses 80% → alert + t.Record("explain", 1.0m); // already alerted today → no second alert + + Eventually(() => email.Verify(e => e.SendAdminAlertAsync( + It.IsAny(), It.IsAny(), It.IsAny()), Times.Once)); + } + + [Fact] + public void Alert_Refires_NextDay() + { + var clock = new FakeClock(Day1); + var t = Build(out _, out var email, budgets: Budget("explain", 10m, BudgetMode.HardStop), clock: clock); + + t.Record("explain", 9.0m); // day1 alert + clock.Now = Day2; + t.Record("explain", 9.0m); // day2 alert (fresh dedup) + + Eventually(() => email.Verify(e => e.SendAdminAlertAsync( + It.IsAny(), It.IsAny(), It.IsAny()), Times.Exactly(2))); + } + + [Fact] + public void Alert_NotFired_WhenBudgetOff() + { + var t = Build(out _, out var email, budgets: BudgetOptions.Empty, clock: new FakeClock(Day1)); + 
t.Record("explain", 1000m); + // Give any (wrongly) scheduled alert a chance, then assert none. + Thread.Sleep(50); + email.Verify(e => e.SendAdminAlertAsync( + It.IsAny(), It.IsAny(), It.IsAny()), Times.Never); + } + + [Fact] + public void SeedFailure_Swallowed_SpentTreatedAsZero() + { + // A scope factory whose DbContext throws on access → seed fails → bucket starts at 0. + var scopeFactory = new ThrowingScopeFactory(); + var t = new RollingSpendTracker( + scopeFactory, new FakeClock(Day1), BudgetOptions.Empty, + new TracingOptions(), NullLogger.Instance); + + Assert.Equal(0m, t.SpentTodayUsd("explain")); // no throw + t.Record("explain", 0.5m); + Assert.Equal(0.5m, t.SpentTodayUsd("explain")); // live record still works + } + + // ---- harness ---- + + private static void Eventually(Action assert, int timeoutMs = 2000) + { + var deadline = DateTime.UtcNow.AddMilliseconds(timeoutMs); + while (true) + { + try { assert(); return; } + catch (MockException) when (DateTime.UtcNow < deadline) { Thread.Sleep(20); } + } + } + + private static BudgetOptions Budget(string feature, decimal daily, BudgetMode mode) => + new(new FeatureBudget(null, null, BudgetMode.Off), + new Dictionary { [feature] = new(daily, "ollama", mode) }); + + private static LlmTrace Trace(string feature, decimal cost, DateTimeOffset at) => new() + { + Id = Guid.NewGuid(), + FeatureTag = feature, + ModelId = "m", + PromptHash = "h", + MessagesJson = "[]", + CostUsd = cost, + CreatedAt = at, + }; + + private static RollingSpendTracker Build( + out RecordingScopeFactory scopeFactory, + out Mock email, + BudgetOptions? budgets = null, + FakeClock? clock = null, + List? traces = null, + TracingOptions? tracing = null) + { + email = new Mock(); + email.Setup(e => e.SendAdminAlertAsync(It.IsAny(), It.IsAny(), It.IsAny())) + .Returns(Task.CompletedTask); + + scopeFactory = new RecordingScopeFactory(traces ?? new List(), email.Object); + + return new RollingSpendTracker( + scopeFactory, + clock ?? 
new FakeClock(Day1), + budgets ?? BudgetOptions.Empty, + tracing ?? new TracingOptions(), + NullLogger.Instance); + } + + private sealed class FakeClock(DateTimeOffset now) : TimeProvider + { + public DateTimeOffset Now { get; set; } = now; + public override DateTimeOffset GetUtcNow() => Now; + } + + // A scope factory that resolves IAppDbContext (Moq-backed LlmTraces) + IEmailService. + private sealed class RecordingScopeFactory(List traces, IEmailService email) : IServiceScopeFactory + { + private int _scopes; + public int ScopesCreated => Volatile.Read(ref _scopes); + + public IServiceScope CreateScope() + { + Interlocked.Increment(ref _scopes); + return new Scope(traces, email); + } + + private sealed class Scope(List traces, IEmailService email) : IServiceScope + { + public IServiceProvider ServiceProvider { get; } = BuildProvider(traces, email); + public void Dispose() { } + + private static IServiceProvider BuildProvider(List traces, IEmailService email) + { + var db = new Mock(); + db.Setup(x => x.LlmTraces).Returns(() => FakeSet(traces).Object); + + var services = new ServiceCollection(); + services.AddScoped(_ => db.Object); + services.AddScoped(_ => email); + return services.BuildServiceProvider(); + } + } + } + + private sealed class ThrowingScopeFactory : IServiceScopeFactory + { + public IServiceScope CreateScope() => new Scope(); + + private sealed class Scope : IServiceScope + { + public IServiceProvider ServiceProvider { get; } + public Scope() + { + var db = new Mock(); + db.Setup(x => x.LlmTraces).Throws(new InvalidOperationException("db down")); + var services = new ServiceCollection(); + services.AddScoped(_ => db.Object); + ServiceProvider = services.BuildServiceProvider(); + } + public void Dispose() { } + } + } + + private static Mock> FakeSet(List data) where T : class + { + var q = new TestAsyncEnumerable(data); + var set = new Mock>(); + var iq = set.As>(); + iq.Setup(m => m.Provider).Returns(((IQueryable)q).Provider); + iq.Setup(m => 
m.Expression).Returns(((IQueryable)q).Expression); + iq.Setup(m => m.ElementType).Returns(((IQueryable)q).ElementType); + iq.Setup(m => m.GetEnumerator()).Returns(() => data.GetEnumerator()); + set.As>() + .Setup(m => m.GetAsyncEnumerator(It.IsAny())) + .Returns(() => new TestAsyncEnumerator(data.GetEnumerator())); + return set; + } +}