From cb6e83c55920943bb86e629af88d040cdf3f0e83 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Wed, 29 Apr 2026 18:37:46 +1000 Subject: [PATCH 1/5] Add Browser AI API --- ui/src/components/ModelPicker/model-utils.ts | 1 + ui/src/components/WasmSetup/BrowserAiCard.tsx | 138 ++ ui/src/components/WasmSetup/WasmSetup.tsx | 40 +- .../components/WasmSetup/WasmSetupGuard.tsx | 81 +- ui/src/pages/providers/shared.tsx | 4 + ui/src/service-worker/browser-ai.ts | 1196 +++++++++++++++++ ui/src/service-worker/sw.ts | 56 +- ui/src/services/browser-ai/availability.ts | 58 + ui/src/services/browser-ai/bridge.ts | 196 +++ ui/src/services/browser-ai/index.ts | 17 + ui/src/services/browser-ai/types.ts | 98 ++ 11 files changed, 1879 insertions(+), 6 deletions(-) create mode 100644 ui/src/components/WasmSetup/BrowserAiCard.tsx create mode 100644 ui/src/service-worker/browser-ai.ts create mode 100644 ui/src/services/browser-ai/availability.ts create mode 100644 ui/src/services/browser-ai/bridge.ts create mode 100644 ui/src/services/browser-ai/index.ts create mode 100644 ui/src/services/browser-ai/types.ts diff --git a/ui/src/components/ModelPicker/model-utils.ts b/ui/src/components/ModelPicker/model-utils.ts index e6a5415..8752619 100644 --- a/ui/src/components/ModelPicker/model-utils.ts +++ b/ui/src/components/ModelPicker/model-utils.ts @@ -137,6 +137,7 @@ const PROVIDER_LABELS: Record = { qwen: "Qwen", openrouter: "OpenRouter", test: "Test", + browser: "Browser AI", }; export function getProviderInfo( diff --git a/ui/src/components/WasmSetup/BrowserAiCard.tsx b/ui/src/components/WasmSetup/BrowserAiCard.tsx new file mode 100644 index 0000000..44e6dfe --- /dev/null +++ b/ui/src/components/WasmSetup/BrowserAiCard.tsx @@ -0,0 +1,138 @@ +import { CheckCircle2, Cpu, Download, ExternalLink, Loader2, XCircle } from "lucide-react"; +import { Button } from "@/components/Button/Button"; +import { cn } from "@/utils/cn"; +import type { LanguageModelAvailability } from "@/services/browser-ai"; + +export interface BrowserAiState { + /** True if `globalThis.LanguageModel` is exposed by this browser. */ + supported: boolean; + availability: LanguageModelAvailability; + /** 0..1, only meaningful while a download is in progress. */ + downloadProgress: number | null; + /** True while we are actively triggering or awaiting a download. */ + downloading: boolean; + error: string | null; +} + +interface BrowserAiCardProps { + state: BrowserAiState; + onDownload: () => void; + className?: string; +} + +export function BrowserAiCard({ state, onDownload, className }: BrowserAiCardProps) { + if (!state.supported) { + return ( +
+
+
+
+          Browser AI
+
+          On-device model running locally in your browser
+
+
+          Not supported
+
+
+        Open this page in Chrome 148+ (or recent Edge / Brave / other Chromium).{" "} docs
+
+    );
+  }
+
+  const isReady = state.availability === "available";
+  const isDownloading = state.downloading || state.availability === "downloading";
+  const isDownloadable = state.availability === "downloadable" && !isDownloading;
+  const progressPercent =
+    state.downloadProgress != null
+      ? Math.max(0, Math.min(100, state.downloadProgress * 100))
+      : null;
+
+  return (
+
+
+
+
+            Browser AI
+
+          Runs locally on-device. Private, free, no API key.
+
+        {isReady ? (
+            Ready
+        ) : isDownloading ? (
+            {progressPercent != null ? `${progressPercent.toFixed(0)}%` : "Downloading"}
+        ) : isDownloadable ? (
+
+        ) : (
+            Unavailable
+        )}
+
+      {isDownloading && progressPercent != null && (
+
+
+      )}
+
+      {state.error && (
+          {state.error}
+      )}
+
+      {state.availability === "unavailable" && !state.error && (
+          The browser exposes the API but reports the device as ineligible (typically not enough
+          memory, storage, or GPU). The model will appear here once your environment qualifies.
+      )}
+ ); +} diff --git a/ui/src/components/WasmSetup/WasmSetup.tsx b/ui/src/components/WasmSetup/WasmSetup.tsx index 69c56dc..8ec881f 100644 --- a/ui/src/components/WasmSetup/WasmSetup.tsx +++ b/ui/src/components/WasmSetup/WasmSetup.tsx @@ -33,6 +33,7 @@ import { Input } from "@/components/Input/Input"; import { FormField } from "@/components/FormField/FormField"; import { HadrianIcon } from "@/components/HadrianIcon/HadrianIcon"; import { startOpenRouterOAuth, isInIframe } from "./openrouter-oauth"; +import { BrowserAiCard, type BrowserAiState } from "./BrowserAiCard"; import { cn } from "@/utils/cn"; import { formatApiError } from "@/utils/formatApiError"; @@ -115,6 +116,8 @@ export function WasmSetup({ ollamaConnecting, ollamaConnected, onOllamaConnect, + browserAi, + onBrowserAiDownload, }: { open: boolean; onComplete: () => void; @@ -125,6 +128,8 @@ export function WasmSetup({ ollamaConnecting?: boolean; ollamaConnected?: boolean; onOllamaConnect?: () => void; + browserAi?: BrowserAiState; + onBrowserAiDownload?: () => void; }) { const [step, setStep] = useState("welcome"); const [entries, setEntries] = useState(initialEntries); @@ -244,10 +249,12 @@ export function WasmSetup({ } }, []); + const browserAiReady = browserAi?.availability === "available"; const savedCount = entries.filter((e) => e.saved).length + (hasExistingOpenRouter ? 1 : 0) + - (hasExistingOllama ? 1 : 0); + (hasExistingOllama ? 1 : 0) + + (browserAiReady ? 1 : 0); const hasAnySaved = savedCount > 0; return ( @@ -266,6 +273,8 @@ export function WasmSetup({ onOllamaConnect={onOllamaConnect} existingProviders={existingProviders} onDeleteExisting={handleDeleteExisting} + browserAi={browserAi} + onBrowserAiDownload={onBrowserAiDownload} /> )} {step === "providers" && ( @@ -290,6 +299,8 @@ export function WasmSetup({ onOllamaConnect={onOllamaConnect} existingProviders={existingProviders} onDeleteExisting={handleDeleteExisting} + browserAi={browserAi} + onBrowserAiDownload={onBrowserAiDownload} /> )} {step === "done" && } @@ -310,6 +321,8 @@ function WelcomeStep({ onOllamaConnect, existingProviders, onDeleteExisting, + browserAi, + onBrowserAiDownload, }: { onNext: () => void; onReady: () => void; @@ -323,8 +336,11 @@ function WelcomeStep({ onOllamaConnect?: () => void; existingProviders?: DynamicProviderResponse[]; onDeleteExisting: (id: string) => void; + browserAi?: BrowserAiState; + onBrowserAiDownload?: () => void; }) { - const hasProvider = hasExistingOpenRouter || hasExistingOllama; + const hasBrowserAiReady = browserAi?.availability === "available"; + const hasProvider = hasExistingOpenRouter || hasExistingOllama || hasBrowserAiReady; return ( <> @@ -457,6 +473,14 @@ function WelcomeStep({
        )}
+        {browserAi && (
+          <BrowserAiCard
+            state={browserAi}
+            onDownload={onBrowserAiDownload ?? (() => {})}
+            className="mt-3"
+          />
+        )}
+
{hasProvider ? "You can also add API keys from OpenAI, Anthropic, or other providers." @@ -524,6 +548,8 @@ function ProvidersStep({ onOllamaConnect, existingProviders, onDeleteExisting, + browserAi, + onBrowserAiDownload, }: { entries: ProviderEntry[]; onUpdate: (key: string, update: Partial) => void; @@ -545,6 +571,8 @@ function ProvidersStep({ onOllamaConnect?: () => void; existingProviders?: DynamicProviderResponse[]; onDeleteExisting: (id: string) => void; + browserAi?: BrowserAiState; + onBrowserAiDownload?: () => void; }) { return ( <> @@ -636,6 +664,14 @@ function ProvidersStep({ ) : null} + {browserAi && ( + {})} + className="mb-4" + /> + )} +
{entries.map((entry) => ( (() => ({ + supported: IS_WASM ? isLanguageModelSupported() : false, + availability: "unavailable", + downloadProgress: null, + downloading: false, + error: null, + })); const queryClient = useQueryClient(); const createProvider = useMutation({ ...meProvidersCreateMutation() }); @@ -85,6 +99,62 @@ export function WasmSetupGuard({ children }: { children: ReactNode }) { return () => controller.abort(); }, []); + // Install the LanguageModel bridge so the WASM service worker can reach + // the on-device Prompt API (only exposed in window scope), and surface the + // current availability state for the UI. + useEffect(() => { + if (!IS_WASM) return; + if (!isLanguageModelSupported()) return; + const uninstall = installBrowserAiBridge(); + let cancelled = false; + getAvailability().then((state) => { + if (cancelled) return; + setBrowserAi((prev) => ({ ...prev, availability: state })); + }); + return () => { + cancelled = true; + uninstall(); + }; + }, []); + + const handleBrowserAiDownload = useCallback(async () => { + const lm = getLanguageModel(); + if (!lm) return; + setBrowserAi((prev) => ({ + ...prev, + downloading: true, + downloadProgress: 0, + availability: "downloading", + error: null, + })); + try { + const session = await lm.create({ + monitor(m) { + m.addEventListener("downloadprogress", (event) => { + setBrowserAi((prev) => ({ ...prev, downloadProgress: event.loaded })); + }); + }, + }); + session.destroy(); + const next = await getAvailability(); + setBrowserAi((prev) => ({ + ...prev, + availability: next, + downloading: false, + downloadProgress: null, + })); + queryClient.invalidateQueries({ queryKey: apiV1ModelsQueryKey() }); + } catch (err) { + setBrowserAi((prev) => ({ + ...prev, + downloading: false, + downloadProgress: null, + availability: "downloadable", + error: formatApiError(err), + })); + } + }, [queryClient]); + const handleOllamaConnect = useCallback(async () => { setOllamaConnecting(true); try { @@ -165,8 +235,13 @@ export function WasmSetupGuard({ children }: { children: ReactNode }) { return {children}; } - // Auto-show: no providers and not previously dismissed - const needsOnboarding = !dismissed && !isLoading && (data?.data?.length ?? 0) === 0; + // Auto-show: no providers and not previously dismissed. Browser AI counts + // as a provider once the model is ready locally, since requests against + // it succeed without any setup the wizard could prompt for. + const dynamicProviderCount = data?.data?.length ?? 
0; + const browserAiCounts = browserAi.supported && browserAi.availability === "available"; + const needsOnboarding = + !dismissed && !isLoading && dynamicProviderCount === 0 && !browserAiCounts; return ( @@ -181,6 +256,8 @@ export function WasmSetupGuard({ children }: { children: ReactNode }) { ollamaConnecting={ollamaConnecting} ollamaConnected={ollamaConnected} onOllamaConnect={handleOllamaConnect} + browserAi={browserAi} + onBrowserAiDownload={handleBrowserAiDownload} /> ); diff --git a/ui/src/pages/providers/shared.tsx b/ui/src/pages/providers/shared.tsx index e764f93..b91154a 100644 --- a/ui/src/pages/providers/shared.tsx +++ b/ui/src/pages/providers/shared.tsx @@ -246,4 +246,8 @@ export const PROVIDER_COLORS: Record = { solid: "bg-gray-500", badge: "bg-gray-500/10 text-gray-700 dark:text-gray-400", }, + browser: { + solid: "bg-sky-500", + badge: "bg-sky-500/10 text-sky-700 dark:text-sky-400", + }, }; diff --git a/ui/src/service-worker/browser-ai.ts b/ui/src/service-worker/browser-ai.ts new file mode 100644 index 0000000..c8433c5 --- /dev/null +++ b/ui/src/service-worker/browser-ai.ts @@ -0,0 +1,1196 @@ +/// + +/** + * Service-worker side of the Browser AI integration. Intercepts requests for + * `browser/*` models and routes them through a postMessage bridge to the + * controlling window: the Prompt API global (`LanguageModel`) is only + * exposed in window/dedicated-worker scopes, not in service workers. + */ + +import type { LanguageModelMessage } from "../services/browser-ai/types"; +import { + BROWSER_AI_PREFIX, + BROWSER_AI_PROVIDER, + detectBrowserAiModel, +} from "../services/browser-ai/availability"; + +declare const self: ServiceWorkerGlobalScope; + +export { BROWSER_AI_PREFIX }; + +interface BridgeAvailabilityReply { + type: "AVAILABILITY"; + state: "available" | "downloadable" | "downloading" | "unavailable"; +} + +type BridgeReply = + | BridgeAvailabilityReply + | { type: "DOWNLOAD_PROGRESS"; loaded: number } + | { type: "DELTA"; text: string } + | { type: "DONE"; inputTokens: number; outputTokens: number } + | { type: "ABORTED" } + | { type: "ERROR"; message: string }; + +interface PromptToolDef { + name: string; + description?: string; + parameters?: object; +} + +let availabilityCache: { state: BridgeAvailabilityReply["state"]; checkedAt: number } | null = null; +const AVAILABILITY_TTL_MS = 60_000; + +export function isBrowserAiModel(model: unknown): boolean { + return typeof model === "string" && model.startsWith(BROWSER_AI_PREFIX); +} + +async function getClient(clientId: string): Promise { + if (clientId) { + const direct = await self.clients.get(clientId); + if (direct) return direct; + } + const all = await self.clients.matchAll({ type: "window", includeUncontrolled: false }); + return all[0] ?? 
null; +} + +async function sendToBridge( + client: Client, + payload: + | { type: "AVAILABILITY" } + | { + type: "PROMPT"; + messages: LanguageModelMessage[]; + temperature?: number; + topK?: number; + responseConstraint?: object; + }, + onMessage?: (reply: BridgeReply, port: MessagePort) => boolean, + signal?: AbortSignal +): Promise { + return new Promise((resolve, reject) => { + const channel = new MessageChannel(); + const port = channel.port1; + let settled = false; + + const cleanup = () => { + try { + port.close(); + } catch { + // ignored + } + if (signal && abortHandler) signal.removeEventListener("abort", abortHandler); + }; + + const settle = (fn: () => void) => { + if (settled) return; + settled = true; + cleanup(); + fn(); + }; + + let abortHandler: (() => void) | null = null; + if (signal) { + abortHandler = () => { + try { + port.postMessage({ type: "ABORT" }); + } catch { + // ignored + } + settle(() => reject(new DOMException("Aborted", "AbortError"))); + }; + if (signal.aborted) { + abortHandler(); + } else { + signal.addEventListener("abort", abortHandler); + } + } + + port.onmessage = (event: MessageEvent) => { + const reply = event.data as BridgeReply; + try { + if (onMessage) { + const finished = onMessage(reply, port); + if (finished) { + settle(() => resolve(reply as T)); + } + return; + } + // No streaming handler: resolve on first reply. + settle(() => resolve(reply as T)); + } catch (err) { + // Consumers signal terminal errors by throwing inside `onMessage` + // (e.g. on an ERROR reply). Catching here is critical: an uncaught + // throw inside a port.onmessage handler is silently swallowed, so + // without this the outer Promise would never settle and the SW + // request would hang until the page is reloaded. + settle(() => reject(err)); + } + }; + port.start(); + + try { + client.postMessage({ type: "BROWSER_AI_REQUEST", payload }, [channel.port2]); + } catch (err) { + settle(() => reject(err)); + } + }); +} + +export async function getCachedAvailability( + clientId: string +): Promise { + if (availabilityCache && Date.now() - availabilityCache.checkedAt < AVAILABILITY_TTL_MS) { + return availabilityCache.state; + } + const client = await getClient(clientId); + if (!client) return "unavailable"; + try { + const reply = await sendToBridge(client, { type: "AVAILABILITY" }); + if (reply.type === "AVAILABILITY") { + availabilityCache = { state: reply.state, checkedAt: Date.now() }; + return reply.state; + } + } catch { + // Bridge unavailable. + } + return "unavailable"; +} + +/** Append the browser AI model to a `/v1/models` response when supported. */ +export async function augmentModelsResponse( + response: Response, + clientId: string +): Promise { + let availability: BridgeAvailabilityReply["state"]; + try { + availability = await getCachedAvailability(clientId); + } catch { + return response; + } + if (availability === "unavailable") return response; + + let body: { data?: unknown[]; [k: string]: unknown }; + try { + body = await response.clone().json(); + } catch { + return response; + } + const data = Array.isArray(body.data) ? body.data : []; + + const detected = detectBrowserAiModel(); + const entry = { + id: detected.id, + object: "model", + created: 0, + owned_by: BROWSER_AI_PROVIDER, + source: "static", + description: + availability === "available" + ? `On-device ${detected.vendor} model, runs locally in your browser.` + : `On-device ${detected.vendor} model, runs locally in your browser. 
Downloads on first use.`, + capabilities: { tools: true, vision: false, streaming: true }, + modalities: { input: ["text"], output: ["text"] }, + tasks: ["chat"], + }; + + const augmented = { ...body, data: [...data, entry] }; + const headers = new Headers(response.headers); + headers.delete("content-length"); + return new Response(JSON.stringify(augmented), { + status: response.status, + statusText: response.statusText, + headers, + }); +} + +interface ToolDef { + type?: string; + name: string; + description?: string; + parameters?: object; +} + +interface ResponsesPayload { + model: string; + input: Array<{ + role?: string; + type?: string; + content?: string | Array<{ type: string; text?: string }>; + [k: string]: unknown; + }>; + stream?: boolean; + temperature?: number; + top_k?: number; + max_output_tokens?: number; + tools?: ToolDef[]; +} + +interface ChatCompletionsPayload { + model: string; + messages: Array<{ role: string; content: string | Array<{ type: string; text?: string }> }>; + stream?: boolean; + temperature?: number; + top_k?: number; + max_tokens?: number; + tools?: Array<{ type: string; function?: ToolDef }>; +} + +function flattenContent(content: unknown): string { + if (typeof content === "string") return content; + if (Array.isArray(content)) { + return content + .map((part) => { + if (typeof part === "string") return part; + if (part && typeof part === "object") { + const p = part as { type?: string; text?: string; value?: unknown }; + if (p.type === "input_text" || p.type === "output_text" || p.type === "text") { + return p.text ?? ""; + } + } + return ""; + }) + .join(""); + } + return ""; +} + +function inputToMessages(input: ResponsesPayload["input"]): LanguageModelMessage[] { + const messages: LanguageModelMessage[] = []; + + // function_call_output items reference a prior function_call by call_id. + // Build a lookup so we can render the result alongside the tool name in + // the synthetic conversation we feed to the Prompt API. + const callIdToName = new Map(); + for (const item of input) { + if ( + item.type === "function_call" && + typeof item.call_id === "string" && + typeof item.name === "string" + ) { + callIdToName.set(item.call_id, item.name); + } + } + + for (const item of input) { + if (item.type === "function_call") { + const name = typeof item.name === "string" ? item.name : "tool"; + const args = typeof item.arguments === "string" ? item.arguments : "{}"; + messages.push({ + role: "assistant", + content: `${args}`, + }); + continue; + } + if (item.type === "function_call_output") { + const callId = typeof item.call_id === "string" ? item.call_id : ""; + const name = callIdToName.get(callId) ?? "tool"; + const output = + typeof item.output === "string" ? item.output : JSON.stringify(item.output ?? ""); + messages.push({ + role: "user", + content: `${output}`, + }); + continue; + } + if (item.type && item.type !== "message") continue; + const role = item.role; + if (role !== "system" && role !== "user" && role !== "assistant") continue; + const text = flattenContent(item.content); + if (!text) continue; + messages.push({ role, content: text }); + } + return messages; +} + +/** Convert OpenAI-style tool definitions into the bridge's payload shape. */ +function extractTools(body: ResponsesPayload | ChatCompletionsPayload): PromptToolDef[] { + const out: PromptToolDef[] = []; + for (const t of body.tools ?? 
[]) { + if (!t) continue; + // Responses API wraps function tools flat: { type: "function", name, description, parameters } + // Chat completions wraps them: { type: "function", function: { name, description, parameters } } + const candidate = + "function" in t && t.function + ? t.function + : (t as { name?: string; description?: string; parameters?: object }); + if (!candidate || typeof candidate.name !== "string") continue; + const tType = (t as { type?: string }).type; + if (tType && tType !== "function") continue; + out.push({ + name: candidate.name, + description: candidate.description, + parameters: candidate.parameters, + }); + } + return out; +} + +/** + * Polyfill for the spec's native `tools` option, which Chrome rejects at + * `LanguageModel.create()` today. Instead of asking the model to emit + * `` markers in free text (which it mutates into markdown + * fences, drops closing tags, mixes with prose, etc), we describe the + * tools in the system prompt and force a JSON-shaped reply via + * `responseConstraint`. Chrome 137+ enforces the schema at decode time, + * which the swyx and dobidev write-ups identify as the only mechanism + * that reliably pins format on Gemini Nano. + */ +function injectToolPrompt( + messages: LanguageModelMessage[], + tools: PromptToolDef[] +): LanguageModelMessage[] { + if (tools.length === 0) return messages; + const toolBlock = tools + .map((t) => { + const params = t.parameters ? JSON.stringify(t.parameters) : "{}"; + const desc = t.description ?? "(no description)"; + return `- ${t.name}: ${desc}\n arguments schema: ${params}`; + }) + .join("\n\n"); + + const instructions = [ + "You can use tools. Your reply will be a JSON object with two optional fields:", + ' "tool_calls": list of tool invocations, each {"name": "...", "arguments": {...}}', + ' "text": plain-text reply to the user', + "", + "Use tool_calls when you need to run a tool. Use text when you have a final answer. You may use both.", + "", + "Available tools:", + "", + toolBlock, + "", + "Examples (these are entire valid replies):", + '{"tool_calls":[{"name":"code_interpreter","arguments":{"code":"print(\'hi\')"}}]}', + '{"tool_calls":[{"name":"code_interpreter","arguments":{"code":"import math\\nprint(math.pi)"}}]}', + '{"text":"Hello! How can I help?"}', + '{"text":"Let me check.","tool_calls":[{"name":"wikipedia","arguments":{"action":"search","query":"Einstein"}}]}', + "", + 'Tool results arrive in the next user message in the form: .... After receiving tool results, reply with {"text":"..."} containing your final answer.', + ].join("\n"); + + const out = messages.slice(); + const systemIdx = out.findIndex((m) => m.role === "system"); + if (systemIdx >= 0) { + out[systemIdx] = { + role: "system", + content: `${out[systemIdx].content}\n\n${instructions}`, + }; + } else { + out.unshift({ role: "system", content: instructions }); + } + return out; +} + +interface ParsedToolCall { + name: string; + arguments: string; +} + +interface ParsedEnvelope { + toolCalls: ParsedToolCall[]; + text: string; +} + +/** + * Build the `responseConstraint` schema for a request that has tools. + * Constrains the model to a `{tool_calls?, text?}` object where every + * tool name comes from the supplied list. Argument schemas are kept as + * plain `object` to avoid tripping up Chrome's JSON Schema implementation + * with per-tool oneOf gymnastics; per-arg validation happens downstream + * in Hadrian's tool executors. 
+ */ +function buildToolResponseSchema(tools: PromptToolDef[]): object { + const toolNames = tools.map((t) => t.name); + return { + type: "object", + properties: { + tool_calls: { + type: "array", + items: { + type: "object", + properties: { + name: { type: "string", enum: toolNames }, + arguments: { type: "object" }, + }, + required: ["name", "arguments"], + }, + }, + text: { type: "string" }, + }, + }; +} + +/** + * Parse the constrained JSON envelope returned by the model. Returns + * empty arrays when the body fails to parse so callers can fall back to + * a retry path. + */ +function parseEnvelope(raw: string): ParsedEnvelope | null { + const trimmed = raw.trim(); + if (!trimmed) return null; + let parsed: unknown; + try { + parsed = JSON.parse(trimmed); + } catch { + return null; + } + if (!parsed || typeof parsed !== "object") return null; + const obj = parsed as { tool_calls?: unknown; text?: unknown }; + const toolCalls: ParsedToolCall[] = []; + if (Array.isArray(obj.tool_calls)) { + for (const entry of obj.tool_calls) { + if (!entry || typeof entry !== "object") continue; + const item = entry as { name?: unknown; arguments?: unknown }; + if (typeof item.name !== "string") continue; + const args = + item.arguments && typeof item.arguments === "object" + ? (item.arguments as Record) + : {}; + toolCalls.push({ name: item.name, arguments: JSON.stringify(args) }); + } + } + const text = typeof obj.text === "string" ? obj.text : ""; + return { toolCalls, text }; +} + +function chatMessagesToBridge( + messages: ChatCompletionsPayload["messages"] +): LanguageModelMessage[] { + const out: LanguageModelMessage[] = []; + for (const m of messages) { + if (m.role !== "system" && m.role !== "user" && m.role !== "assistant") continue; + const text = flattenContent(m.content); + if (!text) continue; + out.push({ role: m.role, content: text }); + } + return out; +} + +function jsonError(message: string, status = 503): Response { + return new Response(JSON.stringify({ error: { message, type: "browser_ai_error" } }), { + status, + headers: { "content-type": "application/json" }, + }); +} + +function sseHeaders(): HeadersInit { + return { + "content-type": "text/event-stream; charset=utf-8", + "cache-control": "no-cache, no-transform", + "x-accel-buffering": "no", + }; +} + +function sseEvent(name: string, data: unknown): string { + return `event: ${name}\ndata: ${JSON.stringify(data)}\n\n`; +} + +function genId(prefix: string): string { + return `${prefix}_${Math.random().toString(36).slice(2, 12)}${Date.now().toString(36)}`; +} + +/** Handle `/api/v1/responses` for Browser AI. */ +export async function handleResponsesRequest( + request: Request, + body: ResponsesPayload, + clientId: string +): Promise { + const client = await getClient(clientId); + if (!client) return jsonError("No active client to handle Browser AI request."); + + let messages = inputToMessages(body.input ?? 
[]); + if (messages.length === 0) { + return jsonError("Browser AI requires at least one text message.", 400); + } + + const tools = extractTools(body); + const responseId = genId("resp"); + const model = body.model; + const stream = body.stream !== false; + + if (tools.length > 0) { + messages = injectToolPrompt(messages, tools); + return generateToolModeResponse( + client, + body, + messages, + tools, + request.signal, + responseId, + model, + stream + ); + } + + const messageItemId = genId("msg"); + if (!stream) { + return generateNonStreamingResponse( + client, + body, + messages, + request.signal, + responseId, + messageItemId, + model + ); + } + return generateStreamingResponse( + client, + body, + messages, + request.signal, + responseId, + messageItemId, + model + ); +} + +/** + * Tool-aware path. Buffers the full generated text from the bridge, parses + * `` markers, and emits either function_call output items or a + * single message item depending on what the model produced. Always wraps + * the result in the Responses-API event sequence so the chat UI sees its + * normal lifecycle, even though no text is streamed token-by-token. + */ +async function generateToolModeResponse( + client: Client, + body: ResponsesPayload, + messages: LanguageModelMessage[], + tools: PromptToolDef[], + signal: AbortSignal, + responseId: string, + model: string, + stream: boolean +): Promise { + const schema = buildToolResponseSchema(tools); + + async function runOnce( + msgs: LanguageModelMessage[] + ): Promise<{ raw: string; inputTokens: number; outputTokens: number }> { + let raw = ""; + let inputTokens = 0; + let outputTokens = 0; + await sendToBridge( + client, + { + type: "PROMPT", + messages: msgs, + temperature: body.temperature, + topK: body.top_k, + responseConstraint: schema, + }, + (reply) => { + if (reply.type === "DELTA") { + raw += reply.text; + return false; + } + if (reply.type === "DONE") { + inputTokens = reply.inputTokens; + outputTokens = reply.outputTokens; + return true; + } + if (reply.type === "ERROR") throw new Error(reply.message); + if (reply.type === "ABORTED") throw new DOMException("Aborted", "AbortError"); + return false; + }, + signal + ); + return { raw, inputTokens, outputTokens }; + } + + let raw: string; + let inputTokens: number; + let outputTokens: number; + try { + ({ raw, inputTokens, outputTokens } = await runOnce(messages)); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return jsonError(`Browser AI: ${message}`); + } + + let envelope = parseEnvelope(raw); + // Parse-and-retry safety net. If the model returned text that doesn't + // fit the envelope (or fits but is empty), give it one more shot with + // an explicit reminder. Limited to a single retry to avoid loops. + const empty = !envelope || (envelope.toolCalls.length === 0 && !envelope.text); + if (empty) { + const retryMessages: LanguageModelMessage[] = [ + ...messages, + { role: "assistant", content: raw || "(empty)" }, + { + role: "user", + content: + "That reply did not match the required JSON shape. Reply ONLY with a JSON object: " + + '{"tool_calls":[{"name":"...","arguments":{...}}]} or {"text":"..."}. ' + + "Use the same tool names and argument schemas listed earlier.", + }, + ]; + try { + const retry = await runOnce(retryMessages); + raw = retry.raw; + inputTokens += retry.inputTokens; + outputTokens += retry.outputTokens; + envelope = parseEnvelope(raw); + } catch { + // Fall through with what we have. + } + } + + const toolCalls = envelope?.toolCalls ?? 
[]; + const text = envelope?.text ?? ""; + const createdAt = Math.floor(Date.now() / 1000); + + const outputItems: Array> = []; + for (const call of toolCalls) { + const fcId = genId("fc"); + outputItems.push({ + id: fcId, + type: "function_call", + call_id: fcId, + name: call.name, + arguments: call.arguments, + status: "completed", + }); + } + if (text) { + outputItems.push({ + id: genId("msg"), + type: "message", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text }], + }); + } + if (outputItems.length === 0) { + // Both retries produced nothing usable. Surface the raw output so the + // user sees what went wrong rather than an empty turn. + outputItems.push({ + id: genId("msg"), + type: "message", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text: raw.trim() || "(no response)" }], + }); + } + + const completedResponse = { + id: responseId, + object: "response", + created_at: createdAt, + status: "completed" as const, + model, + output: outputItems, + output_text: text, + usage: { + input_tokens: inputTokens, + output_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }; + + if (!stream) { + return new Response(JSON.stringify(completedResponse), { + status: 200, + headers: { "content-type": "application/json" }, + }); + } + + const encoder = new TextEncoder(); + const sseStream = new ReadableStream({ + start(controller) { + const enqueue = (event: string, data: unknown) => { + controller.enqueue(encoder.encode(sseEvent(event, data))); + }; + + enqueue("response.created", { + type: "response.created", + response: { ...completedResponse, status: "in_progress", output: [] }, + }); + + let outputIndex = 0; + for (const item of outputItems) { + const isFunctionCall = item.type === "function_call"; + enqueue("response.output_item.added", { + type: "response.output_item.added", + output_index: outputIndex, + item: isFunctionCall + ? { ...item, arguments: "" } + : { ...item, status: "in_progress", content: [] }, + }); + + if (isFunctionCall) { + enqueue("response.function_call_arguments.delta", { + type: "response.function_call_arguments.delta", + item_id: item.id, + output_index: outputIndex, + delta: item.arguments, + }); + enqueue("response.function_call_arguments.done", { + type: "response.function_call_arguments.done", + item_id: item.id, + output_index: outputIndex, + arguments: item.arguments, + }); + } else { + const text = (item.content as Array<{ text: string }>)[0]?.text ?? 
""; + enqueue("response.content_part.added", { + type: "response.content_part.added", + item_id: item.id, + output_index: outputIndex, + content_index: 0, + part: { type: "output_text", text: "" }, + }); + enqueue("response.output_text.delta", { + type: "response.output_text.delta", + item_id: item.id, + output_index: outputIndex, + content_index: 0, + delta: text, + }); + enqueue("response.output_text.done", { + type: "response.output_text.done", + item_id: item.id, + output_index: outputIndex, + content_index: 0, + text, + }); + enqueue("response.content_part.done", { + type: "response.content_part.done", + item_id: item.id, + output_index: outputIndex, + content_index: 0, + part: { type: "output_text", text }, + }); + } + + enqueue("response.output_item.done", { + type: "response.output_item.done", + output_index: outputIndex, + item, + }); + outputIndex += 1; + } + + enqueue("response.completed", { + type: "response.completed", + response: completedResponse, + }); + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + controller.close(); + }, + }); + + return new Response(sseStream, { status: 200, headers: sseHeaders() }); +} + +async function generateNonStreamingResponse( + client: Client, + body: ResponsesPayload, + messages: LanguageModelMessage[], + signal: AbortSignal, + responseId: string, + messageItemId: string, + model: string +): Promise { + let outputText = ""; + let inputTokens = 0; + let outputTokens = 0; + try { + await sendToBridge( + client, + { + type: "PROMPT", + messages, + temperature: body.temperature, + topK: body.top_k, + }, + (reply) => { + if (reply.type === "DELTA") { + outputText += reply.text; + return false; + } + if (reply.type === "DONE") { + inputTokens = reply.inputTokens; + outputTokens = reply.outputTokens; + return true; + } + if (reply.type === "ERROR") { + throw new Error(reply.message); + } + if (reply.type === "ABORTED") { + throw new DOMException("Aborted", "AbortError"); + } + return false; + }, + signal + ); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + return jsonError(`Browser AI: ${message}`); + } + + return new Response( + JSON.stringify({ + id: responseId, + object: "response", + created_at: Math.floor(Date.now() / 1000), + status: "completed", + model, + output: [ + { + id: messageItemId, + type: "message", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text: outputText }], + }, + ], + output_text: outputText, + usage: { + input_tokens: inputTokens, + output_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }), + { status: 200, headers: { "content-type": "application/json" } } + ); +} + +async function generateStreamingResponse( + client: Client, + body: ResponsesPayload, + messages: LanguageModelMessage[], + signal: AbortSignal, + responseId: string, + messageItemId: string, + model: string +): Promise { + const encoder = new TextEncoder(); + const createdAt = Math.floor(Date.now() / 1000); + + const stream = new ReadableStream({ + async start(controller) { + const enqueue = (event: string, data: unknown) => { + controller.enqueue(encoder.encode(sseEvent(event, data))); + }; + + const baseResponse = { + id: responseId, + object: "response", + created_at: createdAt, + model, + status: "in_progress", + output: [] as unknown[], + }; + + enqueue("response.created", { type: "response.created", response: baseResponse }); + + enqueue("response.output_item.added", { + type: "response.output_item.added", + output_index: 0, + item: { + id: messageItemId, + type: "message", + role: "assistant", + status: "in_progress", + content: [], + }, + }); + + enqueue("response.content_part.added", { + type: "response.content_part.added", + item_id: messageItemId, + output_index: 0, + content_index: 0, + part: { type: "output_text", text: "" }, + }); + + let outputText = ""; + let inputTokens = 0; + let outputTokens = 0; + let downloading = false; + + try { + await sendToBridge( + client, + { + type: "PROMPT", + messages, + temperature: body.temperature, + topK: body.top_k, + }, + (reply) => { + if (reply.type === "DOWNLOAD_PROGRESS") { + if (!downloading) { + downloading = true; + enqueue("response.browser_ai.download.started", { + type: "response.browser_ai.download.started", + }); + } + enqueue("response.browser_ai.download.progress", { + type: "response.browser_ai.download.progress", + loaded: reply.loaded, + }); + return false; + } + if (reply.type === "DELTA") { + outputText += reply.text; + enqueue("response.output_text.delta", { + type: "response.output_text.delta", + item_id: messageItemId, + output_index: 0, + content_index: 0, + delta: reply.text, + }); + return false; + } + if (reply.type === "DONE") { + inputTokens = reply.inputTokens; + outputTokens = reply.outputTokens; + return true; + } + if (reply.type === "ERROR") { + throw new Error(reply.message); + } + if (reply.type === "ABORTED") { + throw new DOMException("Aborted", "AbortError"); + } + return false; + }, + signal + ); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + enqueue("response.error", { + type: "response.error", + error: { message: `Browser AI: ${message}`, type: "browser_ai_error" }, + }); + controller.close(); + return; + } + + enqueue("response.output_text.done", { + type: "response.output_text.done", + item_id: messageItemId, + output_index: 0, + content_index: 0, + text: outputText, + }); + + enqueue("response.content_part.done", { + type: "response.content_part.done", + item_id: messageItemId, + output_index: 0, + content_index: 0, + part: { type: "output_text", text: outputText }, + }); + + enqueue("response.output_item.done", { + type: "response.output_item.done", + output_index: 0, + item: { + id: messageItemId, + type: "message", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text: outputText }], + }, + }); + + enqueue("response.completed", { + type: "response.completed", + response: { + ...baseResponse, + status: "completed", + output: [ + { + id: messageItemId, + type: "message", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text: outputText }], + }, + ], + output_text: outputText, + usage: { + input_tokens: inputTokens, + output_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }, + }); + + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + controller.close(); + }, + }); + + return new Response(stream, { status: 200, headers: sseHeaders() }); +} + +/** Handle `/v1/chat/completions` for Browser AI. */ +export async function handleChatCompletionsRequest( + request: Request, + body: ChatCompletionsPayload, + clientId: string +): Promise { + const client = await getClient(clientId); + if (!client) return jsonError("No active client to handle Browser AI request."); + + let messages = chatMessagesToBridge(body.messages ?? []); + if (messages.length === 0) { + return jsonError("Browser AI requires at least one text message.", 400); + } + + const tools = extractTools(body); + if (tools.length > 0) { + messages = injectToolPrompt(messages, tools); + } + const id = genId("chatcmpl"); + const created = Math.floor(Date.now() / 1000); + const model = body.model; + const stream = body.stream === true; + + if (!stream) { + let outputText = ""; + let inputTokens = 0; + let outputTokens = 0; + try { + await sendToBridge( + client, + { + type: "PROMPT", + messages, + temperature: body.temperature, + topK: body.top_k, + }, + (reply) => { + if (reply.type === "DELTA") { + outputText += reply.text; + return false; + } + if (reply.type === "DONE") { + inputTokens = reply.inputTokens; + outputTokens = reply.outputTokens; + return true; + } + if (reply.type === "ERROR") throw new Error(reply.message); + if (reply.type === "ABORTED") throw new DOMException("Aborted", "AbortError"); + return false; + }, + request.signal + ); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + return jsonError(`Browser AI: ${message}`); + } + + return new Response( + JSON.stringify({ + id, + object: "chat.completion", + created, + model, + choices: [ + { + index: 0, + message: { role: "assistant", content: outputText }, + finish_reason: "stop", + }, + ], + usage: { + prompt_tokens: inputTokens, + completion_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }), + { status: 200, headers: { "content-type": "application/json" } } + ); + } + + const encoder = new TextEncoder(); + const sseStream = new ReadableStream({ + async start(controller) { + const writeChunk = (chunk: Record) => { + controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`)); + }; + + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: { role: "assistant" }, finish_reason: null }], + }); + + let outputText = ""; + let inputTokens = 0; + let outputTokens = 0; + + try { + await sendToBridge( + client, + { + type: "PROMPT", + messages, + temperature: body.temperature, + topK: body.top_k, + }, + (reply) => { + if (reply.type === "DELTA") { + outputText += reply.text; + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: { content: reply.text }, finish_reason: null }], + }); + return false; + } + if (reply.type === "DONE") { + inputTokens = reply.inputTokens; + outputTokens = reply.outputTokens; + return true; + } + if (reply.type === "ERROR") throw new Error(reply.message); + if (reply.type === "ABORTED") throw new DOMException("Aborted", "AbortError"); + return false; + }, + request.signal + ); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + writeChunk({ + error: { message: `Browser AI: ${message}`, type: "browser_ai_error" }, + }); + controller.close(); + return; + } + + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: {}, finish_reason: "stop" }], + usage: { + prompt_tokens: inputTokens, + completion_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }); + + // Acknowledge unused output for type-checker: `outputText` tracks the + // streamed text but we don't replay it at the end. 
+ void outputText; + + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + controller.close(); + }, + }); + + return new Response(sseStream, { status: 200, headers: sseHeaders() }); +} + +export type { ResponsesPayload, ChatCompletionsPayload }; diff --git a/ui/src/service-worker/sw.ts b/ui/src/service-worker/sw.ts index 9b70b21..3e5ccc7 100644 --- a/ui/src/service-worker/sw.ts +++ b/ui/src/service-worker/sw.ts @@ -23,6 +23,15 @@ import "./sqlite-bridge"; import wasmInit, { HadrianGateway } from "/wasm/hadrian.js"; import { formatApiError } from "../utils/formatApiError"; +import { + augmentModelsResponse, + handleChatCompletionsRequest, + handleResponsesRequest, + isBrowserAiModel, + type ChatCompletionsPayload, + type ResponsesPayload, +} from "./browser-ai"; + let gateway: HadrianGateway | null = null; let initPromise: Promise | null = null; @@ -59,10 +68,10 @@ self.addEventListener("fetch", (event) => { if (url.origin !== self.location.origin) return; if (!GATEWAY_PATHS.some((p) => url.pathname.startsWith(p))) return; - event.respondWith(handleRequest(event.request)); + event.respondWith(handleRequest(event.request, url, event.clientId)); }); -async function handleRequest(request: Request): Promise { +async function handleRequest(request: Request, url: URL, clientId: string): Promise { // Lazy-init the WASM gateway on first intercepted request if (!gateway) { if (!initPromise) { @@ -90,6 +99,8 @@ async function handleRequest(request: Request): Promise { } try { + const intercepted = await maybeHandleBrowserAi(request, url, clientId); + if (intercepted) return intercepted; return await gateway!.handle(request); } catch (error) { console.error("Hadrian WASM gateway error:", error); @@ -108,3 +119,44 @@ async function handleRequest(request: Request): Promise { ); } } + +function isResponsesPath(pathname: string): boolean { + return pathname.endsWith("/v1/responses"); +} + +function isChatCompletionsPath(pathname: string): boolean { + return pathname.endsWith("/v1/chat/completions"); +} + +function isModelsPath(pathname: string): boolean { + return pathname.endsWith("/v1/models"); +} + +async function maybeHandleBrowserAi( + request: Request, + url: URL, + clientId: string +): Promise { + if (request.method === "GET" && isModelsPath(url.pathname)) { + const upstream = await gateway!.handle(request); + return augmentModelsResponse(upstream, clientId); + } + + if (request.method !== "POST") return null; + if (!isResponsesPath(url.pathname) && !isChatCompletionsPath(url.pathname)) return null; + + let body: unknown; + try { + body = await request.clone().json(); + } catch { + return null; + } + if (!body || typeof body !== "object") return null; + const model = (body as { model?: unknown }).model; + if (!isBrowserAiModel(model)) return null; + + if (isResponsesPath(url.pathname)) { + return handleResponsesRequest(request, body as ResponsesPayload, clientId); + } + return handleChatCompletionsRequest(request, body as ChatCompletionsPayload, clientId); +} diff --git a/ui/src/services/browser-ai/availability.ts b/ui/src/services/browser-ai/availability.ts new file mode 100644 index 0000000..ffc6f64 --- /dev/null +++ b/ui/src/services/browser-ai/availability.ts @@ -0,0 +1,58 @@ +import type { LanguageModelAvailability, LanguageModelGlobal } from "./types"; + +export const BROWSER_AI_PROVIDER = "browser"; +export const BROWSER_AI_PREFIX = `${BROWSER_AI_PROVIDER}/`; + +/** + * Best-guess identifier for the on-device model behind the Prompt API. 
The + * spec exposes no model name, so we infer one from the user agent. Chrome and + * Brave ship Gemini Nano; Edge announced Phi-4 Mini for its on-device stack. + * Anything else falls back to a generic id that still lets the routing layer + * recognise the model via the `browser/` prefix. + */ +export function detectBrowserAiModel(): { id: string; modelName: string; vendor: string } { + const ua = typeof navigator !== "undefined" ? (navigator.userAgent ?? "") : ""; + // The doubled `browser-` prefix on the model name surfaces as + // "Browser " in the model picker after the provider segment is + // stripped (formatModelName splits on hyphens). Without it the picker + // would show just "Gemini Nano" with no Browser-AI cue. + if (/\bEdg\//.test(ua)) { + return { + id: `${BROWSER_AI_PREFIX}browser-phi-4-mini`, + modelName: "browser-phi-4-mini", + vendor: "Edge", + }; + } + if (/\b(?:Chrome|Chromium|Brave)\//.test(ua) || ua.includes(" Brave/")) { + return { + id: `${BROWSER_AI_PREFIX}browser-gemini-nano`, + modelName: "browser-gemini-nano", + vendor: "Chromium", + }; + } + return { + id: `${BROWSER_AI_PREFIX}browser-on-device`, + modelName: "browser-on-device", + vendor: "Browser", + }; +} + +export function getLanguageModel(): LanguageModelGlobal | null { + if (typeof globalThis === "undefined") return null; + const lm = (globalThis as unknown as { LanguageModel?: LanguageModelGlobal }).LanguageModel; + return lm ?? null; +} + +export function isLanguageModelSupported(): boolean { + return getLanguageModel() !== null; +} + +export async function getAvailability(): Promise { + const lm = getLanguageModel(); + if (!lm) return "unavailable"; + try { + return await lm.availability(); + } catch { + return "unavailable"; + } +} diff --git a/ui/src/services/browser-ai/bridge.ts b/ui/src/services/browser-ai/bridge.ts new file mode 100644 index 0000000..b3c5cf5 --- /dev/null +++ b/ui/src/services/browser-ai/bridge.ts @@ -0,0 +1,196 @@ +import { getAvailability, getLanguageModel } from "./availability"; +import type { LanguageModelMessage, LanguageModelSession } from "./types"; + +/** + * Window-side bridge that responds to LanguageModel requests from the + * service worker. The Prompt API (`globalThis.LanguageModel`) is only + * exposed in window/dedicated-worker scopes, so we relay calls from the + * SW through this bridge over a `MessageChannel` per request. + * + * Tools are not passed through to the model: the spec defines a native + * `tools` option, but Chrome rejects sessions that supply one + * ("the device is unable to create a session to run the model"). Until + * that ships, the SW polyfills tools by injecting their descriptions into + * the system prompt and parsing `` markers from the streamed + * text. This bridge stays intentionally tool-agnostic so it works on every + * Chromium channel that ships the Prompt API. + */ + +interface PromptRequestPayload { + type: "PROMPT"; + messages: LanguageModelMessage[]; + temperature?: number; + topK?: number; + /** + * JSON Schema for `responseConstraint`. When set the bridge runs the + * non-streaming `prompt()` API: the model output is forced to match the + * schema, and partial chunks would be malformed JSON anyway. 
+ */ + responseConstraint?: object; +} + +interface AvailabilityRequestPayload { + type: "AVAILABILITY"; +} + +type BridgeRequest = PromptRequestPayload | AvailabilityRequestPayload; + +export function installBrowserAiBridge(): () => void { + if (typeof navigator === "undefined" || !("serviceWorker" in navigator)) { + return () => {}; + } + + const handler = (event: MessageEvent) => { + const data = event.data as { type?: string; payload?: BridgeRequest } | null; + if (!data || data.type !== "BROWSER_AI_REQUEST" || !data.payload) return; + const port = event.ports?.[0]; + if (!port) return; + handleRequest(port, data.payload).catch((err: unknown) => { + const message = err instanceof Error ? err.message : String(err); + try { + port.postMessage({ type: "ERROR", message }); + port.close(); + } catch { + // Port already closed. + } + }); + }; + + navigator.serviceWorker.addEventListener("message", handler); + return () => navigator.serviceWorker.removeEventListener("message", handler); +} + +async function handleRequest(port: MessagePort, payload: BridgeRequest): Promise { + if (payload.type === "AVAILABILITY") { + port.postMessage({ type: "AVAILABILITY", state: await getAvailability() }); + port.close(); + return; + } + + if (payload.type === "PROMPT") { + await handlePrompt(port, payload); + return; + } + + port.postMessage({ + type: "ERROR", + message: `Unknown bridge request type: ${(payload as { type?: string }).type}`, + }); + port.close(); +} + +async function handlePrompt(port: MessagePort, payload: PromptRequestPayload): Promise { + const lm = getLanguageModel(); + if (!lm) { + port.postMessage({ + type: "ERROR", + message: "Browser AI is not available in this browser.", + }); + port.close(); + return; + } + + const abort = new AbortController(); + let session: LanguageModelSession | null = null; + // Tear down the on-device session on cancel. Chrome's LanguageModel + // implementation does not always honour AbortSignal mid-prompt, so an + // abort that only fires the controller can leave the call hanging + // indefinitely. Destroying the session forces it to release. + abort.signal.addEventListener("abort", () => { + try { + session?.destroy(); + } catch { + // ignored + } + }); + port.addEventListener("message", (event: MessageEvent) => { + if ((event.data as { type?: string } | null)?.type === "ABORT") { + abort.abort(); + } + }); + port.start(); + + const systemMessages = payload.messages.filter((m) => m.role === "system"); + const conversation = payload.messages.filter((m) => m.role !== "system"); + + try { + session = await lm.create({ + initialPrompts: systemMessages.length > 0 ? systemMessages : undefined, + temperature: payload.temperature, + topK: payload.topK, + monitor(m) { + m.addEventListener("downloadprogress", (event) => { + port.postMessage({ type: "DOWNLOAD_PROGRESS", loaded: event.loaded }); + }); + }, + signal: abort.signal, + }); + + let inputTokens = 0; + try { + inputTokens = await session.measureInputUsage(conversation); + } catch { + // measureInputUsage may not be implemented on every channel. + } + + let outputText = ""; + if (payload.responseConstraint) { + // Constrained output: token chunks would be malformed JSON, so use + // the non-streaming API and surface the full response as one delta. 
+ outputText = await session.prompt(conversation, { + signal: abort.signal, + responseConstraint: payload.responseConstraint, + }); + if (outputText) port.postMessage({ type: "DELTA", text: outputText }); + } else { + const stream = session.promptStreaming(conversation, { signal: abort.signal }); + const reader = stream.getReader(); + let cumulative = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + // Most Chromium channels stream deltas; older channels streamed + // cumulative text. Detect and normalise to deltas. + let delta: string; + if ( + value.length >= cumulative.length && + value.startsWith(cumulative) && + cumulative.length > 0 + ) { + delta = value.slice(cumulative.length); + cumulative = value; + } else { + delta = value; + cumulative += value; + } + if (!delta) continue; + outputText += delta; + port.postMessage({ type: "DELTA", text: delta }); + } + } + + let outputTokens = 0; + try { + outputTokens = await session.measureInputUsage([{ role: "assistant", content: outputText }]); + } catch { + outputTokens = Math.max(1, Math.ceil(outputText.length / 4)); + } + + port.postMessage({ type: "DONE", inputTokens, outputTokens }); + } catch (err: unknown) { + if (abort.signal.aborted) { + port.postMessage({ type: "ABORTED" }); + } else { + const message = err instanceof Error ? err.message : String(err); + port.postMessage({ type: "ERROR", message }); + } + } finally { + session?.destroy(); + try { + port.close(); + } catch { + // Already closed. + } + } +} diff --git a/ui/src/services/browser-ai/index.ts b/ui/src/services/browser-ai/index.ts new file mode 100644 index 0000000..60cdf21 --- /dev/null +++ b/ui/src/services/browser-ai/index.ts @@ -0,0 +1,17 @@ +export { + BROWSER_AI_PREFIX, + BROWSER_AI_PROVIDER, + detectBrowserAiModel, + getAvailability, + getLanguageModel, + isLanguageModelSupported, +} from "./availability"; +export { installBrowserAiBridge } from "./bridge"; +export type { + LanguageModelAvailability, + LanguageModelGlobal, + LanguageModelMessage, + LanguageModelMonitor, + LanguageModelParams, + LanguageModelSession, +} from "./types"; diff --git a/ui/src/services/browser-ai/types.ts b/ui/src/services/browser-ai/types.ts new file mode 100644 index 0000000..275c109 --- /dev/null +++ b/ui/src/services/browser-ai/types.ts @@ -0,0 +1,98 @@ +/** + * Type declarations for the on-device LanguageModel API exposed by recent + * Chromium browsers (Chrome, Edge, Brave, etc.). Lives on `window` and + * dedicated workers; not exposed in service workers (see the bridge in + * `service-worker/browser-ai.ts` for how the SW reaches it). + * + * Spec: https://github.com/webmachinelearning/prompt-api + */ + +export type LanguageModelAvailability = + | "available" + | "downloadable" + | "downloading" + | "unavailable"; + +export interface LanguageModelMonitor { + addEventListener(type: "downloadprogress", listener: (event: { loaded: number }) => void): void; + removeEventListener( + type: "downloadprogress", + listener: (event: { loaded: number }) => void + ): void; +} + +export interface LanguageModelMessage { + role: "system" | "user" | "assistant"; + content: string; +} + +/** + * Spec-native tool entry for `LanguageModel.create({ tools: [...] })`. The + * runtime invokes `execute` whenever the model decides to call this tool; + * the returned string is fed back as the tool result. 
+ * https://github.com/webmachinelearning/prompt-api?tab=readme-ov-file#tool-use + */ +export interface LanguageModelTool { + name: string; + description?: string; + inputSchema: object; + execute: (args: Record) => Promise | string; +} + +export interface LanguageModelExpectedIO { + type: "text" | "tool-call" | "tool-response" | "image" | "audio"; + languages?: string[]; +} + +export interface LanguageModelCreateOptions { + initialPrompts?: LanguageModelMessage[]; + temperature?: number; + topK?: number; + tools?: LanguageModelTool[]; + expectedInputs?: LanguageModelExpectedIO[]; + expectedOutputs?: LanguageModelExpectedIO[]; + monitor?: (m: LanguageModelMonitor) => void; + signal?: AbortSignal; +} + +export interface LanguageModelParams { + defaultTemperature: number; + maxTemperature: number; + defaultTopK: number; + maxTopK: number; +} + +export interface LanguageModelPromptOptions { + signal?: AbortSignal; + /** JSON Schema constraining the model output at decode time (Chrome 137+). */ + responseConstraint?: object; + /** Skip auto-injection of the schema into the prompt context. */ + omitResponseConstraintInput?: boolean; +} + +export interface LanguageModelSession { + prompt( + input: string | LanguageModelMessage[], + options?: LanguageModelPromptOptions + ): Promise; + promptStreaming( + input: string | LanguageModelMessage[], + options?: LanguageModelPromptOptions + ): ReadableStream; + measureInputUsage(input: string | LanguageModelMessage[]): Promise; + destroy(): void; + readonly inputUsage: number; + readonly inputQuota: number; +} + +export interface LanguageModelGlobal { + availability(): Promise; + params(): Promise; + create(options?: LanguageModelCreateOptions): Promise; +} + +declare global { + var LanguageModel: LanguageModelGlobal | undefined; +} + +export {}; From 9230378b7a7cf802fce67d61af3bdaa7fe2b5870 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Wed, 29 Apr 2026 20:23:47 +1000 Subject: [PATCH 2/5] Fixes --- ui/src/service-worker/browser-ai.ts | 60 +++++++++------------------- ui/src/services/browser-ai/bridge.ts | 21 ++++++++-- 2 files changed, 36 insertions(+), 45 deletions(-) diff --git a/ui/src/service-worker/browser-ai.ts b/ui/src/service-worker/browser-ai.ts index c8433c5..edcfe0d 100644 --- a/ui/src/service-worker/browser-ai.ts +++ b/ui/src/service-worker/browser-ai.ts @@ -45,12 +45,12 @@ export function isBrowserAiModel(model: unknown): boolean { } async function getClient(clientId: string): Promise { - if (clientId) { - const direct = await self.clients.get(clientId); - if (direct) return direct; - } - const all = await self.clients.matchAll({ type: "window", includeUncontrolled: false }); - return all[0] ?? null; + // Only the originating tab's window can service the request: its bridge + // owns the conversation context and abort signal. Falling back to + // "first window client" cross-routes between tabs. + if (!clientId) return null; + const direct = await self.clients.get(clientId); + return direct ?? null; } async function sendToBridge( @@ -167,7 +167,10 @@ export async function augmentModelsResponse( } catch { return response; } - if (availability === "unavailable") return response; + // Only expose the model after the user has explicitly downloaded it via + // the wizard. Listing it while merely `downloadable` would trigger a + // multi-GB download on first chat use with no progress indication. 
+ if (availability !== "available") return response; let body: { data?: unknown[]; [k: string]: unknown }; try { @@ -184,10 +187,7 @@ export async function augmentModelsResponse( created: 0, owned_by: BROWSER_AI_PROVIDER, source: "static", - description: - availability === "available" - ? `On-device ${detected.vendor} model, runs locally in your browser.` - : `On-device ${detected.vendor} model, runs locally in your browser. Downloads on first use.`, + description: `On-device ${detected.vendor} model, runs locally in your browser.`, capabilities: { tools: true, vision: false, streaming: true }, modalities: { input: ["text"], output: ["text"] }, tasks: ["chat"], @@ -608,34 +608,12 @@ async function generateToolModeResponse( return jsonError(`Browser AI: ${message}`); } - let envelope = parseEnvelope(raw); - // Parse-and-retry safety net. If the model returned text that doesn't - // fit the envelope (or fits but is empty), give it one more shot with - // an explicit reminder. Limited to a single retry to avoid loops. - const empty = !envelope || (envelope.toolCalls.length === 0 && !envelope.text); - if (empty) { - const retryMessages: LanguageModelMessage[] = [ - ...messages, - { role: "assistant", content: raw || "(empty)" }, - { - role: "user", - content: - "That reply did not match the required JSON shape. Reply ONLY with a JSON object: " + - '{"tool_calls":[{"name":"...","arguments":{...}}]} or {"text":"..."}. ' + - "Use the same tool names and argument schemas listed earlier.", - }, - ]; - try { - const retry = await runOnce(retryMessages); - raw = retry.raw; - inputTokens += retry.inputTokens; - outputTokens += retry.outputTokens; - envelope = parseEnvelope(raw); - } catch { - // Fall through with what we have. - } - } - + // `responseConstraint` enforces the schema at decode time, so JSON.parse + // is guaranteed to succeed. The only remaining failure mode is the model + // emitting `{}` (both fields are optional in the schema), which a retry + // does not reliably correct. We surface whatever we got — empty case is + // handled below by falling back to the raw text. + const envelope = parseEnvelope(raw); const toolCalls = envelope?.toolCalls ?? []; const text = envelope?.text ?? ""; const createdAt = Math.floor(Date.now() / 1000); @@ -662,8 +640,8 @@ async function generateToolModeResponse( }); } if (outputItems.length === 0) { - // Both retries produced nothing usable. Surface the raw output so the - // user sees what went wrong rather than an empty turn. + // Model returned an empty envelope. Surface the raw output so the + // user sees what came back rather than a blank turn. outputItems.push({ id: genId("msg"), type: "message", diff --git a/ui/src/services/browser-ai/bridge.ts b/ui/src/services/browser-ai/bridge.ts index b3c5cf5..1317355 100644 --- a/ui/src/services/browser-ai/bridge.ts +++ b/ui/src/services/browser-ai/bridge.ts @@ -126,11 +126,15 @@ async function handlePrompt(port: MessagePort, payload: PromptRequestPayload): P signal: abort.signal, }); + // Count input tokens across system + conversation messages. The earlier + // version only measured `conversation`, which understated usage whenever + // a system prompt was supplied (every Hadrian chat turn). let inputTokens = 0; try { - inputTokens = await session.measureInputUsage(conversation); - } catch { + inputTokens = await session.measureInputUsage(payload.messages); + } catch (err) { // measureInputUsage may not be implemented on every channel. 
+ console.debug("[browser-ai] measureInputUsage(input) failed", err); } let outputText = ""; @@ -170,10 +174,19 @@ async function handlePrompt(port: MessagePort, payload: PromptRequestPayload): P } } + // measureInputUsage of an assistant message also counts role-framing + // tokens (a few per message). Subtract the framing baseline so the + // reported output count tracks the generated text rather than the + // wrapper. Falls back to ~4 chars/token when the API isn't available. let outputTokens = 0; try { - outputTokens = await session.measureInputUsage([{ role: "assistant", content: outputText }]); - } catch { + const [withText, baseline] = await Promise.all([ + session.measureInputUsage([{ role: "assistant", content: outputText }]), + session.measureInputUsage([{ role: "assistant", content: "" }]), + ]); + outputTokens = Math.max(0, withText - baseline); + } catch (err) { + console.debug("[browser-ai] measureInputUsage(output) failed", err); outputTokens = Math.max(1, Math.ceil(outputText.length / 4)); } From 675aace61891dfad6918a86ea3dfde882b75c082 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Wed, 29 Apr 2026 21:56:13 +1000 Subject: [PATCH 3/5] Review fixes --- ui/src/components/WasmSetup/WasmSetup.tsx | 61 +++--- .../components/WasmSetup/WasmSetupGuard.tsx | 9 +- ui/src/service-worker/browser-ai.ts | 181 +++++++++++++++++- ui/src/service-worker/sw.ts | 4 + ui/src/services/browser-ai/bridge.ts | 3 + ui/src/services/browser-ai/types.ts | 5 + 6 files changed, 230 insertions(+), 33 deletions(-) diff --git a/ui/src/components/WasmSetup/WasmSetup.tsx b/ui/src/components/WasmSetup/WasmSetup.tsx index 8ec881f..c322388 100644 --- a/ui/src/components/WasmSetup/WasmSetup.tsx +++ b/ui/src/components/WasmSetup/WasmSetup.tsx @@ -106,6 +106,30 @@ function initialEntries(): ProviderEntry[] { return PROVIDER_TEMPLATES.map((t) => createEntry(t, 0)); } +// Bundle the browser AI state and download callback into a single optional +// prop. Previously the two were separate optionals with `onBrowserAiDownload +// ?? (() => {})` as a fallback, which let callers silently no-op the +// Download button by passing state without a callback. Bundling makes the +// pairing structural — a caller cannot supply the state without the +// handler — so the no-op fallback can go away entirely. 
+export interface BrowserAiProp { + state: BrowserAiState; + onDownload: () => void; +} + +interface WasmSetupProps { + open: boolean; + onComplete: () => void; + oauthProviderName?: string | null; + oauthError?: string | null; + existingProviders?: DynamicProviderResponse[]; + ollamaDetected?: boolean; + ollamaConnecting?: boolean; + ollamaConnected?: boolean; + onOllamaConnect?: () => void; + browserAi?: BrowserAiProp; +} + export function WasmSetup({ open, onComplete, @@ -117,20 +141,7 @@ export function WasmSetup({ ollamaConnected, onOllamaConnect, browserAi, - onBrowserAiDownload, -}: { - open: boolean; - onComplete: () => void; - oauthProviderName?: string | null; - oauthError?: string | null; - existingProviders?: DynamicProviderResponse[]; - ollamaDetected?: boolean; - ollamaConnecting?: boolean; - ollamaConnected?: boolean; - onOllamaConnect?: () => void; - browserAi?: BrowserAiState; - onBrowserAiDownload?: () => void; -}) { +}: WasmSetupProps) { const [step, setStep] = useState("welcome"); const [entries, setEntries] = useState(initialEntries); @@ -249,7 +260,7 @@ export function WasmSetup({ } }, []); - const browserAiReady = browserAi?.availability === "available"; + const browserAiReady = browserAi?.state.availability === "available"; const savedCount = entries.filter((e) => e.saved).length + (hasExistingOpenRouter ? 1 : 0) + @@ -274,7 +285,6 @@ export function WasmSetup({ existingProviders={existingProviders} onDeleteExisting={handleDeleteExisting} browserAi={browserAi} - onBrowserAiDownload={onBrowserAiDownload} /> )} {step === "providers" && ( @@ -300,7 +310,6 @@ export function WasmSetup({ existingProviders={existingProviders} onDeleteExisting={handleDeleteExisting} browserAi={browserAi} - onBrowserAiDownload={onBrowserAiDownload} /> )} {step === "done" && } @@ -322,7 +331,6 @@ function WelcomeStep({ existingProviders, onDeleteExisting, browserAi, - onBrowserAiDownload, }: { onNext: () => void; onReady: () => void; @@ -336,10 +344,9 @@ function WelcomeStep({ onOllamaConnect?: () => void; existingProviders?: DynamicProviderResponse[]; onDeleteExisting: (id: string) => void; - browserAi?: BrowserAiState; - onBrowserAiDownload?: () => void; + browserAi?: BrowserAiProp; }) { - const hasBrowserAiReady = browserAi?.availability === "available"; + const hasBrowserAiReady = browserAi?.state.availability === "available"; const hasProvider = hasExistingOpenRouter || hasExistingOllama || hasBrowserAiReady; return ( <> @@ -475,8 +482,8 @@ function WelcomeStep({ {browserAi && ( {})} + state={browserAi.state} + onDownload={browserAi.onDownload} className="mt-3" /> )} @@ -549,7 +556,6 @@ function ProvidersStep({ existingProviders, onDeleteExisting, browserAi, - onBrowserAiDownload, }: { entries: ProviderEntry[]; onUpdate: (key: string, update: Partial) => void; @@ -571,8 +577,7 @@ function ProvidersStep({ onOllamaConnect?: () => void; existingProviders?: DynamicProviderResponse[]; onDeleteExisting: (id: string) => void; - browserAi?: BrowserAiState; - onBrowserAiDownload?: () => void; + browserAi?: BrowserAiProp; }) { return ( <> @@ -666,8 +671,8 @@ function ProvidersStep({ {browserAi && ( {})} + state={browserAi.state} + onDownload={browserAi.onDownload} className="mb-4" /> )} diff --git a/ui/src/components/WasmSetup/WasmSetupGuard.tsx b/ui/src/components/WasmSetup/WasmSetupGuard.tsx index e059a56..cb7d933 100644 --- a/ui/src/components/WasmSetup/WasmSetupGuard.tsx +++ b/ui/src/components/WasmSetup/WasmSetupGuard.tsx @@ -143,6 +143,12 @@ export function WasmSetupGuard({ children }: { 
children: ReactNode }) { downloading: false, downloadProgress: null, })); + // Tell the SW its 60s availability cache is stale before we trigger + // the model-list refetch; otherwise the freshly-ready model would + // not appear until the cache expires organically. + navigator.serviceWorker.controller?.postMessage({ + type: "BROWSER_AI_AVAILABILITY_CHANGED", + }); queryClient.invalidateQueries({ queryKey: apiV1ModelsQueryKey() }); } catch (err) { setBrowserAi((prev) => ({ @@ -256,8 +262,7 @@ export function WasmSetupGuard({ children }: { children: ReactNode }) { ollamaConnecting={ollamaConnecting} ollamaConnected={ollamaConnected} onOllamaConnect={handleOllamaConnect} - browserAi={browserAi} - onBrowserAiDownload={handleBrowserAiDownload} + browserAi={{ state: browserAi, onDownload: handleBrowserAiDownload }} /> ); diff --git a/ui/src/service-worker/browser-ai.ts b/ui/src/service-worker/browser-ai.ts index edcfe0d..c57e51d 100644 --- a/ui/src/service-worker/browser-ai.ts +++ b/ui/src/service-worker/browser-ai.ts @@ -40,6 +40,16 @@ interface PromptToolDef { let availabilityCache: { state: BridgeAvailabilityReply["state"]; checkedAt: number } | null = null; const AVAILABILITY_TTL_MS = 60_000; +/** + * Drop the cached availability state so the next `/v1/models` request + * re-queries the bridge. Called by the SW message handler after the window + * reports a successful model download — without this, the freshly-ready + * model would not appear in the list until the 60s TTL expires. + */ +export function invalidateAvailabilityCache(): void { + availabilityCache = null; +} + export function isBrowserAiModel(model: unknown): boolean { return typeof model === "string" && model.startsWith(BROWSER_AI_PREFIX); } @@ -1021,14 +1031,26 @@ export async function handleChatCompletionsRequest( } const tools = extractTools(body); - if (tools.length > 0) { - messages = injectToolPrompt(messages, tools); - } const id = genId("chatcmpl"); const created = Math.floor(Date.now() / 1000); const model = body.model; const stream = body.stream === true; + if (tools.length > 0) { + messages = injectToolPrompt(messages, tools); + return generateChatCompletionsToolModeResponse( + client, + body, + messages, + tools, + request.signal, + id, + created, + model, + stream + ); + } + if (!stream) { let outputText = ""; let inputTokens = 0; @@ -1171,4 +1193,157 @@ export async function handleChatCompletionsRequest( return new Response(sseStream, { status: 200, headers: sseHeaders() }); } +/** + * Tool-aware path for chat completions. Mirrors `generateToolModeResponse`: + * applies `responseConstraint`, buffers the constrained envelope, parses it, + * and surfaces tool invocations in the standard `tool_calls` field rather + * than as raw JSON in `content`. Token-by-token streaming is impossible + * here (chunks would be malformed JSON), so streaming clients receive a + * single `tool_calls` delta followed by the terminal chunk. 
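+ *
+ * Sketch of the resulting stream when the model does call a tool (ids,
+ * names and arguments illustrative; unchanged chunk fields elided):
+ *
+ *   delta: { role: "assistant" }                          finish_reason: null
+ *   delta: { tool_calls: [{ index: 0, id: "call_...", type: "function",
+ *            function: { name: "...", arguments: "{...}" } }] }
+ *   delta: {}                                             finish_reason: "tool_calls"
+ *   data: [DONE]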
+ */ +async function generateChatCompletionsToolModeResponse( + client: Client, + body: ChatCompletionsPayload, + messages: LanguageModelMessage[], + tools: PromptToolDef[], + signal: AbortSignal, + id: string, + created: number, + model: string, + stream: boolean +): Promise { + const schema = buildToolResponseSchema(tools); + + let raw = ""; + let inputTokens = 0; + let outputTokens = 0; + try { + await sendToBridge( + client, + { + type: "PROMPT", + messages, + temperature: body.temperature, + topK: body.top_k, + responseConstraint: schema, + }, + (reply) => { + if (reply.type === "DELTA") { + raw += reply.text; + return false; + } + if (reply.type === "DONE") { + inputTokens = reply.inputTokens; + outputTokens = reply.outputTokens; + return true; + } + if (reply.type === "ERROR") throw new Error(reply.message); + if (reply.type === "ABORTED") throw new DOMException("Aborted", "AbortError"); + return false; + }, + signal + ); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return jsonError(`Browser AI: ${message}`); + } + + const envelope = parseEnvelope(raw); + const parsedToolCalls = envelope?.toolCalls ?? []; + const text = envelope?.text ?? ""; + + const toolCalls = parsedToolCalls.map((call) => ({ + id: genId("call"), + type: "function" as const, + function: { name: call.name, arguments: call.arguments }, + })); + + // OpenAI semantics: when the model returns tool calls, the assistant + // message has `content: null` and `finish_reason: "tool_calls"`. When + // there are none, we fall back to the text envelope (or the raw output + // if the envelope was empty). + const hasToolCalls = toolCalls.length > 0; + const content = hasToolCalls ? null : text || raw.trim() || ""; + const finishReason = hasToolCalls ? 
"tool_calls" : "stop"; + + if (!stream) { + const message: Record = { role: "assistant", content }; + if (hasToolCalls) message.tool_calls = toolCalls; + return new Response( + JSON.stringify({ + id, + object: "chat.completion", + created, + model, + choices: [{ index: 0, message, finish_reason: finishReason }], + usage: { + prompt_tokens: inputTokens, + completion_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }), + { status: 200, headers: { "content-type": "application/json" } } + ); + } + + const encoder = new TextEncoder(); + const sseStream = new ReadableStream({ + start(controller) { + const writeChunk = (chunk: Record) => { + controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`)); + }; + + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: { role: "assistant" }, finish_reason: null }], + }); + + if (hasToolCalls) { + const deltaToolCalls = toolCalls.map((call, index) => ({ + index, + id: call.id, + type: call.type, + function: { name: call.function.name, arguments: call.function.arguments }, + })); + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: { tool_calls: deltaToolCalls }, finish_reason: null }], + }); + } else if (content) { + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: { content }, finish_reason: null }], + }); + } + + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: {}, finish_reason: finishReason }], + usage: { + prompt_tokens: inputTokens, + completion_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }); + + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + controller.close(); + }, + }); + + return new Response(sseStream, { status: 200, headers: sseHeaders() }); +} + export type { ResponsesPayload, ChatCompletionsPayload }; diff --git a/ui/src/service-worker/sw.ts b/ui/src/service-worker/sw.ts index 3e5ccc7..73d543a 100644 --- a/ui/src/service-worker/sw.ts +++ b/ui/src/service-worker/sw.ts @@ -27,6 +27,7 @@ import { augmentModelsResponse, handleChatCompletionsRequest, handleResponsesRequest, + invalidateAvailabilityCache, isBrowserAiModel, type ChatCompletionsPayload, type ResponsesPayload, @@ -59,6 +60,9 @@ self.addEventListener("message", (event) => { if (event.data?.type === "CLAIM") { self.clients.claim(); } + if (event.data?.type === "BROWSER_AI_AVAILABILITY_CHANGED") { + invalidateAvailabilityCache(); + } }); self.addEventListener("fetch", (event) => { diff --git a/ui/src/services/browser-ai/bridge.ts b/ui/src/services/browser-ai/bridge.ts index 1317355..366ada5 100644 --- a/ui/src/services/browser-ai/bridge.ts +++ b/ui/src/services/browser-ai/bridge.ts @@ -120,6 +120,9 @@ async function handlePrompt(port: MessagePort, payload: PromptRequestPayload): P topK: payload.topK, monitor(m) { m.addEventListener("downloadprogress", (event) => { + // `event.loaded` is normalized to a value in [0, 1] per the Prompt + // API spec, not a byte count — the spec deliberately omits `total`. 
+ // https://github.com/webmachinelearning/prompt-api?tab=readme-ov-file#download-progress port.postMessage({ type: "DOWNLOAD_PROGRESS", loaded: event.loaded }); }); }, diff --git a/ui/src/services/browser-ai/types.ts b/ui/src/services/browser-ai/types.ts index 275c109..6b3b6a7 100644 --- a/ui/src/services/browser-ai/types.ts +++ b/ui/src/services/browser-ai/types.ts @@ -14,6 +14,11 @@ export type LanguageModelAvailability = | "unavailable"; export interface LanguageModelMonitor { + /** + * `loaded` is a fraction in [0, 1], not a byte count. The Prompt API spec + * normalizes progress and omits `total` for that reason. + * https://github.com/webmachinelearning/prompt-api?tab=readme-ov-file#download-progress + */ addEventListener(type: "downloadprogress", listener: (event: { loaded: number }) => void): void; removeEventListener( type: "downloadprogress", From 25fc24f1c297e9074919e050d0203323c415fef0 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Mon, 4 May 2026 23:50:07 +1000 Subject: [PATCH 4/5] Use new context apis --- ui/src/services/browser-ai/bridge.ts | 14 +++++++------- ui/src/services/browser-ai/types.ts | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ui/src/services/browser-ai/bridge.ts b/ui/src/services/browser-ai/bridge.ts index 366ada5..3b9c2af 100644 --- a/ui/src/services/browser-ai/bridge.ts +++ b/ui/src/services/browser-ai/bridge.ts @@ -134,10 +134,10 @@ async function handlePrompt(port: MessagePort, payload: PromptRequestPayload): P // a system prompt was supplied (every Hadrian chat turn). let inputTokens = 0; try { - inputTokens = await session.measureInputUsage(payload.messages); + inputTokens = await session.measureContextUsage(payload.messages); } catch (err) { - // measureInputUsage may not be implemented on every channel. - console.debug("[browser-ai] measureInputUsage(input) failed", err); + // measureContextUsage may not be implemented on every channel. + console.debug("[browser-ai] measureContextUsage(input) failed", err); } let outputText = ""; @@ -177,19 +177,19 @@ async function handlePrompt(port: MessagePort, payload: PromptRequestPayload): P } } - // measureInputUsage of an assistant message also counts role-framing + // measureContextUsage of an assistant message also counts role-framing // tokens (a few per message). Subtract the framing baseline so the // reported output count tracks the generated text rather than the // wrapper. Falls back to ~4 chars/token when the API isn't available. 
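// Example (numbers illustrative): if the assistant message carrying
// outputText measures 57 tokens and the empty-content baseline measures 5,
// we report 52 output tokens.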
let outputTokens = 0; try { const [withText, baseline] = await Promise.all([ - session.measureInputUsage([{ role: "assistant", content: outputText }]), - session.measureInputUsage([{ role: "assistant", content: "" }]), + session.measureContextUsage([{ role: "assistant", content: outputText }]), + session.measureContextUsage([{ role: "assistant", content: "" }]), ]); outputTokens = Math.max(0, withText - baseline); } catch (err) { - console.debug("[browser-ai] measureInputUsage(output) failed", err); + console.debug("[browser-ai] measureContextUsage(output) failed", err); outputTokens = Math.max(1, Math.ceil(outputText.length / 4)); } diff --git a/ui/src/services/browser-ai/types.ts b/ui/src/services/browser-ai/types.ts index 6b3b6a7..9744e39 100644 --- a/ui/src/services/browser-ai/types.ts +++ b/ui/src/services/browser-ai/types.ts @@ -84,10 +84,10 @@ export interface LanguageModelSession { input: string | LanguageModelMessage[], options?: LanguageModelPromptOptions ): ReadableStream; - measureInputUsage(input: string | LanguageModelMessage[]): Promise; + measureContextUsage(input: string | LanguageModelMessage[]): Promise; destroy(): void; - readonly inputUsage: number; - readonly inputQuota: number; + readonly contextUsage: number; + readonly contextWindow: number; } export interface LanguageModelGlobal { From e13a12b933969e966cf59a1890bc0ed1c3735060 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Mon, 4 May 2026 23:59:30 +1000 Subject: [PATCH 5/5] Review fixes --- .../components/WasmSetup/WasmSetupGuard.tsx | 14 ++++- ui/src/service-worker/browser-ai.ts | 53 ++++++++++++++++++- 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/ui/src/components/WasmSetup/WasmSetupGuard.tsx b/ui/src/components/WasmSetup/WasmSetupGuard.tsx index cb7d933..2bee90a 100644 --- a/ui/src/components/WasmSetup/WasmSetupGuard.tsx +++ b/ui/src/components/WasmSetup/WasmSetupGuard.tsx @@ -151,11 +151,23 @@ export function WasmSetupGuard({ children }: { children: ReactNode }) { }); queryClient.invalidateQueries({ queryKey: apiV1ModelsQueryKey() }); } catch (err) { + // Re-query the actual availability rather than assuming "downloadable": + // a mid-download failure (e.g. storage pressure made the device + // ineligible) can transition the API to "unavailable", and resetting + // to "downloadable" would resurface a Download button that fails again + // on every click. + let availability: BrowserAiState["availability"] = "downloadable"; + try { + availability = await getAvailability(); + } catch { + // Bridge unreachable; "downloadable" is the safest default since the + // user already saw the download UI. 
+ } setBrowserAi((prev) => ({ ...prev, downloading: false, downloadProgress: null, - availability: "downloadable", + availability, error: formatApiError(err), })); } diff --git a/ui/src/service-worker/browser-ai.ts b/ui/src/service-worker/browser-ai.ts index c57e51d..e81da82 100644 --- a/ui/src/service-worker/browser-ai.ts +++ b/ui/src/service-worker/browser-ai.ts @@ -235,9 +235,23 @@ interface ResponsesPayload { tools?: ToolDef[]; } +interface ChatCompletionsToolCall { + id?: string; + type?: string; + function?: { name?: string; arguments?: string }; +} + +interface ChatCompletionsMessage { + role: string; + content?: string | Array<{ type: string; text?: string }> | null; + tool_calls?: ChatCompletionsToolCall[]; + tool_call_id?: string; + name?: string; +} + interface ChatCompletionsPayload { model: string; - messages: Array<{ role: string; content: string | Array<{ type: string; text?: string }> }>; + messages: ChatCompletionsMessage[]; stream?: boolean; temperature?: number; top_k?: number; @@ -467,7 +481,44 @@ function chatMessagesToBridge( messages: ChatCompletionsPayload["messages"] ): LanguageModelMessage[] { const out: LanguageModelMessage[] = []; + // Map tool_call_id → function name so a later `role: "tool"` reply can be + // rendered with the tool's name (mirroring the Responses-API path's + // `` markup). + const callIdToName = new Map(); for (const m of messages) { + if (m.role === "assistant" && Array.isArray(m.tool_calls)) { + for (const tc of m.tool_calls) { + const name = tc.function?.name; + if (typeof tc.id === "string" && typeof name === "string") { + callIdToName.set(tc.id, name); + } + } + } + } + + for (const m of messages) { + if (m.role === "tool") { + const callId = typeof m.tool_call_id === "string" ? m.tool_call_id : ""; + const name = callIdToName.get(callId) ?? m.name ?? "tool"; + const output = flattenContent(m.content); + out.push({ + role: "user", + content: `${output}`, + }); + continue; + } + if (m.role === "assistant" && Array.isArray(m.tool_calls) && m.tool_calls.length > 0) { + const calls = m.tool_calls + .map((tc) => { + const name = tc.function?.name ?? "tool"; + const args = tc.function?.arguments ?? "{}"; + return `${args}`; + }) + .join(""); + const text = flattenContent(m.content); + out.push({ role: "assistant", content: text ? `${text}${calls}` : calls }); + continue; + } if (m.role !== "system" && m.role !== "user" && m.role !== "assistant") continue; const text = flattenContent(m.content); if (!text) continue;