From cb6e83c55920943bb86e629af88d040cdf3f0e83 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Wed, 29 Apr 2026 18:37:46 +1000 Subject: [PATCH 1/5] Add Browser AI API --- ui/src/components/ModelPicker/model-utils.ts | 1 + ui/src/components/WasmSetup/BrowserAiCard.tsx | 138 ++ ui/src/components/WasmSetup/WasmSetup.tsx | 40 +- .../components/WasmSetup/WasmSetupGuard.tsx | 81 +- ui/src/pages/providers/shared.tsx | 4 + ui/src/service-worker/browser-ai.ts | 1196 +++++++++++++++++ ui/src/service-worker/sw.ts | 56 +- ui/src/services/browser-ai/availability.ts | 58 + ui/src/services/browser-ai/bridge.ts | 196 +++ ui/src/services/browser-ai/index.ts | 17 + ui/src/services/browser-ai/types.ts | 98 ++ 11 files changed, 1879 insertions(+), 6 deletions(-) create mode 100644 ui/src/components/WasmSetup/BrowserAiCard.tsx create mode 100644 ui/src/service-worker/browser-ai.ts create mode 100644 ui/src/services/browser-ai/availability.ts create mode 100644 ui/src/services/browser-ai/bridge.ts create mode 100644 ui/src/services/browser-ai/index.ts create mode 100644 ui/src/services/browser-ai/types.ts diff --git a/ui/src/components/ModelPicker/model-utils.ts b/ui/src/components/ModelPicker/model-utils.ts index e6a5415..8752619 100644 --- a/ui/src/components/ModelPicker/model-utils.ts +++ b/ui/src/components/ModelPicker/model-utils.ts @@ -137,6 +137,7 @@ const PROVIDER_LABELS: Record = { qwen: "Qwen", openrouter: "OpenRouter", test: "Test", + browser: "Browser AI", }; export function getProviderInfo( diff --git a/ui/src/components/WasmSetup/BrowserAiCard.tsx b/ui/src/components/WasmSetup/BrowserAiCard.tsx new file mode 100644 index 0000000..44e6dfe --- /dev/null +++ b/ui/src/components/WasmSetup/BrowserAiCard.tsx @@ -0,0 +1,138 @@ +import { CheckCircle2, Cpu, Download, ExternalLink, Loader2, XCircle } from "lucide-react"; +import { Button } from "@/components/Button/Button"; +import { cn } from "@/utils/cn"; +import type { LanguageModelAvailability } from "@/services/browser-ai"; + +export interface BrowserAiState { + /** True if `globalThis.LanguageModel` is exposed by this browser. */ + supported: boolean; + availability: LanguageModelAvailability; + /** 0..1, only meaningful while a download is in progress. */ + downloadProgress: number | null; + /** True while we are actively triggering or awaiting a download. */ + downloading: boolean; + error: string | null; +} + +interface BrowserAiCardProps { + state: BrowserAiState; + onDownload: () => void; + className?: string; +} + +export function BrowserAiCard({ state, onDownload, className }: BrowserAiCardProps) { + if (!state.supported) { + return ( +
+
+
+
+          Browser AI
+
+          On-device model running locally in your browser
+
+
+          Not supported
+
+
+        Open this page in Chrome 148+ (or recent Edge / Brave / other Chromium).{" "} docs
+
+    );
+  }
+
+  const isReady = state.availability === "available";
+  const isDownloading = state.downloading || state.availability === "downloading";
+  const isDownloadable = state.availability === "downloadable" && !isDownloading;
+  const progressPercent =
+    state.downloadProgress != null
+      ? Math.max(0, Math.min(100, state.downloadProgress * 100))
+      : null;
+
+  return (
+
+
+
+
+            Browser AI
+
+          Runs locally on-device. Private, free, no API key.
+
+        {isReady ? (
+            Ready
+        ) : isDownloading ? (
+            {progressPercent != null ? `${progressPercent.toFixed(0)}%` : "Downloading"}
+        ) : isDownloadable ? (
+
+        ) : (
+            Unavailable
+        )}
+
+      {isDownloading && progressPercent != null && (
+
+
+      )}
+
+      {state.error && (
+          {state.error}
+      )}
+
+      {state.availability === "unavailable" && !state.error && (
+          The browser exposes the API but reports the device as ineligible (typically not enough
+          memory, storage, or GPU). The model will appear here once your environment qualifies.
+      )}
+ ); +} diff --git a/ui/src/components/WasmSetup/WasmSetup.tsx b/ui/src/components/WasmSetup/WasmSetup.tsx index 69c56dc..8ec881f 100644 --- a/ui/src/components/WasmSetup/WasmSetup.tsx +++ b/ui/src/components/WasmSetup/WasmSetup.tsx @@ -33,6 +33,7 @@ import { Input } from "@/components/Input/Input"; import { FormField } from "@/components/FormField/FormField"; import { HadrianIcon } from "@/components/HadrianIcon/HadrianIcon"; import { startOpenRouterOAuth, isInIframe } from "./openrouter-oauth"; +import { BrowserAiCard, type BrowserAiState } from "./BrowserAiCard"; import { cn } from "@/utils/cn"; import { formatApiError } from "@/utils/formatApiError"; @@ -115,6 +116,8 @@ export function WasmSetup({ ollamaConnecting, ollamaConnected, onOllamaConnect, + browserAi, + onBrowserAiDownload, }: { open: boolean; onComplete: () => void; @@ -125,6 +128,8 @@ export function WasmSetup({ ollamaConnecting?: boolean; ollamaConnected?: boolean; onOllamaConnect?: () => void; + browserAi?: BrowserAiState; + onBrowserAiDownload?: () => void; }) { const [step, setStep] = useState("welcome"); const [entries, setEntries] = useState(initialEntries); @@ -244,10 +249,12 @@ export function WasmSetup({ } }, []); + const browserAiReady = browserAi?.availability === "available"; const savedCount = entries.filter((e) => e.saved).length + (hasExistingOpenRouter ? 1 : 0) + - (hasExistingOllama ? 1 : 0); + (hasExistingOllama ? 1 : 0) + + (browserAiReady ? 1 : 0); const hasAnySaved = savedCount > 0; return ( @@ -266,6 +273,8 @@ export function WasmSetup({ onOllamaConnect={onOllamaConnect} existingProviders={existingProviders} onDeleteExisting={handleDeleteExisting} + browserAi={browserAi} + onBrowserAiDownload={onBrowserAiDownload} /> )} {step === "providers" && ( @@ -290,6 +299,8 @@ export function WasmSetup({ onOllamaConnect={onOllamaConnect} existingProviders={existingProviders} onDeleteExisting={handleDeleteExisting} + browserAi={browserAi} + onBrowserAiDownload={onBrowserAiDownload} /> )} {step === "done" && } @@ -310,6 +321,8 @@ function WelcomeStep({ onOllamaConnect, existingProviders, onDeleteExisting, + browserAi, + onBrowserAiDownload, }: { onNext: () => void; onReady: () => void; @@ -323,8 +336,11 @@ function WelcomeStep({ onOllamaConnect?: () => void; existingProviders?: DynamicProviderResponse[]; onDeleteExisting: (id: string) => void; + browserAi?: BrowserAiState; + onBrowserAiDownload?: () => void; }) { - const hasProvider = hasExistingOpenRouter || hasExistingOllama; + const hasBrowserAiReady = browserAi?.availability === "available"; + const hasProvider = hasExistingOpenRouter || hasExistingOllama || hasBrowserAiReady; return ( <> @@ -457,6 +473,14 @@ function WelcomeStep({
        )}
+        {browserAi && (
+          <BrowserAiCard
+            state={browserAi}
+            onDownload={onBrowserAiDownload ?? (() => {})}
+            className="mt-3"
+          />
+        )}
+
{hasProvider ? "You can also add API keys from OpenAI, Anthropic, or other providers." @@ -524,6 +548,8 @@ function ProvidersStep({ onOllamaConnect, existingProviders, onDeleteExisting, + browserAi, + onBrowserAiDownload, }: { entries: ProviderEntry[]; onUpdate: (key: string, update: Partial) => void; @@ -545,6 +571,8 @@ function ProvidersStep({ onOllamaConnect?: () => void; existingProviders?: DynamicProviderResponse[]; onDeleteExisting: (id: string) => void; + browserAi?: BrowserAiState; + onBrowserAiDownload?: () => void; }) { return ( <> @@ -636,6 +664,14 @@ function ProvidersStep({ ) : null} + {browserAi && ( + {})} + className="mb-4" + /> + )} +
{entries.map((entry) => ( (() => ({ + supported: IS_WASM ? isLanguageModelSupported() : false, + availability: "unavailable", + downloadProgress: null, + downloading: false, + error: null, + })); const queryClient = useQueryClient(); const createProvider = useMutation({ ...meProvidersCreateMutation() }); @@ -85,6 +99,62 @@ export function WasmSetupGuard({ children }: { children: ReactNode }) { return () => controller.abort(); }, []); + // Install the LanguageModel bridge so the WASM service worker can reach + // the on-device Prompt API (only exposed in window scope), and surface the + // current availability state for the UI. + useEffect(() => { + if (!IS_WASM) return; + if (!isLanguageModelSupported()) return; + const uninstall = installBrowserAiBridge(); + let cancelled = false; + getAvailability().then((state) => { + if (cancelled) return; + setBrowserAi((prev) => ({ ...prev, availability: state })); + }); + return () => { + cancelled = true; + uninstall(); + }; + }, []); + + const handleBrowserAiDownload = useCallback(async () => { + const lm = getLanguageModel(); + if (!lm) return; + setBrowserAi((prev) => ({ + ...prev, + downloading: true, + downloadProgress: 0, + availability: "downloading", + error: null, + })); + try { + const session = await lm.create({ + monitor(m) { + m.addEventListener("downloadprogress", (event) => { + setBrowserAi((prev) => ({ ...prev, downloadProgress: event.loaded })); + }); + }, + }); + session.destroy(); + const next = await getAvailability(); + setBrowserAi((prev) => ({ + ...prev, + availability: next, + downloading: false, + downloadProgress: null, + })); + queryClient.invalidateQueries({ queryKey: apiV1ModelsQueryKey() }); + } catch (err) { + setBrowserAi((prev) => ({ + ...prev, + downloading: false, + downloadProgress: null, + availability: "downloadable", + error: formatApiError(err), + })); + } + }, [queryClient]); + const handleOllamaConnect = useCallback(async () => { setOllamaConnecting(true); try { @@ -165,8 +235,13 @@ export function WasmSetupGuard({ children }: { children: ReactNode }) { return {children}; } - // Auto-show: no providers and not previously dismissed - const needsOnboarding = !dismissed && !isLoading && (data?.data?.length ?? 0) === 0; + // Auto-show: no providers and not previously dismissed. Browser AI counts + // as a provider once the model is ready locally, since requests against + // it succeed without any setup the wizard could prompt for. + const dynamicProviderCount = data?.data?.length ?? 
0; + const browserAiCounts = browserAi.supported && browserAi.availability === "available"; + const needsOnboarding = + !dismissed && !isLoading && dynamicProviderCount === 0 && !browserAiCounts; return ( @@ -181,6 +256,8 @@ export function WasmSetupGuard({ children }: { children: ReactNode }) { ollamaConnecting={ollamaConnecting} ollamaConnected={ollamaConnected} onOllamaConnect={handleOllamaConnect} + browserAi={browserAi} + onBrowserAiDownload={handleBrowserAiDownload} /> ); diff --git a/ui/src/pages/providers/shared.tsx b/ui/src/pages/providers/shared.tsx index e764f93..b91154a 100644 --- a/ui/src/pages/providers/shared.tsx +++ b/ui/src/pages/providers/shared.tsx @@ -246,4 +246,8 @@ export const PROVIDER_COLORS: Record = { solid: "bg-gray-500", badge: "bg-gray-500/10 text-gray-700 dark:text-gray-400", }, + browser: { + solid: "bg-sky-500", + badge: "bg-sky-500/10 text-sky-700 dark:text-sky-400", + }, }; diff --git a/ui/src/service-worker/browser-ai.ts b/ui/src/service-worker/browser-ai.ts new file mode 100644 index 0000000..c8433c5 --- /dev/null +++ b/ui/src/service-worker/browser-ai.ts @@ -0,0 +1,1196 @@ +/// + +/** + * Service-worker side of the Browser AI integration. Intercepts requests for + * `browser/*` models and routes them through a postMessage bridge to the + * controlling window: the Prompt API global (`LanguageModel`) is only + * exposed in window/dedicated-worker scopes, not in service workers. + */ + +import type { LanguageModelMessage } from "../services/browser-ai/types"; +import { + BROWSER_AI_PREFIX, + BROWSER_AI_PROVIDER, + detectBrowserAiModel, +} from "../services/browser-ai/availability"; + +declare const self: ServiceWorkerGlobalScope; + +export { BROWSER_AI_PREFIX }; + +interface BridgeAvailabilityReply { + type: "AVAILABILITY"; + state: "available" | "downloadable" | "downloading" | "unavailable"; +} + +type BridgeReply = + | BridgeAvailabilityReply + | { type: "DOWNLOAD_PROGRESS"; loaded: number } + | { type: "DELTA"; text: string } + | { type: "DONE"; inputTokens: number; outputTokens: number } + | { type: "ABORTED" } + | { type: "ERROR"; message: string }; + +interface PromptToolDef { + name: string; + description?: string; + parameters?: object; +} + +let availabilityCache: { state: BridgeAvailabilityReply["state"]; checkedAt: number } | null = null; +const AVAILABILITY_TTL_MS = 60_000; + +export function isBrowserAiModel(model: unknown): boolean { + return typeof model === "string" && model.startsWith(BROWSER_AI_PREFIX); +} + +async function getClient(clientId: string): Promise { + if (clientId) { + const direct = await self.clients.get(clientId); + if (direct) return direct; + } + const all = await self.clients.matchAll({ type: "window", includeUncontrolled: false }); + return all[0] ?? 
null; +} + +async function sendToBridge( + client: Client, + payload: + | { type: "AVAILABILITY" } + | { + type: "PROMPT"; + messages: LanguageModelMessage[]; + temperature?: number; + topK?: number; + responseConstraint?: object; + }, + onMessage?: (reply: BridgeReply, port: MessagePort) => boolean, + signal?: AbortSignal +): Promise { + return new Promise((resolve, reject) => { + const channel = new MessageChannel(); + const port = channel.port1; + let settled = false; + + const cleanup = () => { + try { + port.close(); + } catch { + // ignored + } + if (signal && abortHandler) signal.removeEventListener("abort", abortHandler); + }; + + const settle = (fn: () => void) => { + if (settled) return; + settled = true; + cleanup(); + fn(); + }; + + let abortHandler: (() => void) | null = null; + if (signal) { + abortHandler = () => { + try { + port.postMessage({ type: "ABORT" }); + } catch { + // ignored + } + settle(() => reject(new DOMException("Aborted", "AbortError"))); + }; + if (signal.aborted) { + abortHandler(); + } else { + signal.addEventListener("abort", abortHandler); + } + } + + port.onmessage = (event: MessageEvent) => { + const reply = event.data as BridgeReply; + try { + if (onMessage) { + const finished = onMessage(reply, port); + if (finished) { + settle(() => resolve(reply as T)); + } + return; + } + // No streaming handler: resolve on first reply. + settle(() => resolve(reply as T)); + } catch (err) { + // Consumers signal terminal errors by throwing inside `onMessage` + // (e.g. on an ERROR reply). Catching here is critical: an uncaught + // throw inside a port.onmessage handler is silently swallowed, so + // without this the outer Promise would never settle and the SW + // request would hang until the page is reloaded. + settle(() => reject(err)); + } + }; + port.start(); + + try { + client.postMessage({ type: "BROWSER_AI_REQUEST", payload }, [channel.port2]); + } catch (err) { + settle(() => reject(err)); + } + }); +} + +export async function getCachedAvailability( + clientId: string +): Promise { + if (availabilityCache && Date.now() - availabilityCache.checkedAt < AVAILABILITY_TTL_MS) { + return availabilityCache.state; + } + const client = await getClient(clientId); + if (!client) return "unavailable"; + try { + const reply = await sendToBridge(client, { type: "AVAILABILITY" }); + if (reply.type === "AVAILABILITY") { + availabilityCache = { state: reply.state, checkedAt: Date.now() }; + return reply.state; + } + } catch { + // Bridge unavailable. + } + return "unavailable"; +} + +/** Append the browser AI model to a `/v1/models` response when supported. */ +export async function augmentModelsResponse( + response: Response, + clientId: string +): Promise { + let availability: BridgeAvailabilityReply["state"]; + try { + availability = await getCachedAvailability(clientId); + } catch { + return response; + } + if (availability === "unavailable") return response; + + let body: { data?: unknown[]; [k: string]: unknown }; + try { + body = await response.clone().json(); + } catch { + return response; + } + const data = Array.isArray(body.data) ? body.data : []; + + const detected = detectBrowserAiModel(); + const entry = { + id: detected.id, + object: "model", + created: 0, + owned_by: BROWSER_AI_PROVIDER, + source: "static", + description: + availability === "available" + ? `On-device ${detected.vendor} model, runs locally in your browser.` + : `On-device ${detected.vendor} model, runs locally in your browser. 
Downloads on first use.`, + capabilities: { tools: true, vision: false, streaming: true }, + modalities: { input: ["text"], output: ["text"] }, + tasks: ["chat"], + }; + + const augmented = { ...body, data: [...data, entry] }; + const headers = new Headers(response.headers); + headers.delete("content-length"); + return new Response(JSON.stringify(augmented), { + status: response.status, + statusText: response.statusText, + headers, + }); +} + +interface ToolDef { + type?: string; + name: string; + description?: string; + parameters?: object; +} + +interface ResponsesPayload { + model: string; + input: Array<{ + role?: string; + type?: string; + content?: string | Array<{ type: string; text?: string }>; + [k: string]: unknown; + }>; + stream?: boolean; + temperature?: number; + top_k?: number; + max_output_tokens?: number; + tools?: ToolDef[]; +} + +interface ChatCompletionsPayload { + model: string; + messages: Array<{ role: string; content: string | Array<{ type: string; text?: string }> }>; + stream?: boolean; + temperature?: number; + top_k?: number; + max_tokens?: number; + tools?: Array<{ type: string; function?: ToolDef }>; +} + +function flattenContent(content: unknown): string { + if (typeof content === "string") return content; + if (Array.isArray(content)) { + return content + .map((part) => { + if (typeof part === "string") return part; + if (part && typeof part === "object") { + const p = part as { type?: string; text?: string; value?: unknown }; + if (p.type === "input_text" || p.type === "output_text" || p.type === "text") { + return p.text ?? ""; + } + } + return ""; + }) + .join(""); + } + return ""; +} + +function inputToMessages(input: ResponsesPayload["input"]): LanguageModelMessage[] { + const messages: LanguageModelMessage[] = []; + + // function_call_output items reference a prior function_call by call_id. + // Build a lookup so we can render the result alongside the tool name in + // the synthetic conversation we feed to the Prompt API. + const callIdToName = new Map(); + for (const item of input) { + if ( + item.type === "function_call" && + typeof item.call_id === "string" && + typeof item.name === "string" + ) { + callIdToName.set(item.call_id, item.name); + } + } + + for (const item of input) { + if (item.type === "function_call") { + const name = typeof item.name === "string" ? item.name : "tool"; + const args = typeof item.arguments === "string" ? item.arguments : "{}"; + messages.push({ + role: "assistant", + content: `${args}`, + }); + continue; + } + if (item.type === "function_call_output") { + const callId = typeof item.call_id === "string" ? item.call_id : ""; + const name = callIdToName.get(callId) ?? "tool"; + const output = + typeof item.output === "string" ? item.output : JSON.stringify(item.output ?? ""); + messages.push({ + role: "user", + content: `${output}`, + }); + continue; + } + if (item.type && item.type !== "message") continue; + const role = item.role; + if (role !== "system" && role !== "user" && role !== "assistant") continue; + const text = flattenContent(item.content); + if (!text) continue; + messages.push({ role, content: text }); + } + return messages; +} + +/** Convert OpenAI-style tool definitions into the bridge's payload shape. */ +function extractTools(body: ResponsesPayload | ChatCompletionsPayload): PromptToolDef[] { + const out: PromptToolDef[] = []; + for (const t of body.tools ?? 
[]) { + if (!t) continue; + // Responses API wraps function tools flat: { type: "function", name, description, parameters } + // Chat completions wraps them: { type: "function", function: { name, description, parameters } } + const candidate = + "function" in t && t.function + ? t.function + : (t as { name?: string; description?: string; parameters?: object }); + if (!candidate || typeof candidate.name !== "string") continue; + const tType = (t as { type?: string }).type; + if (tType && tType !== "function") continue; + out.push({ + name: candidate.name, + description: candidate.description, + parameters: candidate.parameters, + }); + } + return out; +} + +/** + * Polyfill for the spec's native `tools` option, which Chrome rejects at + * `LanguageModel.create()` today. Instead of asking the model to emit + * `` markers in free text (which it mutates into markdown + * fences, drops closing tags, mixes with prose, etc), we describe the + * tools in the system prompt and force a JSON-shaped reply via + * `responseConstraint`. Chrome 137+ enforces the schema at decode time, + * which the swyx and dobidev write-ups identify as the only mechanism + * that reliably pins format on Gemini Nano. + */ +function injectToolPrompt( + messages: LanguageModelMessage[], + tools: PromptToolDef[] +): LanguageModelMessage[] { + if (tools.length === 0) return messages; + const toolBlock = tools + .map((t) => { + const params = t.parameters ? JSON.stringify(t.parameters) : "{}"; + const desc = t.description ?? "(no description)"; + return `- ${t.name}: ${desc}\n arguments schema: ${params}`; + }) + .join("\n\n"); + + const instructions = [ + "You can use tools. Your reply will be a JSON object with two optional fields:", + ' "tool_calls": list of tool invocations, each {"name": "...", "arguments": {...}}', + ' "text": plain-text reply to the user', + "", + "Use tool_calls when you need to run a tool. Use text when you have a final answer. You may use both.", + "", + "Available tools:", + "", + toolBlock, + "", + "Examples (these are entire valid replies):", + '{"tool_calls":[{"name":"code_interpreter","arguments":{"code":"print(\'hi\')"}}]}', + '{"tool_calls":[{"name":"code_interpreter","arguments":{"code":"import math\\nprint(math.pi)"}}]}', + '{"text":"Hello! How can I help?"}', + '{"text":"Let me check.","tool_calls":[{"name":"wikipedia","arguments":{"action":"search","query":"Einstein"}}]}', + "", + 'Tool results arrive in the next user message in the form: .... After receiving tool results, reply with {"text":"..."} containing your final answer.', + ].join("\n"); + + const out = messages.slice(); + const systemIdx = out.findIndex((m) => m.role === "system"); + if (systemIdx >= 0) { + out[systemIdx] = { + role: "system", + content: `${out[systemIdx].content}\n\n${instructions}`, + }; + } else { + out.unshift({ role: "system", content: instructions }); + } + return out; +} + +interface ParsedToolCall { + name: string; + arguments: string; +} + +interface ParsedEnvelope { + toolCalls: ParsedToolCall[]; + text: string; +} + +/** + * Build the `responseConstraint` schema for a request that has tools. + * Constrains the model to a `{tool_calls?, text?}` object where every + * tool name comes from the supplied list. Argument schemas are kept as + * plain `object` to avoid tripping up Chrome's JSON Schema implementation + * with per-tool oneOf gymnastics; per-arg validation happens downstream + * in Hadrian's tool executors. 
+ */ +function buildToolResponseSchema(tools: PromptToolDef[]): object { + const toolNames = tools.map((t) => t.name); + return { + type: "object", + properties: { + tool_calls: { + type: "array", + items: { + type: "object", + properties: { + name: { type: "string", enum: toolNames }, + arguments: { type: "object" }, + }, + required: ["name", "arguments"], + }, + }, + text: { type: "string" }, + }, + }; +} + +/** + * Parse the constrained JSON envelope returned by the model. Returns + * empty arrays when the body fails to parse so callers can fall back to + * a retry path. + */ +function parseEnvelope(raw: string): ParsedEnvelope | null { + const trimmed = raw.trim(); + if (!trimmed) return null; + let parsed: unknown; + try { + parsed = JSON.parse(trimmed); + } catch { + return null; + } + if (!parsed || typeof parsed !== "object") return null; + const obj = parsed as { tool_calls?: unknown; text?: unknown }; + const toolCalls: ParsedToolCall[] = []; + if (Array.isArray(obj.tool_calls)) { + for (const entry of obj.tool_calls) { + if (!entry || typeof entry !== "object") continue; + const item = entry as { name?: unknown; arguments?: unknown }; + if (typeof item.name !== "string") continue; + const args = + item.arguments && typeof item.arguments === "object" + ? (item.arguments as Record) + : {}; + toolCalls.push({ name: item.name, arguments: JSON.stringify(args) }); + } + } + const text = typeof obj.text === "string" ? obj.text : ""; + return { toolCalls, text }; +} + +function chatMessagesToBridge( + messages: ChatCompletionsPayload["messages"] +): LanguageModelMessage[] { + const out: LanguageModelMessage[] = []; + for (const m of messages) { + if (m.role !== "system" && m.role !== "user" && m.role !== "assistant") continue; + const text = flattenContent(m.content); + if (!text) continue; + out.push({ role: m.role, content: text }); + } + return out; +} + +function jsonError(message: string, status = 503): Response { + return new Response(JSON.stringify({ error: { message, type: "browser_ai_error" } }), { + status, + headers: { "content-type": "application/json" }, + }); +} + +function sseHeaders(): HeadersInit { + return { + "content-type": "text/event-stream; charset=utf-8", + "cache-control": "no-cache, no-transform", + "x-accel-buffering": "no", + }; +} + +function sseEvent(name: string, data: unknown): string { + return `event: ${name}\ndata: ${JSON.stringify(data)}\n\n`; +} + +function genId(prefix: string): string { + return `${prefix}_${Math.random().toString(36).slice(2, 12)}${Date.now().toString(36)}`; +} + +/** Handle `/api/v1/responses` for Browser AI. */ +export async function handleResponsesRequest( + request: Request, + body: ResponsesPayload, + clientId: string +): Promise { + const client = await getClient(clientId); + if (!client) return jsonError("No active client to handle Browser AI request."); + + let messages = inputToMessages(body.input ?? 
[]); + if (messages.length === 0) { + return jsonError("Browser AI requires at least one text message.", 400); + } + + const tools = extractTools(body); + const responseId = genId("resp"); + const model = body.model; + const stream = body.stream !== false; + + if (tools.length > 0) { + messages = injectToolPrompt(messages, tools); + return generateToolModeResponse( + client, + body, + messages, + tools, + request.signal, + responseId, + model, + stream + ); + } + + const messageItemId = genId("msg"); + if (!stream) { + return generateNonStreamingResponse( + client, + body, + messages, + request.signal, + responseId, + messageItemId, + model + ); + } + return generateStreamingResponse( + client, + body, + messages, + request.signal, + responseId, + messageItemId, + model + ); +} + +/** + * Tool-aware path. Buffers the full generated text from the bridge, parses + * `` markers, and emits either function_call output items or a + * single message item depending on what the model produced. Always wraps + * the result in the Responses-API event sequence so the chat UI sees its + * normal lifecycle, even though no text is streamed token-by-token. + */ +async function generateToolModeResponse( + client: Client, + body: ResponsesPayload, + messages: LanguageModelMessage[], + tools: PromptToolDef[], + signal: AbortSignal, + responseId: string, + model: string, + stream: boolean +): Promise { + const schema = buildToolResponseSchema(tools); + + async function runOnce( + msgs: LanguageModelMessage[] + ): Promise<{ raw: string; inputTokens: number; outputTokens: number }> { + let raw = ""; + let inputTokens = 0; + let outputTokens = 0; + await sendToBridge( + client, + { + type: "PROMPT", + messages: msgs, + temperature: body.temperature, + topK: body.top_k, + responseConstraint: schema, + }, + (reply) => { + if (reply.type === "DELTA") { + raw += reply.text; + return false; + } + if (reply.type === "DONE") { + inputTokens = reply.inputTokens; + outputTokens = reply.outputTokens; + return true; + } + if (reply.type === "ERROR") throw new Error(reply.message); + if (reply.type === "ABORTED") throw new DOMException("Aborted", "AbortError"); + return false; + }, + signal + ); + return { raw, inputTokens, outputTokens }; + } + + let raw: string; + let inputTokens: number; + let outputTokens: number; + try { + ({ raw, inputTokens, outputTokens } = await runOnce(messages)); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return jsonError(`Browser AI: ${message}`); + } + + let envelope = parseEnvelope(raw); + // Parse-and-retry safety net. If the model returned text that doesn't + // fit the envelope (or fits but is empty), give it one more shot with + // an explicit reminder. Limited to a single retry to avoid loops. + const empty = !envelope || (envelope.toolCalls.length === 0 && !envelope.text); + if (empty) { + const retryMessages: LanguageModelMessage[] = [ + ...messages, + { role: "assistant", content: raw || "(empty)" }, + { + role: "user", + content: + "That reply did not match the required JSON shape. Reply ONLY with a JSON object: " + + '{"tool_calls":[{"name":"...","arguments":{...}}]} or {"text":"..."}. ' + + "Use the same tool names and argument schemas listed earlier.", + }, + ]; + try { + const retry = await runOnce(retryMessages); + raw = retry.raw; + inputTokens += retry.inputTokens; + outputTokens += retry.outputTokens; + envelope = parseEnvelope(raw); + } catch { + // Fall through with what we have. + } + } + + const toolCalls = envelope?.toolCalls ?? 
[]; + const text = envelope?.text ?? ""; + const createdAt = Math.floor(Date.now() / 1000); + + const outputItems: Array> = []; + for (const call of toolCalls) { + const fcId = genId("fc"); + outputItems.push({ + id: fcId, + type: "function_call", + call_id: fcId, + name: call.name, + arguments: call.arguments, + status: "completed", + }); + } + if (text) { + outputItems.push({ + id: genId("msg"), + type: "message", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text }], + }); + } + if (outputItems.length === 0) { + // Both retries produced nothing usable. Surface the raw output so the + // user sees what went wrong rather than an empty turn. + outputItems.push({ + id: genId("msg"), + type: "message", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text: raw.trim() || "(no response)" }], + }); + } + + const completedResponse = { + id: responseId, + object: "response", + created_at: createdAt, + status: "completed" as const, + model, + output: outputItems, + output_text: text, + usage: { + input_tokens: inputTokens, + output_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }; + + if (!stream) { + return new Response(JSON.stringify(completedResponse), { + status: 200, + headers: { "content-type": "application/json" }, + }); + } + + const encoder = new TextEncoder(); + const sseStream = new ReadableStream({ + start(controller) { + const enqueue = (event: string, data: unknown) => { + controller.enqueue(encoder.encode(sseEvent(event, data))); + }; + + enqueue("response.created", { + type: "response.created", + response: { ...completedResponse, status: "in_progress", output: [] }, + }); + + let outputIndex = 0; + for (const item of outputItems) { + const isFunctionCall = item.type === "function_call"; + enqueue("response.output_item.added", { + type: "response.output_item.added", + output_index: outputIndex, + item: isFunctionCall + ? { ...item, arguments: "" } + : { ...item, status: "in_progress", content: [] }, + }); + + if (isFunctionCall) { + enqueue("response.function_call_arguments.delta", { + type: "response.function_call_arguments.delta", + item_id: item.id, + output_index: outputIndex, + delta: item.arguments, + }); + enqueue("response.function_call_arguments.done", { + type: "response.function_call_arguments.done", + item_id: item.id, + output_index: outputIndex, + arguments: item.arguments, + }); + } else { + const text = (item.content as Array<{ text: string }>)[0]?.text ?? 
""; + enqueue("response.content_part.added", { + type: "response.content_part.added", + item_id: item.id, + output_index: outputIndex, + content_index: 0, + part: { type: "output_text", text: "" }, + }); + enqueue("response.output_text.delta", { + type: "response.output_text.delta", + item_id: item.id, + output_index: outputIndex, + content_index: 0, + delta: text, + }); + enqueue("response.output_text.done", { + type: "response.output_text.done", + item_id: item.id, + output_index: outputIndex, + content_index: 0, + text, + }); + enqueue("response.content_part.done", { + type: "response.content_part.done", + item_id: item.id, + output_index: outputIndex, + content_index: 0, + part: { type: "output_text", text }, + }); + } + + enqueue("response.output_item.done", { + type: "response.output_item.done", + output_index: outputIndex, + item, + }); + outputIndex += 1; + } + + enqueue("response.completed", { + type: "response.completed", + response: completedResponse, + }); + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + controller.close(); + }, + }); + + return new Response(sseStream, { status: 200, headers: sseHeaders() }); +} + +async function generateNonStreamingResponse( + client: Client, + body: ResponsesPayload, + messages: LanguageModelMessage[], + signal: AbortSignal, + responseId: string, + messageItemId: string, + model: string +): Promise { + let outputText = ""; + let inputTokens = 0; + let outputTokens = 0; + try { + await sendToBridge( + client, + { + type: "PROMPT", + messages, + temperature: body.temperature, + topK: body.top_k, + }, + (reply) => { + if (reply.type === "DELTA") { + outputText += reply.text; + return false; + } + if (reply.type === "DONE") { + inputTokens = reply.inputTokens; + outputTokens = reply.outputTokens; + return true; + } + if (reply.type === "ERROR") { + throw new Error(reply.message); + } + if (reply.type === "ABORTED") { + throw new DOMException("Aborted", "AbortError"); + } + return false; + }, + signal + ); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + return jsonError(`Browser AI: ${message}`); + } + + return new Response( + JSON.stringify({ + id: responseId, + object: "response", + created_at: Math.floor(Date.now() / 1000), + status: "completed", + model, + output: [ + { + id: messageItemId, + type: "message", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text: outputText }], + }, + ], + output_text: outputText, + usage: { + input_tokens: inputTokens, + output_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }), + { status: 200, headers: { "content-type": "application/json" } } + ); +} + +async function generateStreamingResponse( + client: Client, + body: ResponsesPayload, + messages: LanguageModelMessage[], + signal: AbortSignal, + responseId: string, + messageItemId: string, + model: string +): Promise { + const encoder = new TextEncoder(); + const createdAt = Math.floor(Date.now() / 1000); + + const stream = new ReadableStream({ + async start(controller) { + const enqueue = (event: string, data: unknown) => { + controller.enqueue(encoder.encode(sseEvent(event, data))); + }; + + const baseResponse = { + id: responseId, + object: "response", + created_at: createdAt, + model, + status: "in_progress", + output: [] as unknown[], + }; + + enqueue("response.created", { type: "response.created", response: baseResponse }); + + enqueue("response.output_item.added", { + type: "response.output_item.added", + output_index: 0, + item: { + id: messageItemId, + type: "message", + role: "assistant", + status: "in_progress", + content: [], + }, + }); + + enqueue("response.content_part.added", { + type: "response.content_part.added", + item_id: messageItemId, + output_index: 0, + content_index: 0, + part: { type: "output_text", text: "" }, + }); + + let outputText = ""; + let inputTokens = 0; + let outputTokens = 0; + let downloading = false; + + try { + await sendToBridge( + client, + { + type: "PROMPT", + messages, + temperature: body.temperature, + topK: body.top_k, + }, + (reply) => { + if (reply.type === "DOWNLOAD_PROGRESS") { + if (!downloading) { + downloading = true; + enqueue("response.browser_ai.download.started", { + type: "response.browser_ai.download.started", + }); + } + enqueue("response.browser_ai.download.progress", { + type: "response.browser_ai.download.progress", + loaded: reply.loaded, + }); + return false; + } + if (reply.type === "DELTA") { + outputText += reply.text; + enqueue("response.output_text.delta", { + type: "response.output_text.delta", + item_id: messageItemId, + output_index: 0, + content_index: 0, + delta: reply.text, + }); + return false; + } + if (reply.type === "DONE") { + inputTokens = reply.inputTokens; + outputTokens = reply.outputTokens; + return true; + } + if (reply.type === "ERROR") { + throw new Error(reply.message); + } + if (reply.type === "ABORTED") { + throw new DOMException("Aborted", "AbortError"); + } + return false; + }, + signal + ); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + enqueue("response.error", { + type: "response.error", + error: { message: `Browser AI: ${message}`, type: "browser_ai_error" }, + }); + controller.close(); + return; + } + + enqueue("response.output_text.done", { + type: "response.output_text.done", + item_id: messageItemId, + output_index: 0, + content_index: 0, + text: outputText, + }); + + enqueue("response.content_part.done", { + type: "response.content_part.done", + item_id: messageItemId, + output_index: 0, + content_index: 0, + part: { type: "output_text", text: outputText }, + }); + + enqueue("response.output_item.done", { + type: "response.output_item.done", + output_index: 0, + item: { + id: messageItemId, + type: "message", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text: outputText }], + }, + }); + + enqueue("response.completed", { + type: "response.completed", + response: { + ...baseResponse, + status: "completed", + output: [ + { + id: messageItemId, + type: "message", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text: outputText }], + }, + ], + output_text: outputText, + usage: { + input_tokens: inputTokens, + output_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }, + }); + + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + controller.close(); + }, + }); + + return new Response(stream, { status: 200, headers: sseHeaders() }); +} + +/** Handle `/v1/chat/completions` for Browser AI. */ +export async function handleChatCompletionsRequest( + request: Request, + body: ChatCompletionsPayload, + clientId: string +): Promise { + const client = await getClient(clientId); + if (!client) return jsonError("No active client to handle Browser AI request."); + + let messages = chatMessagesToBridge(body.messages ?? []); + if (messages.length === 0) { + return jsonError("Browser AI requires at least one text message.", 400); + } + + const tools = extractTools(body); + if (tools.length > 0) { + messages = injectToolPrompt(messages, tools); + } + const id = genId("chatcmpl"); + const created = Math.floor(Date.now() / 1000); + const model = body.model; + const stream = body.stream === true; + + if (!stream) { + let outputText = ""; + let inputTokens = 0; + let outputTokens = 0; + try { + await sendToBridge( + client, + { + type: "PROMPT", + messages, + temperature: body.temperature, + topK: body.top_k, + }, + (reply) => { + if (reply.type === "DELTA") { + outputText += reply.text; + return false; + } + if (reply.type === "DONE") { + inputTokens = reply.inputTokens; + outputTokens = reply.outputTokens; + return true; + } + if (reply.type === "ERROR") throw new Error(reply.message); + if (reply.type === "ABORTED") throw new DOMException("Aborted", "AbortError"); + return false; + }, + request.signal + ); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + return jsonError(`Browser AI: ${message}`); + } + + return new Response( + JSON.stringify({ + id, + object: "chat.completion", + created, + model, + choices: [ + { + index: 0, + message: { role: "assistant", content: outputText }, + finish_reason: "stop", + }, + ], + usage: { + prompt_tokens: inputTokens, + completion_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }), + { status: 200, headers: { "content-type": "application/json" } } + ); + } + + const encoder = new TextEncoder(); + const sseStream = new ReadableStream({ + async start(controller) { + const writeChunk = (chunk: Record) => { + controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`)); + }; + + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: { role: "assistant" }, finish_reason: null }], + }); + + let outputText = ""; + let inputTokens = 0; + let outputTokens = 0; + + try { + await sendToBridge( + client, + { + type: "PROMPT", + messages, + temperature: body.temperature, + topK: body.top_k, + }, + (reply) => { + if (reply.type === "DELTA") { + outputText += reply.text; + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: { content: reply.text }, finish_reason: null }], + }); + return false; + } + if (reply.type === "DONE") { + inputTokens = reply.inputTokens; + outputTokens = reply.outputTokens; + return true; + } + if (reply.type === "ERROR") throw new Error(reply.message); + if (reply.type === "ABORTED") throw new DOMException("Aborted", "AbortError"); + return false; + }, + request.signal + ); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + writeChunk({ + error: { message: `Browser AI: ${message}`, type: "browser_ai_error" }, + }); + controller.close(); + return; + } + + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: {}, finish_reason: "stop" }], + usage: { + prompt_tokens: inputTokens, + completion_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }); + + // Acknowledge unused output for type-checker: `outputText` tracks the + // streamed text but we don't replay it at the end. 
+ void outputText; + + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + controller.close(); + }, + }); + + return new Response(sseStream, { status: 200, headers: sseHeaders() }); +} + +export type { ResponsesPayload, ChatCompletionsPayload }; diff --git a/ui/src/service-worker/sw.ts b/ui/src/service-worker/sw.ts index 9b70b21..3e5ccc7 100644 --- a/ui/src/service-worker/sw.ts +++ b/ui/src/service-worker/sw.ts @@ -23,6 +23,15 @@ import "./sqlite-bridge"; import wasmInit, { HadrianGateway } from "/wasm/hadrian.js"; import { formatApiError } from "../utils/formatApiError"; +import { + augmentModelsResponse, + handleChatCompletionsRequest, + handleResponsesRequest, + isBrowserAiModel, + type ChatCompletionsPayload, + type ResponsesPayload, +} from "./browser-ai"; + let gateway: HadrianGateway | null = null; let initPromise: Promise | null = null; @@ -59,10 +68,10 @@ self.addEventListener("fetch", (event) => { if (url.origin !== self.location.origin) return; if (!GATEWAY_PATHS.some((p) => url.pathname.startsWith(p))) return; - event.respondWith(handleRequest(event.request)); + event.respondWith(handleRequest(event.request, url, event.clientId)); }); -async function handleRequest(request: Request): Promise { +async function handleRequest(request: Request, url: URL, clientId: string): Promise { // Lazy-init the WASM gateway on first intercepted request if (!gateway) { if (!initPromise) { @@ -90,6 +99,8 @@ async function handleRequest(request: Request): Promise { } try { + const intercepted = await maybeHandleBrowserAi(request, url, clientId); + if (intercepted) return intercepted; return await gateway!.handle(request); } catch (error) { console.error("Hadrian WASM gateway error:", error); @@ -108,3 +119,44 @@ async function handleRequest(request: Request): Promise { ); } } + +function isResponsesPath(pathname: string): boolean { + return pathname.endsWith("/v1/responses"); +} + +function isChatCompletionsPath(pathname: string): boolean { + return pathname.endsWith("/v1/chat/completions"); +} + +function isModelsPath(pathname: string): boolean { + return pathname.endsWith("/v1/models"); +} + +async function maybeHandleBrowserAi( + request: Request, + url: URL, + clientId: string +): Promise { + if (request.method === "GET" && isModelsPath(url.pathname)) { + const upstream = await gateway!.handle(request); + return augmentModelsResponse(upstream, clientId); + } + + if (request.method !== "POST") return null; + if (!isResponsesPath(url.pathname) && !isChatCompletionsPath(url.pathname)) return null; + + let body: unknown; + try { + body = await request.clone().json(); + } catch { + return null; + } + if (!body || typeof body !== "object") return null; + const model = (body as { model?: unknown }).model; + if (!isBrowserAiModel(model)) return null; + + if (isResponsesPath(url.pathname)) { + return handleResponsesRequest(request, body as ResponsesPayload, clientId); + } + return handleChatCompletionsRequest(request, body as ChatCompletionsPayload, clientId); +} diff --git a/ui/src/services/browser-ai/availability.ts b/ui/src/services/browser-ai/availability.ts new file mode 100644 index 0000000..ffc6f64 --- /dev/null +++ b/ui/src/services/browser-ai/availability.ts @@ -0,0 +1,58 @@ +import type { LanguageModelAvailability, LanguageModelGlobal } from "./types"; + +export const BROWSER_AI_PROVIDER = "browser"; +export const BROWSER_AI_PREFIX = `${BROWSER_AI_PROVIDER}/`; + +/** + * Best-guess identifier for the on-device model behind the Prompt API. 
The + * spec exposes no model name, so we infer one from the user agent. Chrome and + * Brave ship Gemini Nano; Edge announced Phi-4 Mini for its on-device stack. + * Anything else falls back to a generic id that still lets the routing layer + * recognise the model via the `browser/` prefix. + */ +export function detectBrowserAiModel(): { id: string; modelName: string; vendor: string } { + const ua = typeof navigator !== "undefined" ? (navigator.userAgent ?? "") : ""; + // The doubled `browser-` prefix on the model name surfaces as + // "Browser " in the model picker after the provider segment is + // stripped (formatModelName splits on hyphens). Without it the picker + // would show just "Gemini Nano" with no Browser-AI cue. + if (/\bEdg\//.test(ua)) { + return { + id: `${BROWSER_AI_PREFIX}browser-phi-4-mini`, + modelName: "browser-phi-4-mini", + vendor: "Edge", + }; + } + if (/\b(?:Chrome|Chromium|Brave)\//.test(ua) || ua.includes(" Brave/")) { + return { + id: `${BROWSER_AI_PREFIX}browser-gemini-nano`, + modelName: "browser-gemini-nano", + vendor: "Chromium", + }; + } + return { + id: `${BROWSER_AI_PREFIX}browser-on-device`, + modelName: "browser-on-device", + vendor: "Browser", + }; +} + +export function getLanguageModel(): LanguageModelGlobal | null { + if (typeof globalThis === "undefined") return null; + const lm = (globalThis as unknown as { LanguageModel?: LanguageModelGlobal }).LanguageModel; + return lm ?? null; +} + +export function isLanguageModelSupported(): boolean { + return getLanguageModel() !== null; +} + +export async function getAvailability(): Promise { + const lm = getLanguageModel(); + if (!lm) return "unavailable"; + try { + return await lm.availability(); + } catch { + return "unavailable"; + } +} diff --git a/ui/src/services/browser-ai/bridge.ts b/ui/src/services/browser-ai/bridge.ts new file mode 100644 index 0000000..b3c5cf5 --- /dev/null +++ b/ui/src/services/browser-ai/bridge.ts @@ -0,0 +1,196 @@ +import { getAvailability, getLanguageModel } from "./availability"; +import type { LanguageModelMessage, LanguageModelSession } from "./types"; + +/** + * Window-side bridge that responds to LanguageModel requests from the + * service worker. The Prompt API (`globalThis.LanguageModel`) is only + * exposed in window/dedicated-worker scopes, so we relay calls from the + * SW through this bridge over a `MessageChannel` per request. + * + * Tools are not passed through to the model: the spec defines a native + * `tools` option, but Chrome rejects sessions that supply one + * ("the device is unable to create a session to run the model"). Until + * that ships, the SW polyfills tools by injecting their descriptions into + * the system prompt and parsing `` markers from the streamed + * text. This bridge stays intentionally tool-agnostic so it works on every + * Chromium channel that ships the Prompt API. + */ + +interface PromptRequestPayload { + type: "PROMPT"; + messages: LanguageModelMessage[]; + temperature?: number; + topK?: number; + /** + * JSON Schema for `responseConstraint`. When set the bridge runs the + * non-streaming `prompt()` API: the model output is forced to match the + * schema, and partial chunks would be malformed JSON anyway. 
+ */ + responseConstraint?: object; +} + +interface AvailabilityRequestPayload { + type: "AVAILABILITY"; +} + +type BridgeRequest = PromptRequestPayload | AvailabilityRequestPayload; + +export function installBrowserAiBridge(): () => void { + if (typeof navigator === "undefined" || !("serviceWorker" in navigator)) { + return () => {}; + } + + const handler = (event: MessageEvent) => { + const data = event.data as { type?: string; payload?: BridgeRequest } | null; + if (!data || data.type !== "BROWSER_AI_REQUEST" || !data.payload) return; + const port = event.ports?.[0]; + if (!port) return; + handleRequest(port, data.payload).catch((err: unknown) => { + const message = err instanceof Error ? err.message : String(err); + try { + port.postMessage({ type: "ERROR", message }); + port.close(); + } catch { + // Port already closed. + } + }); + }; + + navigator.serviceWorker.addEventListener("message", handler); + return () => navigator.serviceWorker.removeEventListener("message", handler); +} + +async function handleRequest(port: MessagePort, payload: BridgeRequest): Promise { + if (payload.type === "AVAILABILITY") { + port.postMessage({ type: "AVAILABILITY", state: await getAvailability() }); + port.close(); + return; + } + + if (payload.type === "PROMPT") { + await handlePrompt(port, payload); + return; + } + + port.postMessage({ + type: "ERROR", + message: `Unknown bridge request type: ${(payload as { type?: string }).type}`, + }); + port.close(); +} + +async function handlePrompt(port: MessagePort, payload: PromptRequestPayload): Promise { + const lm = getLanguageModel(); + if (!lm) { + port.postMessage({ + type: "ERROR", + message: "Browser AI is not available in this browser.", + }); + port.close(); + return; + } + + const abort = new AbortController(); + let session: LanguageModelSession | null = null; + // Tear down the on-device session on cancel. Chrome's LanguageModel + // implementation does not always honour AbortSignal mid-prompt, so an + // abort that only fires the controller can leave the call hanging + // indefinitely. Destroying the session forces it to release. + abort.signal.addEventListener("abort", () => { + try { + session?.destroy(); + } catch { + // ignored + } + }); + port.addEventListener("message", (event: MessageEvent) => { + if ((event.data as { type?: string } | null)?.type === "ABORT") { + abort.abort(); + } + }); + port.start(); + + const systemMessages = payload.messages.filter((m) => m.role === "system"); + const conversation = payload.messages.filter((m) => m.role !== "system"); + + try { + session = await lm.create({ + initialPrompts: systemMessages.length > 0 ? systemMessages : undefined, + temperature: payload.temperature, + topK: payload.topK, + monitor(m) { + m.addEventListener("downloadprogress", (event) => { + port.postMessage({ type: "DOWNLOAD_PROGRESS", loaded: event.loaded }); + }); + }, + signal: abort.signal, + }); + + let inputTokens = 0; + try { + inputTokens = await session.measureInputUsage(conversation); + } catch { + // measureInputUsage may not be implemented on every channel. + } + + let outputText = ""; + if (payload.responseConstraint) { + // Constrained output: token chunks would be malformed JSON, so use + // the non-streaming API and surface the full response as one delta. 
+ outputText = await session.prompt(conversation, { + signal: abort.signal, + responseConstraint: payload.responseConstraint, + }); + if (outputText) port.postMessage({ type: "DELTA", text: outputText }); + } else { + const stream = session.promptStreaming(conversation, { signal: abort.signal }); + const reader = stream.getReader(); + let cumulative = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + // Most Chromium channels stream deltas; older channels streamed + // cumulative text. Detect and normalise to deltas. + let delta: string; + if ( + value.length >= cumulative.length && + value.startsWith(cumulative) && + cumulative.length > 0 + ) { + delta = value.slice(cumulative.length); + cumulative = value; + } else { + delta = value; + cumulative += value; + } + if (!delta) continue; + outputText += delta; + port.postMessage({ type: "DELTA", text: delta }); + } + } + + let outputTokens = 0; + try { + outputTokens = await session.measureInputUsage([{ role: "assistant", content: outputText }]); + } catch { + outputTokens = Math.max(1, Math.ceil(outputText.length / 4)); + } + + port.postMessage({ type: "DONE", inputTokens, outputTokens }); + } catch (err: unknown) { + if (abort.signal.aborted) { + port.postMessage({ type: "ABORTED" }); + } else { + const message = err instanceof Error ? err.message : String(err); + port.postMessage({ type: "ERROR", message }); + } + } finally { + session?.destroy(); + try { + port.close(); + } catch { + // Already closed. + } + } +} diff --git a/ui/src/services/browser-ai/index.ts b/ui/src/services/browser-ai/index.ts new file mode 100644 index 0000000..60cdf21 --- /dev/null +++ b/ui/src/services/browser-ai/index.ts @@ -0,0 +1,17 @@ +export { + BROWSER_AI_PREFIX, + BROWSER_AI_PROVIDER, + detectBrowserAiModel, + getAvailability, + getLanguageModel, + isLanguageModelSupported, +} from "./availability"; +export { installBrowserAiBridge } from "./bridge"; +export type { + LanguageModelAvailability, + LanguageModelGlobal, + LanguageModelMessage, + LanguageModelMonitor, + LanguageModelParams, + LanguageModelSession, +} from "./types"; diff --git a/ui/src/services/browser-ai/types.ts b/ui/src/services/browser-ai/types.ts new file mode 100644 index 0000000..275c109 --- /dev/null +++ b/ui/src/services/browser-ai/types.ts @@ -0,0 +1,98 @@ +/** + * Type declarations for the on-device LanguageModel API exposed by recent + * Chromium browsers (Chrome, Edge, Brave, etc.). Lives on `window` and + * dedicated workers; not exposed in service workers (see the bridge in + * `service-worker/browser-ai.ts` for how the SW reaches it). + * + * Spec: https://github.com/webmachinelearning/prompt-api + */ + +export type LanguageModelAvailability = + | "available" + | "downloadable" + | "downloading" + | "unavailable"; + +export interface LanguageModelMonitor { + addEventListener(type: "downloadprogress", listener: (event: { loaded: number }) => void): void; + removeEventListener( + type: "downloadprogress", + listener: (event: { loaded: number }) => void + ): void; +} + +export interface LanguageModelMessage { + role: "system" | "user" | "assistant"; + content: string; +} + +/** + * Spec-native tool entry for `LanguageModel.create({ tools: [...] })`. The + * runtime invokes `execute` whenever the model decides to call this tool; + * the returned string is fed back as the tool result. 
+ * https://github.com/webmachinelearning/prompt-api?tab=readme-ov-file#tool-use + */ +export interface LanguageModelTool { + name: string; + description?: string; + inputSchema: object; + execute: (args: Record) => Promise | string; +} + +export interface LanguageModelExpectedIO { + type: "text" | "tool-call" | "tool-response" | "image" | "audio"; + languages?: string[]; +} + +export interface LanguageModelCreateOptions { + initialPrompts?: LanguageModelMessage[]; + temperature?: number; + topK?: number; + tools?: LanguageModelTool[]; + expectedInputs?: LanguageModelExpectedIO[]; + expectedOutputs?: LanguageModelExpectedIO[]; + monitor?: (m: LanguageModelMonitor) => void; + signal?: AbortSignal; +} + +export interface LanguageModelParams { + defaultTemperature: number; + maxTemperature: number; + defaultTopK: number; + maxTopK: number; +} + +export interface LanguageModelPromptOptions { + signal?: AbortSignal; + /** JSON Schema constraining the model output at decode time (Chrome 137+). */ + responseConstraint?: object; + /** Skip auto-injection of the schema into the prompt context. */ + omitResponseConstraintInput?: boolean; +} + +export interface LanguageModelSession { + prompt( + input: string | LanguageModelMessage[], + options?: LanguageModelPromptOptions + ): Promise; + promptStreaming( + input: string | LanguageModelMessage[], + options?: LanguageModelPromptOptions + ): ReadableStream; + measureInputUsage(input: string | LanguageModelMessage[]): Promise; + destroy(): void; + readonly inputUsage: number; + readonly inputQuota: number; +} + +export interface LanguageModelGlobal { + availability(): Promise; + params(): Promise; + create(options?: LanguageModelCreateOptions): Promise; +} + +declare global { + var LanguageModel: LanguageModelGlobal | undefined; +} + +export {}; From 9230378b7a7cf802fce67d61af3bdaa7fe2b5870 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Wed, 29 Apr 2026 20:23:47 +1000 Subject: [PATCH 2/5] Fixes --- ui/src/service-worker/browser-ai.ts | 60 +++++++++------------------- ui/src/services/browser-ai/bridge.ts | 21 ++++++++-- 2 files changed, 36 insertions(+), 45 deletions(-) diff --git a/ui/src/service-worker/browser-ai.ts b/ui/src/service-worker/browser-ai.ts index c8433c5..edcfe0d 100644 --- a/ui/src/service-worker/browser-ai.ts +++ b/ui/src/service-worker/browser-ai.ts @@ -45,12 +45,12 @@ export function isBrowserAiModel(model: unknown): boolean { } async function getClient(clientId: string): Promise { - if (clientId) { - const direct = await self.clients.get(clientId); - if (direct) return direct; - } - const all = await self.clients.matchAll({ type: "window", includeUncontrolled: false }); - return all[0] ?? null; + // Only the originating tab's window can service the request: its bridge + // owns the conversation context and abort signal. Falling back to + // "first window client" cross-routes between tabs. + if (!clientId) return null; + const direct = await self.clients.get(clientId); + return direct ?? null; } async function sendToBridge( @@ -167,7 +167,10 @@ export async function augmentModelsResponse( } catch { return response; } - if (availability === "unavailable") return response; + // Only expose the model after the user has explicitly downloaded it via + // the wizard. Listing it while merely `downloadable` would trigger a + // multi-GB download on first chat use with no progress indication. 
+ if (availability !== "available") return response; let body: { data?: unknown[]; [k: string]: unknown }; try { @@ -184,10 +187,7 @@ export async function augmentModelsResponse( created: 0, owned_by: BROWSER_AI_PROVIDER, source: "static", - description: - availability === "available" - ? `On-device ${detected.vendor} model, runs locally in your browser.` - : `On-device ${detected.vendor} model, runs locally in your browser. Downloads on first use.`, + description: `On-device ${detected.vendor} model, runs locally in your browser.`, capabilities: { tools: true, vision: false, streaming: true }, modalities: { input: ["text"], output: ["text"] }, tasks: ["chat"], @@ -608,34 +608,12 @@ async function generateToolModeResponse( return jsonError(`Browser AI: ${message}`); } - let envelope = parseEnvelope(raw); - // Parse-and-retry safety net. If the model returned text that doesn't - // fit the envelope (or fits but is empty), give it one more shot with - // an explicit reminder. Limited to a single retry to avoid loops. - const empty = !envelope || (envelope.toolCalls.length === 0 && !envelope.text); - if (empty) { - const retryMessages: LanguageModelMessage[] = [ - ...messages, - { role: "assistant", content: raw || "(empty)" }, - { - role: "user", - content: - "That reply did not match the required JSON shape. Reply ONLY with a JSON object: " + - '{"tool_calls":[{"name":"...","arguments":{...}}]} or {"text":"..."}. ' + - "Use the same tool names and argument schemas listed earlier.", - }, - ]; - try { - const retry = await runOnce(retryMessages); - raw = retry.raw; - inputTokens += retry.inputTokens; - outputTokens += retry.outputTokens; - envelope = parseEnvelope(raw); - } catch { - // Fall through with what we have. - } - } - + // `responseConstraint` enforces the schema at decode time, so JSON.parse + // is guaranteed to succeed. The only remaining failure mode is the model + // emitting `{}` (both fields are optional in the schema), which a retry + // does not reliably correct. We surface whatever we got — empty case is + // handled below by falling back to the raw text. + const envelope = parseEnvelope(raw); const toolCalls = envelope?.toolCalls ?? []; const text = envelope?.text ?? ""; const createdAt = Math.floor(Date.now() / 1000); @@ -662,8 +640,8 @@ async function generateToolModeResponse( }); } if (outputItems.length === 0) { - // Both retries produced nothing usable. Surface the raw output so the - // user sees what went wrong rather than an empty turn. + // Model returned an empty envelope. Surface the raw output so the + // user sees what came back rather than a blank turn. outputItems.push({ id: genId("msg"), type: "message", diff --git a/ui/src/services/browser-ai/bridge.ts b/ui/src/services/browser-ai/bridge.ts index b3c5cf5..1317355 100644 --- a/ui/src/services/browser-ai/bridge.ts +++ b/ui/src/services/browser-ai/bridge.ts @@ -126,11 +126,15 @@ async function handlePrompt(port: MessagePort, payload: PromptRequestPayload): P signal: abort.signal, }); + // Count input tokens across system + conversation messages. The earlier + // version only measured `conversation`, which understated usage whenever + // a system prompt was supplied (every Hadrian chat turn). let inputTokens = 0; try { - inputTokens = await session.measureInputUsage(conversation); - } catch { + inputTokens = await session.measureInputUsage(payload.messages); + } catch (err) { // measureInputUsage may not be implemented on every channel. 
+ console.debug("[browser-ai] measureInputUsage(input) failed", err); } let outputText = ""; @@ -170,10 +174,19 @@ async function handlePrompt(port: MessagePort, payload: PromptRequestPayload): P } } + // measureInputUsage of an assistant message also counts role-framing + // tokens (a few per message). Subtract the framing baseline so the + // reported output count tracks the generated text rather than the + // wrapper. Falls back to ~4 chars/token when the API isn't available. let outputTokens = 0; try { - outputTokens = await session.measureInputUsage([{ role: "assistant", content: outputText }]); - } catch { + const [withText, baseline] = await Promise.all([ + session.measureInputUsage([{ role: "assistant", content: outputText }]), + session.measureInputUsage([{ role: "assistant", content: "" }]), + ]); + outputTokens = Math.max(0, withText - baseline); + } catch (err) { + console.debug("[browser-ai] measureInputUsage(output) failed", err); outputTokens = Math.max(1, Math.ceil(outputText.length / 4)); } From 675aace61891dfad6918a86ea3dfde882b75c082 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Wed, 29 Apr 2026 21:56:13 +1000 Subject: [PATCH 3/5] Review fixes --- ui/src/components/WasmSetup/WasmSetup.tsx | 61 +++--- .../components/WasmSetup/WasmSetupGuard.tsx | 9 +- ui/src/service-worker/browser-ai.ts | 181 +++++++++++++++++- ui/src/service-worker/sw.ts | 4 + ui/src/services/browser-ai/bridge.ts | 3 + ui/src/services/browser-ai/types.ts | 5 + 6 files changed, 230 insertions(+), 33 deletions(-) diff --git a/ui/src/components/WasmSetup/WasmSetup.tsx b/ui/src/components/WasmSetup/WasmSetup.tsx index 8ec881f..c322388 100644 --- a/ui/src/components/WasmSetup/WasmSetup.tsx +++ b/ui/src/components/WasmSetup/WasmSetup.tsx @@ -106,6 +106,30 @@ function initialEntries(): ProviderEntry[] { return PROVIDER_TEMPLATES.map((t) => createEntry(t, 0)); } +// Bundle the browser AI state and download callback into a single optional +// prop. Previously the two were separate optionals with `onBrowserAiDownload +// ?? (() => {})` as a fallback, which let callers silently no-op the +// Download button by passing state without a callback. Bundling makes the +// pairing structural — a caller cannot supply the state without the +// handler — so the no-op fallback can go away entirely. 
+export interface BrowserAiProp { + state: BrowserAiState; + onDownload: () => void; +} + +interface WasmSetupProps { + open: boolean; + onComplete: () => void; + oauthProviderName?: string | null; + oauthError?: string | null; + existingProviders?: DynamicProviderResponse[]; + ollamaDetected?: boolean; + ollamaConnecting?: boolean; + ollamaConnected?: boolean; + onOllamaConnect?: () => void; + browserAi?: BrowserAiProp; +} + export function WasmSetup({ open, onComplete, @@ -117,20 +141,7 @@ export function WasmSetup({ ollamaConnected, onOllamaConnect, browserAi, - onBrowserAiDownload, -}: { - open: boolean; - onComplete: () => void; - oauthProviderName?: string | null; - oauthError?: string | null; - existingProviders?: DynamicProviderResponse[]; - ollamaDetected?: boolean; - ollamaConnecting?: boolean; - ollamaConnected?: boolean; - onOllamaConnect?: () => void; - browserAi?: BrowserAiState; - onBrowserAiDownload?: () => void; -}) { +}: WasmSetupProps) { const [step, setStep] = useState("welcome"); const [entries, setEntries] = useState(initialEntries); @@ -249,7 +260,7 @@ export function WasmSetup({ } }, []); - const browserAiReady = browserAi?.availability === "available"; + const browserAiReady = browserAi?.state.availability === "available"; const savedCount = entries.filter((e) => e.saved).length + (hasExistingOpenRouter ? 1 : 0) + @@ -274,7 +285,6 @@ export function WasmSetup({ existingProviders={existingProviders} onDeleteExisting={handleDeleteExisting} browserAi={browserAi} - onBrowserAiDownload={onBrowserAiDownload} /> )} {step === "providers" && ( @@ -300,7 +310,6 @@ export function WasmSetup({ existingProviders={existingProviders} onDeleteExisting={handleDeleteExisting} browserAi={browserAi} - onBrowserAiDownload={onBrowserAiDownload} /> )} {step === "done" && } @@ -322,7 +331,6 @@ function WelcomeStep({ existingProviders, onDeleteExisting, browserAi, - onBrowserAiDownload, }: { onNext: () => void; onReady: () => void; @@ -336,10 +344,9 @@ function WelcomeStep({ onOllamaConnect?: () => void; existingProviders?: DynamicProviderResponse[]; onDeleteExisting: (id: string) => void; - browserAi?: BrowserAiState; - onBrowserAiDownload?: () => void; + browserAi?: BrowserAiProp; }) { - const hasBrowserAiReady = browserAi?.availability === "available"; + const hasBrowserAiReady = browserAi?.state.availability === "available"; const hasProvider = hasExistingOpenRouter || hasExistingOllama || hasBrowserAiReady; return ( <> @@ -475,8 +482,8 @@ function WelcomeStep({ {browserAi && ( {})} + state={browserAi.state} + onDownload={browserAi.onDownload} className="mt-3" /> )} @@ -549,7 +556,6 @@ function ProvidersStep({ existingProviders, onDeleteExisting, browserAi, - onBrowserAiDownload, }: { entries: ProviderEntry[]; onUpdate: (key: string, update: Partial) => void; @@ -571,8 +577,7 @@ function ProvidersStep({ onOllamaConnect?: () => void; existingProviders?: DynamicProviderResponse[]; onDeleteExisting: (id: string) => void; - browserAi?: BrowserAiState; - onBrowserAiDownload?: () => void; + browserAi?: BrowserAiProp; }) { return ( <> @@ -666,8 +671,8 @@ function ProvidersStep({ {browserAi && ( {})} + state={browserAi.state} + onDownload={browserAi.onDownload} className="mb-4" /> )} diff --git a/ui/src/components/WasmSetup/WasmSetupGuard.tsx b/ui/src/components/WasmSetup/WasmSetupGuard.tsx index e059a56..cb7d933 100644 --- a/ui/src/components/WasmSetup/WasmSetupGuard.tsx +++ b/ui/src/components/WasmSetup/WasmSetupGuard.tsx @@ -143,6 +143,12 @@ export function WasmSetupGuard({ children }: { 
children: ReactNode }) { downloading: false, downloadProgress: null, })); + // Tell the SW its 60s availability cache is stale before we trigger + // the model-list refetch; otherwise the freshly-ready model would + // not appear until the cache expires organically. + navigator.serviceWorker.controller?.postMessage({ + type: "BROWSER_AI_AVAILABILITY_CHANGED", + }); queryClient.invalidateQueries({ queryKey: apiV1ModelsQueryKey() }); } catch (err) { setBrowserAi((prev) => ({ @@ -256,8 +262,7 @@ export function WasmSetupGuard({ children }: { children: ReactNode }) { ollamaConnecting={ollamaConnecting} ollamaConnected={ollamaConnected} onOllamaConnect={handleOllamaConnect} - browserAi={browserAi} - onBrowserAiDownload={handleBrowserAiDownload} + browserAi={{ state: browserAi, onDownload: handleBrowserAiDownload }} /> ); diff --git a/ui/src/service-worker/browser-ai.ts b/ui/src/service-worker/browser-ai.ts index edcfe0d..c57e51d 100644 --- a/ui/src/service-worker/browser-ai.ts +++ b/ui/src/service-worker/browser-ai.ts @@ -40,6 +40,16 @@ interface PromptToolDef { let availabilityCache: { state: BridgeAvailabilityReply["state"]; checkedAt: number } | null = null; const AVAILABILITY_TTL_MS = 60_000; +/** + * Drop the cached availability state so the next `/v1/models` request + * re-queries the bridge. Called by the SW message handler after the window + * reports a successful model download — without this, the freshly-ready + * model would not appear in the list until the 60s TTL expires. + */ +export function invalidateAvailabilityCache(): void { + availabilityCache = null; +} + export function isBrowserAiModel(model: unknown): boolean { return typeof model === "string" && model.startsWith(BROWSER_AI_PREFIX); } @@ -1021,14 +1031,26 @@ export async function handleChatCompletionsRequest( } const tools = extractTools(body); - if (tools.length > 0) { - messages = injectToolPrompt(messages, tools); - } const id = genId("chatcmpl"); const created = Math.floor(Date.now() / 1000); const model = body.model; const stream = body.stream === true; + if (tools.length > 0) { + messages = injectToolPrompt(messages, tools); + return generateChatCompletionsToolModeResponse( + client, + body, + messages, + tools, + request.signal, + id, + created, + model, + stream + ); + } + if (!stream) { let outputText = ""; let inputTokens = 0; @@ -1171,4 +1193,157 @@ export async function handleChatCompletionsRequest( return new Response(sseStream, { status: 200, headers: sseHeaders() }); } +/** + * Tool-aware path for chat completions. Mirrors `generateToolModeResponse`: + * applies `responseConstraint`, buffers the constrained envelope, parses it, + * and surfaces tool invocations in the standard `tool_calls` field rather + * than as raw JSON in `content`. Token-by-token streaming is impossible + * here (chunks would be malformed JSON), so streaming clients receive a + * single `tool_calls` delta followed by the terminal chunk. 
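+ *
+ * Sketch of the resulting stream when the model does call a tool (ids,
+ * names and arguments illustrative; unchanged chunk fields elided):
+ *
+ *   delta: { role: "assistant" }                          finish_reason: null
+ *   delta: { tool_calls: [{ index: 0, id: "call_...", type: "function",
+ *            function: { name: "...", arguments: "{...}" } }] }
+ *   delta: {}                                             finish_reason: "tool_calls"
+ *   data: [DONE]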
+ */ +async function generateChatCompletionsToolModeResponse( + client: Client, + body: ChatCompletionsPayload, + messages: LanguageModelMessage[], + tools: PromptToolDef[], + signal: AbortSignal, + id: string, + created: number, + model: string, + stream: boolean +): Promise { + const schema = buildToolResponseSchema(tools); + + let raw = ""; + let inputTokens = 0; + let outputTokens = 0; + try { + await sendToBridge( + client, + { + type: "PROMPT", + messages, + temperature: body.temperature, + topK: body.top_k, + responseConstraint: schema, + }, + (reply) => { + if (reply.type === "DELTA") { + raw += reply.text; + return false; + } + if (reply.type === "DONE") { + inputTokens = reply.inputTokens; + outputTokens = reply.outputTokens; + return true; + } + if (reply.type === "ERROR") throw new Error(reply.message); + if (reply.type === "ABORTED") throw new DOMException("Aborted", "AbortError"); + return false; + }, + signal + ); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return jsonError(`Browser AI: ${message}`); + } + + const envelope = parseEnvelope(raw); + const parsedToolCalls = envelope?.toolCalls ?? []; + const text = envelope?.text ?? ""; + + const toolCalls = parsedToolCalls.map((call) => ({ + id: genId("call"), + type: "function" as const, + function: { name: call.name, arguments: call.arguments }, + })); + + // OpenAI semantics: when the model returns tool calls, the assistant + // message has `content: null` and `finish_reason: "tool_calls"`. When + // there are none, we fall back to the text envelope (or the raw output + // if the envelope was empty). + const hasToolCalls = toolCalls.length > 0; + const content = hasToolCalls ? null : text || raw.trim() || ""; + const finishReason = hasToolCalls ? 
"tool_calls" : "stop"; + + if (!stream) { + const message: Record = { role: "assistant", content }; + if (hasToolCalls) message.tool_calls = toolCalls; + return new Response( + JSON.stringify({ + id, + object: "chat.completion", + created, + model, + choices: [{ index: 0, message, finish_reason: finishReason }], + usage: { + prompt_tokens: inputTokens, + completion_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }), + { status: 200, headers: { "content-type": "application/json" } } + ); + } + + const encoder = new TextEncoder(); + const sseStream = new ReadableStream({ + start(controller) { + const writeChunk = (chunk: Record) => { + controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`)); + }; + + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: { role: "assistant" }, finish_reason: null }], + }); + + if (hasToolCalls) { + const deltaToolCalls = toolCalls.map((call, index) => ({ + index, + id: call.id, + type: call.type, + function: { name: call.function.name, arguments: call.function.arguments }, + })); + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: { tool_calls: deltaToolCalls }, finish_reason: null }], + }); + } else if (content) { + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: { content }, finish_reason: null }], + }); + } + + writeChunk({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: {}, finish_reason: finishReason }], + usage: { + prompt_tokens: inputTokens, + completion_tokens: outputTokens, + total_tokens: inputTokens + outputTokens, + }, + }); + + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + controller.close(); + }, + }); + + return new Response(sseStream, { status: 200, headers: sseHeaders() }); +} + export type { ResponsesPayload, ChatCompletionsPayload }; diff --git a/ui/src/service-worker/sw.ts b/ui/src/service-worker/sw.ts index 3e5ccc7..73d543a 100644 --- a/ui/src/service-worker/sw.ts +++ b/ui/src/service-worker/sw.ts @@ -27,6 +27,7 @@ import { augmentModelsResponse, handleChatCompletionsRequest, handleResponsesRequest, + invalidateAvailabilityCache, isBrowserAiModel, type ChatCompletionsPayload, type ResponsesPayload, @@ -59,6 +60,9 @@ self.addEventListener("message", (event) => { if (event.data?.type === "CLAIM") { self.clients.claim(); } + if (event.data?.type === "BROWSER_AI_AVAILABILITY_CHANGED") { + invalidateAvailabilityCache(); + } }); self.addEventListener("fetch", (event) => { diff --git a/ui/src/services/browser-ai/bridge.ts b/ui/src/services/browser-ai/bridge.ts index 1317355..366ada5 100644 --- a/ui/src/services/browser-ai/bridge.ts +++ b/ui/src/services/browser-ai/bridge.ts @@ -120,6 +120,9 @@ async function handlePrompt(port: MessagePort, payload: PromptRequestPayload): P topK: payload.topK, monitor(m) { m.addEventListener("downloadprogress", (event) => { + // `event.loaded` is normalized to a value in [0, 1] per the Prompt + // API spec, not a byte count — the spec deliberately omits `total`. 
+ // https://github.com/webmachinelearning/prompt-api?tab=readme-ov-file#download-progress port.postMessage({ type: "DOWNLOAD_PROGRESS", loaded: event.loaded }); }); }, diff --git a/ui/src/services/browser-ai/types.ts b/ui/src/services/browser-ai/types.ts index 275c109..6b3b6a7 100644 --- a/ui/src/services/browser-ai/types.ts +++ b/ui/src/services/browser-ai/types.ts @@ -14,6 +14,11 @@ export type LanguageModelAvailability = | "unavailable"; export interface LanguageModelMonitor { + /** + * `loaded` is a fraction in [0, 1], not a byte count. The Prompt API spec + * normalizes progress and omits `total` for that reason. + * https://github.com/webmachinelearning/prompt-api?tab=readme-ov-file#download-progress + */ addEventListener(type: "downloadprogress", listener: (event: { loaded: number }) => void): void; removeEventListener( type: "downloadprogress", From 25fc24f1c297e9074919e050d0203323c415fef0 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Mon, 4 May 2026 23:50:07 +1000 Subject: [PATCH 4/5] Use new context apis --- ui/src/services/browser-ai/bridge.ts | 14 +++++++------- ui/src/services/browser-ai/types.ts | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ui/src/services/browser-ai/bridge.ts b/ui/src/services/browser-ai/bridge.ts index 366ada5..3b9c2af 100644 --- a/ui/src/services/browser-ai/bridge.ts +++ b/ui/src/services/browser-ai/bridge.ts @@ -134,10 +134,10 @@ async function handlePrompt(port: MessagePort, payload: PromptRequestPayload): P // a system prompt was supplied (every Hadrian chat turn). let inputTokens = 0; try { - inputTokens = await session.measureInputUsage(payload.messages); + inputTokens = await session.measureContextUsage(payload.messages); } catch (err) { - // measureInputUsage may not be implemented on every channel. - console.debug("[browser-ai] measureInputUsage(input) failed", err); + // measureContextUsage may not be implemented on every channel. + console.debug("[browser-ai] measureContextUsage(input) failed", err); } let outputText = ""; @@ -177,19 +177,19 @@ async function handlePrompt(port: MessagePort, payload: PromptRequestPayload): P } } - // measureInputUsage of an assistant message also counts role-framing + // measureContextUsage of an assistant message also counts role-framing // tokens (a few per message). Subtract the framing baseline so the // reported output count tracks the generated text rather than the // wrapper. Falls back to ~4 chars/token when the API isn't available. 
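// Example (numbers illustrative): if the assistant message carrying
// outputText measures 57 tokens and the empty-content baseline measures 5,
// we report 52 output tokens.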
let outputTokens = 0; try { const [withText, baseline] = await Promise.all([ - session.measureInputUsage([{ role: "assistant", content: outputText }]), - session.measureInputUsage([{ role: "assistant", content: "" }]), + session.measureContextUsage([{ role: "assistant", content: outputText }]), + session.measureContextUsage([{ role: "assistant", content: "" }]), ]); outputTokens = Math.max(0, withText - baseline); } catch (err) { - console.debug("[browser-ai] measureInputUsage(output) failed", err); + console.debug("[browser-ai] measureContextUsage(output) failed", err); outputTokens = Math.max(1, Math.ceil(outputText.length / 4)); } diff --git a/ui/src/services/browser-ai/types.ts b/ui/src/services/browser-ai/types.ts index 6b3b6a7..9744e39 100644 --- a/ui/src/services/browser-ai/types.ts +++ b/ui/src/services/browser-ai/types.ts @@ -84,10 +84,10 @@ export interface LanguageModelSession { input: string | LanguageModelMessage[], options?: LanguageModelPromptOptions ): ReadableStream; - measureInputUsage(input: string | LanguageModelMessage[]): Promise; + measureContextUsage(input: string | LanguageModelMessage[]): Promise; destroy(): void; - readonly inputUsage: number; - readonly inputQuota: number; + readonly contextUsage: number; + readonly contextWindow: number; } export interface LanguageModelGlobal { From e13a12b933969e966cf59a1890bc0ed1c3735060 Mon Sep 17 00:00:00 2001 From: ScriptSmith Date: Mon, 4 May 2026 23:59:30 +1000 Subject: [PATCH 5/5] Review fixes --- .../components/WasmSetup/WasmSetupGuard.tsx | 14 ++++- ui/src/service-worker/browser-ai.ts | 53 ++++++++++++++++++- 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/ui/src/components/WasmSetup/WasmSetupGuard.tsx b/ui/src/components/WasmSetup/WasmSetupGuard.tsx index cb7d933..2bee90a 100644 --- a/ui/src/components/WasmSetup/WasmSetupGuard.tsx +++ b/ui/src/components/WasmSetup/WasmSetupGuard.tsx @@ -151,11 +151,23 @@ export function WasmSetupGuard({ children }: { children: ReactNode }) { }); queryClient.invalidateQueries({ queryKey: apiV1ModelsQueryKey() }); } catch (err) { + // Re-query the actual availability rather than assuming "downloadable": + // a mid-download failure (e.g. storage pressure made the device + // ineligible) can transition the API to "unavailable", and resetting + // to "downloadable" would resurface a Download button that fails again + // on every click. + let availability: BrowserAiState["availability"] = "downloadable"; + try { + availability = await getAvailability(); + } catch { + // Bridge unreachable; "downloadable" is the safest default since the + // user already saw the download UI. 
+ } setBrowserAi((prev) => ({ ...prev, downloading: false, downloadProgress: null, - availability: "downloadable", + availability, error: formatApiError(err), })); } diff --git a/ui/src/service-worker/browser-ai.ts b/ui/src/service-worker/browser-ai.ts index c57e51d..e81da82 100644 --- a/ui/src/service-worker/browser-ai.ts +++ b/ui/src/service-worker/browser-ai.ts @@ -235,9 +235,23 @@ interface ResponsesPayload { tools?: ToolDef[]; } +interface ChatCompletionsToolCall { + id?: string; + type?: string; + function?: { name?: string; arguments?: string }; +} + +interface ChatCompletionsMessage { + role: string; + content?: string | Array<{ type: string; text?: string }> | null; + tool_calls?: ChatCompletionsToolCall[]; + tool_call_id?: string; + name?: string; +} + interface ChatCompletionsPayload { model: string; - messages: Array<{ role: string; content: string | Array<{ type: string; text?: string }> }>; + messages: ChatCompletionsMessage[]; stream?: boolean; temperature?: number; top_k?: number; @@ -467,7 +481,44 @@ function chatMessagesToBridge( messages: ChatCompletionsPayload["messages"] ): LanguageModelMessage[] { const out: LanguageModelMessage[] = []; + // Map tool_call_id → function name so a later `role: "tool"` reply can be + // rendered with the tool's name (mirroring the Responses-API path's + // `` markup). + const callIdToName = new Map(); for (const m of messages) { + if (m.role === "assistant" && Array.isArray(m.tool_calls)) { + for (const tc of m.tool_calls) { + const name = tc.function?.name; + if (typeof tc.id === "string" && typeof name === "string") { + callIdToName.set(tc.id, name); + } + } + } + } + + for (const m of messages) { + if (m.role === "tool") { + const callId = typeof m.tool_call_id === "string" ? m.tool_call_id : ""; + const name = callIdToName.get(callId) ?? m.name ?? "tool"; + const output = flattenContent(m.content); + out.push({ + role: "user", + content: `${output}`, + }); + continue; + } + if (m.role === "assistant" && Array.isArray(m.tool_calls) && m.tool_calls.length > 0) { + const calls = m.tool_calls + .map((tc) => { + const name = tc.function?.name ?? "tool"; + const args = tc.function?.arguments ?? "{}"; + return `${args}`; + }) + .join(""); + const text = flattenContent(m.content); + out.push({ role: "assistant", content: text ? `${text}${calls}` : calls }); + continue; + } if (m.role !== "system" && m.role !== "user" && m.role !== "assistant") continue; const text = flattenContent(m.content); if (!text) continue;