chriswritescode-dev · chriswritescode-dev · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/docs/features/stt.md b/docs/features/stt.md
@@ -69,6 +69,12 @@ Any OpenAI-compatible transcription API works:
 - Self-hosted Whisper servers
 - Local STT servers with OpenAI-compatible API
 
+### Performance
+
+When external voice input is enabled, the browser audio pipeline is warmed up ahead of time so that every recording, including the first, starts faster. The audio context and worklet processor are prepared without requesting microphone access and are retained between recordings; only the microphone track is stopped after each use. These resources are released entirely when external STT is disabled, when the voice input UI unmounts, or when microphone startup times out (they are recreated on the next recording attempt).
+
+This optimization applies only to the external API provider. It does not affect the initial permission prompt — the browser still asks for microphone access on the first recording.
+
 ## Using Voice Input
 
 ### Tap-to-Start / Tap-to-Stop

diff --git a/frontend/public/audio-worklet-processor.js b/frontend/public/audio-worklet-processor.js
@@ -59,12 +59,16 @@ class RecorderProcessor extends AudioWorkletProcessor {
   }
 
   _flushBuffer() {
-    const int16 = new Int16Array(this._buffer.length)
-    for (let i = 0; i < this._buffer.length; i++) {
+    const length = this._buffer.length
+    const int16 = new Int16Array(length)
+    let sumSquares = 0
+    for (let i = 0; i < length; i++) {
       const sample = Math.max(-1, Math.min(1, this._buffer[i]))
+      sumSquares += sample * sample
       int16[i] = sample < 0 ? sample * 32768 : sample * 32767
     }
-    this.port.postMessage(int16, [int16.buffer])
+    const rms = length > 0 ? Math.sqrt(sumSquares / length) : 0
+    this.port.postMessage({ samples: int16, rms }, [int16.buffer])
     this._buffer = []
   }
 }

diff --git a/frontend/src/hooks/useSTT.test.tsx b/frontend/src/hooks/useSTT.test.tsx
@@ -0,0 +1,138 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest'
+import { renderHook, act, waitFor } from '@testing-library/react'
+import { useSTT } from './useSTT'
+
+type MockRecorder = {
+  start: ReturnType<typeof vi.fn>
+  stop: ReturnType<typeof vi.fn>
+  abort: ReturnType<typeof vi.fn>
+  dispose: ReturnType<typeof vi.fn>
+  prepare: ReturnType<typeof vi.fn>
+  setOnStateChange: ReturnType<typeof vi.fn>
+  setOnError: ReturnType<typeof vi.fn>
+  setOnDataAvailable: ReturnType<typeof vi.fn>
+  setOnNoSpeech: ReturnType<typeof vi.fn>
+}
+
+const mocks = vi.hoisted(() => ({
+  useSettings: vi.fn(),
+  AudioRecorder: vi.fn(),
+  getWebSpeechRecognizer: vi.fn(),
+  isWebRecognitionSupported: vi.fn(),
+}))
+
+vi.mock('@/hooks/useSettings', () => ({
+  useSettings: mocks.useSettings,
+}))
+
+vi.mock('@/lib/audioRecorder', () => ({
+  AudioRecorder: mocks.AudioRecorder,
+}))
+
+vi.mock('@/lib/webSpeechRecognizer', () => ({
+  getWebSpeechRecognizer: mocks.getWebSpeechRecognizer,
+  isWebRecognitionSupported: mocks.isWebRecognitionSupported,
+}))
+
+const externalSTTPreferences = {
+  preferences: {
+    stt: {
+      enabled: true,
+      provider: 'external' as const,
+      endpoint: 'https://api.openai.com',
+      apiKey: 'test-key',
+      model: 'whisper-1',
+      language: 'en-US',
+    },
+  },
+}
+
+describe('useSTT external provider lifecycle', () => {
+  let mockRecorder: MockRecorder
+
+  beforeEach(() => {
+    vi.clearAllMocks()
+
+    mockRecorder = {
+      start: vi.fn().mockResolvedValue(undefined),
+      stop: vi.fn(),
+      abort: vi.fn(),
+      dispose: vi.fn(),
+      prepare: vi.fn().mockResolvedValue(undefined),
+      setOnStateChange: vi.fn(),
+      setOnError: vi.fn(),
+      setOnDataAvailable: vi.fn(),
+      setOnNoSpeech: vi.fn(),
+    }
+
+    mocks.AudioRecorder.mockImplementation(() => mockRecorder)
+    mocks.useSettings.mockReturnValue(externalSTTPreferences)
+    mocks.getWebSpeechRecognizer.mockReturnValue({
+      start: vi.fn(),
+      stop: vi.fn(),
+      abort: vi.fn(),
+      clearCallbacks: vi.fn(),
+      onResult: vi.fn(),
+      onInterimResult: vi.fn(),
+      onError: vi.fn(),
+      onEnd: vi.fn(),
+      onStart: vi.fn(),
+    })
+    mocks.isWebRecognitionSupported.mockReturnValue(true)
+  })
+
+  it('does not start external recording until startRecording is called', async () => {
+    const { result } = renderHook(() => useSTT())
+
+    await waitFor(() => {
+      expect(mocks.AudioRecorder).toHaveBeenCalledTimes(1)
+    })
+
+    expect(mockRecorder.start).not.toHaveBeenCalled()
+    expect(mockRecorder.prepare).toHaveBeenCalledTimes(1)
+    expect(mockRecorder.setOnStateChange).toHaveBeenCalledTimes(1)
+    expect(mockRecorder.setOnError).toHaveBeenCalledTimes(1)
+    expect(mockRecorder.setOnDataAvailable).toHaveBeenCalledTimes(1)
+    expect(mockRecorder.setOnNoSpeech).toHaveBeenCalledTimes(1)
+
+    await act(async () => {
+      await result.current.startRecording()
+    })
+
+    expect(mockRecorder.start).toHaveBeenCalledTimes(1)
+  })
+
+  it('clears processing without an error when no speech is detected', async () => {
+    const { result } = renderHook(() => useSTT())
+
+    await waitFor(() => {
+      expect(mockRecorder.setOnNoSpeech).toHaveBeenCalledTimes(1)
+    })
+
+    const onNoSpeech = mockRecorder.setOnNoSpeech.mock.calls[0][0] as () => void
+
+    act(() => {
+      onNoSpeech()
+    })
+
+    expect(result.current.isProcessing).toBe(false)
+    expect(result.current.isRecording).toBe(false)
+    expect(result.current.isError).toBe(false)
+    expect(result.current.error).toBeNull()
+  })
+
+  it('disposes external recorder resources on unmount', async () => {
+    const { unmount } = renderHook(() => useSTT())
+
+    await waitFor(() => {
+      expect(mocks.AudioRecorder).toHaveBeenCalledTimes(1)
+    })
+
+    const recorder = mockRecorder
+
+    unmount()
+
+    expect(recorder.dispose).toHaveBeenCalledTimes(1)
+    expect(recorder.abort).not.toHaveBeenCalled()
+  })
+})
diff --git a/frontend/src/hooks/useSTT.ts b/frontend/src/hooks/useSTT.ts
@@ -139,6 +139,13 @@ export function useSTT(userId = 'default') {
       }, 3000)
     })
 
+    recorder.setOnNoSpeech(() => {
+      setIsProcessing(false)
+      setIsRecording(false)
+      setInterimTranscript('')
+      setState('idle')
+    })
+
     recorder.setOnDataAvailable(async (blob) => {
       if (lastProcessedBlobRef.current === blob) {
         return
@@ -185,26 +192,36 @@ export function useSTT(userId = 'default') {
     })
   }, [])
 
-  useEffect(() => {
-    if (!isEnabled || !isExternalProvider) {
-      return
-    }
-
+  const ensureAudioRecorder = useCallback((): AudioRecorder => {
     if (!audioRecorder.current) {
       audioRecorder.current = new AudioRecorder()
     }
-
     if (!recorderConfiguredRef.current) {
       setupAudioRecorder(audioRecorder.current)
       recorderConfiguredRef.current = true
     }
+    return audioRecorder.current
+  }, [setupAudioRecorder])
+
+  const disposeAudioRecorder = useCallback(() => {
+    if (audioRecorder.current) {
+      audioRecorder.current.dispose()
+      audioRecorder.current = null
+    }
+    recorderConfiguredRef.current = false
+  }, [])
+
+  useEffect(() => {
+    if (!isEnabled || !isExternalProvider) {
+      return
+    }
+
+    void ensureAudioRecorder().prepare().catch(() => undefined)
 
     return () => {
-      if (audioRecorder.current) {
-        audioRecorder.current.abort()
-      }
+      disposeAudioRecorder()
     }
-  }, [isEnabled, isExternalProvider, setupAudioRecorder])
+  }, [isEnabled, isExternalProvider, ensureAudioRecorder, disposeAudioRecorder])
 
   const clearStartupTimeout = useCallback(() => {
     if (startupTimeoutRef.current) {
@@ -214,8 +231,8 @@ export function useSTT(userId = 'default') {
   }, [])
 
   const abortAndResetOnTimeout = useCallback(() => {
-    if (isExternalProvider && audioRecorder.current) {
-      audioRecorder.current.abort()
+    if (isExternalProvider) {
+      disposeAudioRecorder()
     } else {
       recognizer.current.abort()
     }
@@ -224,7 +241,38 @@ export function useSTT(userId = 'default') {
     setState('idle')
     setIsError(true)
     setError('Microphone start timed out')
-  }, [isExternalProvider])
+  }, [isExternalProvider, disposeAudioRecorder])
+
+  const runStartupWithTimeout = useCallback(
+    async (startup: () => Promise<void>, startOpId: number): Promise<boolean> => {
+      try {
+        const startupPromise = startup()
+        const timeoutPromise = new Promise<never>((_, reject) => {
+          startupTimeoutRef.current = setTimeout(() => {
+            if (startOpIdRef.current !== startOpId) return
+            reject(new Error('Microphone start timed out'))
+          }, STT_START_TIMEOUT_MS)
+        })
+
+        await Promise.race([startupPromise, timeoutPromise])
+        clearStartupTimeout()
+
+        return startOpIdRef.current === startOpId
+      } catch (err) {
+        clearStartupTimeout()
+        if (startOpIdRef.current !== startOpId) return false
+        setIsProcessing(false)
+        if (err instanceof Error && err.message === 'Microphone start timed out') {
+          abortAndResetOnTimeout()
+          return false
+        }
+        setIsError(true)
+        setError(err instanceof Error ? err.message : 'Failed to start recording')
+        return false
+      }
+    },
+    [clearStartupTimeout, abortAndResetOnTimeout],
+  )
 
   const startRecording = useCallback(async (): Promise<boolean> => {
     if (!isSupported) {
@@ -249,79 +297,25 @@ export function useSTT(userId = 'default') {
     clearStartupTimeout()
 
     if (isExternalProvider) {
-      if (!audioRecorder.current) {
-        audioRecorder.current = new AudioRecorder()
-        setupAudioRecorder(audioRecorder.current)
-      }
-
-      try {
-        setIsProcessing(true)
-
-        const startupPromise = audioRecorder.current.start()
-        const timeoutPromise = new Promise<never>((_, reject) => {
-          startupTimeoutRef.current = setTimeout(() => {
-            if (startOpIdRef.current !== startOpId) return
-            reject(new Error('Microphone start timed out'))
-          }, STT_START_TIMEOUT_MS)
-        })
-
-        await Promise.race([startupPromise, timeoutPromise])
-        clearStartupTimeout()
-
-        if (startOpIdRef.current !== startOpId) return false
+      const recorder = ensureAudioRecorder()
 
+      setIsProcessing(true)
+      const started = await runStartupWithTimeout(() => recorder.start(), startOpId)
+      if (started) {
         setIsProcessing(false)
-        return true
-      } catch (err) {
-        clearStartupTimeout()
-        if (startOpIdRef.current !== startOpId) return false
-        setIsProcessing(false)
-        if (err instanceof Error && err.message === 'Microphone start timed out') {
-          abortAndResetOnTimeout()
-          return false
-        }
-        setIsError(true)
-        setError(err instanceof Error ? err.message : 'Failed to start recording')
-        return false
       }
+      return started
     } else {
       const options: SpeechRecognitionOptions = {
         language: config.language,
         interimResults: true,
         maxAlternatives: 1,
       }
 
-      try {
-        setIsProcessing(true)
-
-        const startupPromise = recognizer.current.start(options)
-        const timeoutPromise = new Promise<never>((_, reject) => {
-          startupTimeoutRef.current = setTimeout(() => {
-            if (startOpIdRef.current !== startOpId) return
-            reject(new Error('Microphone start timed out'))
-          }, STT_START_TIMEOUT_MS)
-        })
-
-        await Promise.race([startupPromise, timeoutPromise])
-        clearStartupTimeout()
-
-        if (startOpIdRef.current !== startOpId) return false
-
-        return true
-      } catch (err) {
-        clearStartupTimeout()
-        if (startOpIdRef.current !== startOpId) return false
-        setIsProcessing(false)
-        if (err instanceof Error && err.message === 'Microphone start timed out') {
-          abortAndResetOnTimeout()
-          return false
-        }
-        setIsError(true)
-        setError(err instanceof Error ? err.message : 'Failed to start recording')
-        return false
-      }
+      setIsProcessing(true)
+      return runStartupWithTimeout(() => recognizer.current.start(options), startOpId)
     }
-  }, [isSupported, isEnabled, isExternalProvider, config.language, setupAudioRecorder, clearStartupTimeout, abortAndResetOnTimeout])
+  }, [isSupported, isEnabled, isExternalProvider, config.language, clearStartupTimeout, ensureAudioRecorder, runStartupWithTimeout])
 
   const stopRecording = useCallback(() => {
     if (isExternalProvider && audioRecorder.current) {