Replace Whisper-tiny offline fallback with Parakeet.js (NVIDIA 0.6B v2)

Swap @xenova/transformers (whisper-tiny, ~45MB) for parakeet.js
(Parakeet TDT 0.6B v2, ~634MB), loaded from CDN at runtime. Much higher
transcription accuracy at the cost of a far larger initial model download.
Uses an indirect dynamic import to avoid Next.js/webpack bundling issues.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Authored by Jeff Emmett, committed by Jeff Emmett, 2026-02-17 01:26:29 +00:00
commit fbbe8d38d1 (parent d1bdb126af)
6 changed files with 161 additions and 3971 deletions


@@ -2,8 +2,8 @@
 const nextConfig = {
   output: 'standalone',
   webpack: (config, { isServer, webpack }) => {
-    // @xenova/transformers depends on onnxruntime-node (native .node binaries)
-    // which can't be bundled by webpack. We only use the web ONNX runtime.
+    // Ignore onnxruntime-node if any dependency pulls it in.
+    // We only use the browser ONNX runtime (loaded from CDN at runtime).
     config.plugins.push(
       new webpack.IgnorePlugin({
         resourceRegExp: /onnxruntime-node/,
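
The hunk above cuts off inside the IgnorePlugin options. For context, a minimal sketch of the full webpack block as it plausibly reads after this change; the plugin's closing braces, the `return config` line, and the export style are assumptions, since the diff does not show them:

// Sketch only: lines shown in the hunk above are confirmed by the diff,
// the closing braces, `return config`, and the export are assumed.
const nextConfig = {
  output: 'standalone',
  webpack: (config, { isServer, webpack }) => {
    // Ignore onnxruntime-node if any dependency pulls it in.
    // We only use the browser ONNX runtime (loaded from CDN at runtime).
    config.plugins.push(
      new webpack.IgnorePlugin({
        resourceRegExp: /onnxruntime-node/,
      })
    );
    return config;
  },
};

module.exports = nextConfig;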

package-lock.json (generated, 3846 lines changed)
Diff suppressed because it is too large.


@@ -23,7 +23,6 @@
     "@tiptap/pm": "^3.19.0",
     "@tiptap/react": "^3.19.0",
     "@tiptap/starter-kit": "^3.19.0",
-    "@xenova/transformers": "^2.17.2",
     "dompurify": "^3.2.0",
     "lowlight": "^3.3.0",
     "marked": "^15.0.0",


@@ -338,8 +338,8 @@ export function VoiceRecorder({ onResult, className }: VoiceRecorderProps) {
       if (!transcript) {
         // Fallback 2: offline Whisper via Transformers.js in browser
         try {
-          setOfflineProgress({ status: 'loading', message: 'Loading offline model...' });
-          const { transcribeOffline } = await import('@/lib/whisperOffline');
+          setOfflineProgress({ status: 'loading', message: 'Loading Parakeet model...' });
+          const { transcribeOffline } = await import('@/lib/parakeetOffline');
           transcript = await transcribeOffline(blob, (p) => setOfflineProgress(p));
           setOfflineProgress(null);
         } catch (offlineErr) {
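
The diff does not show how setOfflineProgress is rendered. A hypothetical sketch of a consumer of the progress states defined by the module's WhisperProgress interface; the component name and JSX structure are assumptions, only the WhisperProgress shape comes from the module added below:

import type { WhisperProgress } from '@/lib/parakeetOffline';

// Hypothetical progress banner: renders the download percentage while the
// ~634 MB model streams in, and falls back to the status message otherwise.
function OfflineProgressBanner({ progress }: { progress: WhisperProgress | null }) {
  if (!progress) return null;
  if (progress.status === 'downloading') {
    // progress.progress is a 0-100 aggregate across all model files
    return <div>Downloading Parakeet model... {progress.progress ?? 0}%</div>;
  }
  if (progress.status === 'error') return <div role="alert">{progress.message}</div>;
  return <div>{progress.message}</div>;
}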

src/lib/parakeetOffline.ts (new file, 157 lines)

@@ -0,0 +1,157 @@
/**
 * Offline transcription using parakeet.js (NVIDIA Parakeet TDT 0.6B v2).
 * Loaded at runtime from CDN to avoid Next.js/webpack bundling issues
 * with onnxruntime-web's node-specific files.
 * Model is ~634 MB (int8) on first download, cached in IndexedDB after.
 * Much higher accuracy than Whisper-tiny at the cost of larger model size.
 */

const CACHE_KEY = 'parakeet-offline-cached';

export interface WhisperProgress {
  status: 'checking' | 'downloading' | 'loading' | 'transcribing' | 'done' | 'error';
  progress?: number;
  file?: string;
  message?: string;
}

type ProgressCallback = (progress: WhisperProgress) => void;

// Singleton model — don't reload on subsequent calls
let cachedModel: any = null;
let loadingPromise: Promise<any> | null = null;

/**
 * Check if the Parakeet model has been downloaded before.
 * Best-effort check via localStorage flag; actual cache is in IndexedDB.
 */
export function isModelCached(): boolean {
  if (typeof window === 'undefined') return false;
  return localStorage.getItem(CACHE_KEY) === 'true';
}

/**
 * Detect WebGPU availability in the current browser.
 */
async function detectWebGPU(): Promise<boolean> {
  if (typeof navigator === 'undefined' || !(navigator as any).gpu) return false;
  try {
    const adapter = await (navigator as any).gpu.requestAdapter();
    return !!adapter;
  } catch {
    return false;
  }
}

/**
 * Get or create the Parakeet model singleton.
 */
async function getModel(onProgress?: ProgressCallback): Promise<any> {
  if (cachedModel) return cachedModel;
  if (loadingPromise) return loadingPromise;

  loadingPromise = (async () => {
    onProgress?.({ status: 'loading', message: 'Loading Parakeet model...' });

    // Load from CDN at runtime — avoids webpack/Terser issues with onnxruntime-web.
    // Use indirect dynamic import so webpack can't statically analyze the URL.
    const importModule = new Function('url', 'return import(url)');
    const { fromHub } = await importModule('https://esm.sh/parakeet.js@1.1.2');

    const backend = (await detectWebGPU()) ? 'webgpu' : 'wasm';

    const fileProgress: Record<string, { loaded: number; total: number }> = {};
    const model = await fromHub('parakeet-tdt-0.6b-v2', {
      backend,
      progress: ({ file, loaded, total }: { file: string; loaded: number; total: number }) => {
        fileProgress[file] = { loaded, total };
        let totalBytes = 0;
        let loadedBytes = 0;
        for (const fp of Object.values(fileProgress)) {
          totalBytes += fp.total || 0;
          loadedBytes += fp.loaded || 0;
        }
        if (totalBytes > 0) {
          const pct = Math.round((loadedBytes / totalBytes) * 100);
          onProgress?.({
            status: 'downloading',
            progress: pct,
            file,
            message: `Downloading Parakeet model... ${pct}%`,
          });
        }
      },
    });

    localStorage.setItem(CACHE_KEY, 'true');
    onProgress?.({ status: 'loading', message: 'Model loaded' });

    cachedModel = model;
    loadingPromise = null;
    return model;
  })();

  return loadingPromise;
}

/**
 * Decode an audio Blob to Float32Array at 16 kHz mono.
 */
async function decodeAudioBlob(blob: Blob): Promise<Float32Array> {
  const arrayBuffer = await blob.arrayBuffer();
  const audioCtx = new AudioContext({ sampleRate: 16000 });
  try {
    const audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);

    // Already 16 kHz mono — return directly
    if (audioBuffer.sampleRate === 16000 && audioBuffer.numberOfChannels === 1) {
      return audioBuffer.getChannelData(0);
    }

    // Resample via OfflineAudioContext
    const numSamples = Math.ceil(audioBuffer.duration * 16000);
    const offlineCtx = new OfflineAudioContext(1, numSamples, 16000);
    const source = offlineCtx.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(offlineCtx.destination);
    source.start();
    const resampled = await offlineCtx.startRendering();
    return resampled.getChannelData(0);
  } finally {
    await audioCtx.close();
  }
}

/**
 * Transcribe an audio Blob offline using Parakeet in the browser.
 *
 * First call downloads the model (~634 MB). Subsequent calls use cached model.
 * Returns the transcribed text.
 */
export async function transcribeOffline(
  audioBlob: Blob,
  onProgress?: ProgressCallback
): Promise<string> {
  try {
    const model = await getModel(onProgress);
    onProgress?.({ status: 'transcribing', message: 'Transcribing audio...' });

    const audioData = await decodeAudioBlob(audioBlob);
    const result = await model.transcribe(audioData, 16000, {
      returnTimestamps: false,
      enableProfiling: false,
    });

    const text = result.utterance_text?.trim() || '';
    onProgress?.({ status: 'done', message: 'Transcription complete' });
    return text;
  } catch (err) {
    const message = err instanceof Error ? err.message : 'Transcription failed';
    onProgress?.({ status: 'error', message });
    throw err;
  }
}
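
A hypothetical call site for this module; `recordedBlob`, the helper name, and the confirm() prompt are assumptions, while isModelCached and transcribeOffline are the module's actual exports:

import { isModelCached, transcribeOffline } from '@/lib/parakeetOffline';

// Sketch: gate the first-time download behind a user prompt, then transcribe.
async function transcribeWithWarning(recordedBlob: Blob): Promise<string | null> {
  if (!isModelCached()) {
    // First call triggers the ~634 MB download; give the user a way out.
    if (!window.confirm('Download the ~634 MB Parakeet model for offline transcription?')) {
      return null;
    }
  }
  return transcribeOffline(recordedBlob, (p) => {
    console.log(`[parakeet] ${p.status}`, p.message ?? '', p.progress ?? '');
  });
}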

src/lib/whisperOffline.ts (deleted, 120 lines)

@@ -1,120 +0,0 @@
/**
 * Offline Whisper transcription using @xenova/transformers (Transformers.js v2).
 * Dynamically imports the library to avoid SSR issues.
 * Uses Xenova/whisper-tiny with quantized weights (~45MB download).
 * Model is cached by the browser after first download.
 */

const MODEL_ID = 'Xenova/whisper-tiny';
const CACHE_KEY = 'whisper-offline-cached';

export interface WhisperProgress {
  status: 'checking' | 'downloading' | 'loading' | 'transcribing' | 'done' | 'error';
  progress?: number; // 0-100 for download progress
  file?: string;
  message?: string;
}

type ProgressCallback = (progress: WhisperProgress) => void;

// Keep a singleton pipeline so we don't reload on subsequent calls
let cachedPipeline: any = null;
let loadingPromise: Promise<any> | null = null;

/**
 * Check if the Whisper model has been downloaded before.
 * Note: this is a best-effort check via localStorage flag.
 * The actual model cache is managed by Transformers.js via Cache API.
 */
export function isModelCached(): boolean {
  if (typeof window === 'undefined') return false;
  return localStorage.getItem(CACHE_KEY) === 'true';
}

/**
 * Get or create the Whisper pipeline singleton.
 */
async function getPipeline(onProgress?: ProgressCallback): Promise<any> {
  if (cachedPipeline) return cachedPipeline;

  // Prevent multiple concurrent loads
  if (loadingPromise) return loadingPromise;

  loadingPromise = (async () => {
    onProgress?.({ status: 'loading', message: 'Loading Whisper model...' });

    const { pipeline, env } = await import('@xenova/transformers');

    // Disable local model checks — always use browser cache / HF Hub
    env.allowLocalModels = false;

    const pipe = await pipeline('automatic-speech-recognition', MODEL_ID, {
      quantized: true,
      progress_callback: (p: any) => {
        if (p.status === 'progress' && p.progress !== undefined) {
          onProgress?.({
            status: 'downloading',
            progress: Math.round(p.progress),
            file: p.file,
            message: `Downloading model... ${Math.round(p.progress)}%`,
          });
        } else if (p.status === 'ready') {
          localStorage.setItem(CACHE_KEY, 'true');
          onProgress?.({ status: 'loading', message: 'Model loaded' });
        }
      },
    });

    cachedPipeline = pipe;
    loadingPromise = null;
    return pipe;
  })();

  return loadingPromise;
}

/**
 * Decode an audio Blob to Float32Array at 16kHz mono.
 */
async function decodeAudioBlob(blob: Blob): Promise<Float32Array> {
  const arrayBuffer = await blob.arrayBuffer();
  const audioCtx = new AudioContext({ sampleRate: 16000 });
  try {
    const audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
    return audioBuffer.getChannelData(0);
  } finally {
    await audioCtx.close();
  }
}

/**
 * Transcribe an audio Blob offline using Whisper in the browser.
 *
 * First call will download the model (~45MB). Subsequent calls use the cached model.
 * Returns the transcribed text.
 */
export async function transcribeOffline(
  audioBlob: Blob,
  onProgress?: ProgressCallback
): Promise<string> {
  try {
    const pipe = await getPipeline(onProgress);
    onProgress?.({ status: 'transcribing', message: 'Transcribing audio...' });

    const audioData = await decodeAudioBlob(audioBlob);
    const result = await pipe(audioData, {
      language: 'en',
      return_timestamps: false,
    });

    const text = (result as any).text?.trim() || '';
    onProgress?.({ status: 'done', message: 'Transcription complete' });
    return text;
  } catch (err) {
    const message = err instanceof Error ? err.message : 'Transcription failed';
    onProgress?.({ status: 'error', message });
    throw err;
  }
}