From d236b81a11f72fbddc862b7f114d90c3dfd3e101 Mon Sep 17 00:00:00 2001 From: Jeff Emmett Date: Tue, 24 Feb 2026 17:43:04 -0800 Subject: [PATCH] feat: add /voice PWA route with 3-tier live transcription Dedicated standalone voice recorder page at /voice that works as an installable PWA. Records audio with three transcription tiers running in parallel: WebSocket streaming (live segments), Web Speech API (live local), and batch Whisper API (high quality). Falls back to offline Parakeet.js if all network tiers fail. Includes editable transcript, notebook selection, copy-to-clipboard, and keyboard shortcuts. PWA manifest updated with Voice Note shortcut for quick access from taskbar right-click menu. Co-Authored-By: Claude Opus 4.6 --- public/manifest.json | 28 ++ src/app/voice/page.tsx | 747 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 775 insertions(+) create mode 100644 src/app/voice/page.tsx diff --git a/public/manifest.json b/public/manifest.json index 127797d..f3916a3 100644 --- a/public/manifest.json +++ b/public/manifest.json @@ -35,5 +35,33 @@ "type": "image/png", "purpose": "maskable" } + ], + "shortcuts": [ + { + "name": "Voice Note", + "short_name": "Voice", + "description": "Record a voice note with live transcription", + "url": "/voice", + "icons": [ + { + "src": "/icon-192.png", + "sizes": "192x192", + "type": "image/png" + } + ] + }, + { + "name": "New Note", + "short_name": "Note", + "description": "Create a new note", + "url": "/notes/new", + "icons": [ + { + "src": "/icon-192.png", + "sizes": "192x192", + "type": "image/png" + } + ] + } ] } diff --git a/src/app/voice/page.tsx b/src/app/voice/page.tsx new file mode 100644 index 0000000..90c892a --- /dev/null +++ b/src/app/voice/page.tsx @@ -0,0 +1,747 @@ +'use client'; + +import { useState, useRef, useCallback, useEffect } from 'react'; +import { useRouter } from 'next/navigation'; +import { authFetch } from '@/lib/authFetch'; + +// --- Types --- + +interface Segment { + id: number; + text: string; + start: number; + end: number; +} + +interface WhisperProgress { + status: 'checking' | 'downloading' | 'loading' | 'transcribing' | 'done' | 'error'; + progress?: number; + message?: string; +} + +interface NotebookOption { + id: string; + title: string; +} + +type RecorderState = 'idle' | 'recording' | 'processing' | 'done'; + +// --- Constants --- + +const VOICE_WS_URL = + process.env.NEXT_PUBLIC_VOICE_WS_URL || 'wss://voice.jeffemmett.com'; + +// Web Speech API types +interface ISpeechRecognition extends EventTarget { + continuous: boolean; + interimResults: boolean; + lang: string; + onresult: ((event: any) => void) | null; + onerror: ((event: any) => void) | null; + onend: (() => void) | null; + start(): void; + stop(): void; +} + +function getSpeechRecognition(): (new () => ISpeechRecognition) | null { + if (typeof window === 'undefined') return null; + return (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition || null; +} + +// --- Component --- + +export default function VoicePage() { + const router = useRouter(); + + // Recording state + const [state, setState] = useState('idle'); + const [elapsed, setElapsed] = useState(0); + const [streaming, setStreaming] = useState(false); + + // Transcript + const [segments, setSegments] = useState([]); + const [liveText, setLiveText] = useState(''); + const [interimText, setInterimText] = useState(''); + const [finalTranscript, setFinalTranscript] = useState(''); + const [isEditing, setIsEditing] = useState(false); + + // Audio + const [audioUrl, setAudioUrl] = useState(null); + const [duration, setDuration] = useState(0); + + // Upload state + const [uploadedFileUrl, setUploadedFileUrl] = useState(''); + const [uploadedMimeType, setUploadedMimeType] = useState(''); + const [uploadedFileSize, setUploadedFileSize] = useState(0); + + // UI + const [notebooks, setNotebooks] = useState([]); + const [notebookId, setNotebookId] = useState(''); + const [status, setStatus] = useState<{ message: string; type: 'success' | 'error' | 'loading' } | null>(null); + const [offlineProgress, setOfflineProgress] = useState(null); + const [saving, setSaving] = useState(false); + + // Refs + const mediaRecorderRef = useRef(null); + const chunksRef = useRef([]); + const audioBlobRef = useRef(null); + const timerRef = useRef | null>(null); + const startTimeRef = useRef(0); + const recognitionRef = useRef(null); + const liveTextRef = useRef(''); + const segmentsRef = useRef([]); + const wsRef = useRef(null); + const audioContextRef = useRef(null); + const workletNodeRef = useRef(null); + const sourceNodeRef = useRef(null); + const transcriptRef = useRef(null); + const editRef = useRef(null); + + // Load notebooks + useEffect(() => { + authFetch('/api/notebooks') + .then((res) => res.json()) + .then((data) => { + if (Array.isArray(data)) { + setNotebooks(data.map((nb: any) => ({ id: nb.id, title: nb.title }))); + } + }) + .catch(() => {}); + }, []); + + // Cleanup on unmount + useEffect(() => { + return () => { + if (timerRef.current) clearInterval(timerRef.current); + if (audioUrl) URL.revokeObjectURL(audioUrl); + }; + }, [audioUrl]); + + // Auto-scroll transcript + useEffect(() => { + if (transcriptRef.current) { + transcriptRef.current.scrollTop = transcriptRef.current.scrollHeight; + } + }, [segments, liveText, interimText]); + + const formatTime = (s: number) => { + const m = Math.floor(s / 60).toString().padStart(2, '0'); + const sec = (s % 60).toString().padStart(2, '0'); + return `${m}:${sec}`; + }; + + // --- WebSocket live streaming --- + + const setupWebSocket = useCallback(async (stream: MediaStream) => { + try { + const ws = new WebSocket(`${VOICE_WS_URL}/api/voice/stream`); + wsRef.current = ws; + + await new Promise((resolve, reject) => { + const timeout = setTimeout(() => { ws.close(); reject(new Error('timeout')); }, 5000); + ws.onopen = () => { clearTimeout(timeout); resolve(); }; + ws.onerror = () => { clearTimeout(timeout); reject(new Error('failed')); }; + }); + + ws.onmessage = (event) => { + try { + const data = JSON.parse(event.data); + if (data.type === 'segment') { + const seg = { id: data.id, text: data.text, start: data.start, end: data.end }; + segmentsRef.current = [...segmentsRef.current, seg]; + setSegments([...segmentsRef.current]); + } + } catch {} + }; + + // AudioWorklet for PCM16 streaming at 16kHz + const audioCtx = new AudioContext({ sampleRate: 16000 }); + audioContextRef.current = audioCtx; + const source = audioCtx.createMediaStreamSource(stream); + sourceNodeRef.current = source; + + await audioCtx.audioWorklet.addModule('/pcm-processor.js'); + const workletNode = new AudioWorkletNode(audioCtx, 'pcm-processor'); + workletNodeRef.current = workletNode; + + workletNode.port.onmessage = (e) => { + if (ws.readyState === WebSocket.OPEN) ws.send(e.data as ArrayBuffer); + }; + + source.connect(workletNode); + setStreaming(true); + } catch { + setStreaming(false); + } + }, []); + + // --- Web Speech API (live local) --- + + const startSpeechRecognition = useCallback(() => { + const SpeechRecognition = getSpeechRecognition(); + if (!SpeechRecognition) return; + + const recognition = new SpeechRecognition(); + recognition.continuous = true; + recognition.interimResults = true; + recognition.lang = 'en-US'; + + recognition.onresult = (event: any) => { + let finalized = ''; + let interim = ''; + for (let i = 0; i < event.results.length; i++) { + if (event.results[i].isFinal) { + finalized += event.results[i][0].transcript.trim() + ' '; + } else { + interim += event.results[i][0].transcript; + } + } + liveTextRef.current = finalized.trim(); + setLiveText(finalized.trim()); + setInterimText(interim.trim()); + }; + + recognition.onerror = () => {}; + recognition.onend = () => { + // Auto-restart (Chrome stops after ~60s silence) + if (recognitionRef.current === recognition) { + try { recognition.start(); } catch {} + } + }; + + recognitionRef.current = recognition; + try { recognition.start(); } catch {} + }, []); + + const stopSpeechRecognition = useCallback(() => { + if (recognitionRef.current) { + const ref = recognitionRef.current; + recognitionRef.current = null; + try { ref.stop(); } catch {} + } + setInterimText(''); + }, []); + + // --- Cleanup streaming --- + + const cleanupStreaming = useCallback(() => { + if (workletNodeRef.current) { workletNodeRef.current.disconnect(); workletNodeRef.current = null; } + if (sourceNodeRef.current) { sourceNodeRef.current.disconnect(); sourceNodeRef.current = null; } + if (audioContextRef.current && audioContextRef.current.state !== 'closed') { + audioContextRef.current.close().catch(() => {}); + audioContextRef.current = null; + } + if (wsRef.current) { + if (wsRef.current.readyState === WebSocket.OPEN) wsRef.current.close(); + wsRef.current = null; + } + setStreaming(false); + }, []); + + // --- Start recording --- + + const startRecording = useCallback(async () => { + setSegments([]); + segmentsRef.current = []; + setLiveText(''); + liveTextRef.current = ''; + setInterimText(''); + setFinalTranscript(''); + setIsEditing(false); + setStatus(null); + setOfflineProgress(null); + + try { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + + const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus') + ? 'audio/webm;codecs=opus' + : 'audio/webm'; + + const recorder = new MediaRecorder(stream, { mimeType }); + chunksRef.current = []; + recorder.ondataavailable = (e) => { if (e.data.size > 0) chunksRef.current.push(e.data); }; + recorder.start(1000); + mediaRecorderRef.current = recorder; + + startTimeRef.current = Date.now(); + setState('recording'); + setElapsed(0); + timerRef.current = setInterval(() => { + setElapsed(Math.floor((Date.now() - startTimeRef.current) / 1000)); + }, 1000); + + // Start both transcription methods in parallel + setupWebSocket(stream); + startSpeechRecognition(); + + } catch (err) { + setStatus({ message: err instanceof Error ? err.message : 'Microphone access denied', type: 'error' }); + } + }, [setupWebSocket, startSpeechRecognition]); + + // --- Stop recording --- + + const stopRecording = useCallback(async () => { + const recorder = mediaRecorderRef.current; + if (!recorder || recorder.state === 'inactive') return; + + if (timerRef.current) { clearInterval(timerRef.current); timerRef.current = null; } + const dur = Math.floor((Date.now() - startTimeRef.current) / 1000); + setDuration(dur); + + // Capture live text before stopping + const capturedLive = liveTextRef.current; + stopSpeechRecognition(); + + // Get WS final text + let wsFullText = ''; + if (wsRef.current && wsRef.current.readyState === WebSocket.OPEN) { + try { + const ws = wsRef.current; + wsFullText = await new Promise((resolve) => { + const timeout = setTimeout(() => resolve(''), 5000); + const handler = (event: MessageEvent) => { + try { + const data = JSON.parse(event.data); + if (data.type === 'segment') { + const seg = { id: data.id, text: data.text, start: data.start, end: data.end }; + segmentsRef.current = [...segmentsRef.current, seg]; + setSegments([...segmentsRef.current]); + } + if (data.type === 'done') { + clearTimeout(timeout); + ws.removeEventListener('message', handler); + resolve(data.fullText || ''); + } + } catch {} + }; + ws.addEventListener('message', handler); + ws.send(JSON.stringify({ type: 'end' })); + }); + } catch {} + } + cleanupStreaming(); + + setState('processing'); + + // Stop recorder + const blob = await new Promise((resolve) => { + recorder.onstop = () => { + recorder.stream.getTracks().forEach((t) => t.stop()); + resolve(new Blob(chunksRef.current, { type: recorder.mimeType })); + }; + recorder.stop(); + }); + audioBlobRef.current = blob; + + if (audioUrl) URL.revokeObjectURL(audioUrl); + const url = URL.createObjectURL(blob); + setAudioUrl(url); + + // --- Three-tier transcription cascade --- + + // Show immediate live text while we process + const immediateLive = wsFullText || (segmentsRef.current.length > 0 + ? segmentsRef.current.map(s => s.text).join(' ') + : capturedLive); + if (immediateLive) setFinalTranscript(immediateLive); + + // Tier 1: Upload + batch API + let bestTranscript = ''; + try { + setStatus({ message: 'Uploading recording...', type: 'loading' }); + const uploadForm = new FormData(); + uploadForm.append('file', blob, 'voice-note.webm'); + const uploadRes = await authFetch('/api/uploads', { method: 'POST', body: uploadForm }); + + if (uploadRes.ok) { + const uploadResult = await uploadRes.json(); + setUploadedFileUrl(uploadResult.url); + setUploadedMimeType(uploadResult.mimeType); + setUploadedFileSize(uploadResult.size); + + setStatus({ message: 'Transcribing...', type: 'loading' }); + const tForm = new FormData(); + tForm.append('audio', blob, 'voice-note.webm'); + const tRes = await authFetch('/api/voice/transcribe', { method: 'POST', body: tForm }); + if (tRes.ok) { + const tResult = await tRes.json(); + bestTranscript = tResult.text || ''; + } + } + } catch { + console.warn('Tier 1 (batch API) failed'); + } + + // Tier 2: WebSocket / Web Speech API (already captured) + if (!bestTranscript) bestTranscript = immediateLive || ''; + + // Tier 3: Offline Parakeet.js + if (!bestTranscript) { + try { + setStatus({ message: 'Loading offline model...', type: 'loading' }); + const { transcribeOffline } = await import('@/lib/parakeetOffline'); + bestTranscript = await transcribeOffline(blob, (p) => setOfflineProgress(p)); + setOfflineProgress(null); + } catch { + setOfflineProgress(null); + } + } + + setFinalTranscript(bestTranscript); + setStatus(null); + setState('done'); + }, [audioUrl, stopSpeechRecognition, cleanupStreaming]); + + // --- Toggle --- + + const toggleRecording = useCallback(() => { + if (state === 'idle' || state === 'done') startRecording(); + else if (state === 'recording') stopRecording(); + }, [state, startRecording, stopRecording]); + + // --- Save --- + + const saveToRNotes = useCallback(async () => { + setSaving(true); + setStatus({ message: 'Saving...', type: 'loading' }); + + const now = new Date(); + const timeStr = now.toLocaleString('en-US', { + month: 'short', day: 'numeric', hour: 'numeric', minute: '2-digit', hour12: true + }); + + const transcript = finalTranscript.trim(); + const body: Record = { + title: `Voice note - ${timeStr}`, + content: transcript + ? `

${transcript.replace(/\n/g, '

')}

` + : '

Voice recording (no transcript)

', + type: 'AUDIO', + mimeType: uploadedMimeType || 'audio/webm', + fileUrl: uploadedFileUrl, + fileSize: uploadedFileSize, + duration, + tags: ['voice'], + }; + if (notebookId) body.notebookId = notebookId; + + // If upload failed earlier, try uploading now + if (!uploadedFileUrl && audioBlobRef.current) { + try { + const form = new FormData(); + form.append('file', audioBlobRef.current, 'voice-note.webm'); + const res = await authFetch('/api/uploads', { method: 'POST', body: form }); + if (res.ok) { + const result = await res.json(); + body.fileUrl = result.url; + body.mimeType = result.mimeType; + body.fileSize = result.size; + } + } catch {} + } + + try { + const res = await authFetch('/api/notes', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + }); + + if (!res.ok) throw new Error('Save failed'); + + const note = await res.json(); + setStatus({ message: 'Saved!', type: 'success' }); + setTimeout(() => router.push(`/notes/${note.id}`), 1000); + } catch (err) { + setStatus({ message: err instanceof Error ? err.message : 'Save failed', type: 'error' }); + } finally { + setSaving(false); + } + }, [finalTranscript, uploadedFileUrl, uploadedMimeType, uploadedFileSize, duration, notebookId, router]); + + // --- Copy --- + + const copyTranscript = useCallback(async () => { + if (!finalTranscript.trim()) return; + try { + await navigator.clipboard.writeText(finalTranscript); + setStatus({ message: 'Copied!', type: 'success' }); + setTimeout(() => setStatus(null), 2000); + } catch { + setStatus({ message: 'Copy failed', type: 'error' }); + } + }, [finalTranscript]); + + // --- Reset --- + + const discard = useCallback(() => { + setState('idle'); + setSegments([]); + segmentsRef.current = []; + setLiveText(''); + liveTextRef.current = ''; + setInterimText(''); + setFinalTranscript(''); + setIsEditing(false); + setElapsed(0); + setDuration(0); + setStatus(null); + setOfflineProgress(null); + setUploadedFileUrl(''); + setUploadedMimeType(''); + setUploadedFileSize(0); + if (audioUrl) { URL.revokeObjectURL(audioUrl); setAudioUrl(null); } + audioBlobRef.current = null; + }, [audioUrl]); + + // --- Keyboard --- + + useEffect(() => { + const handler = (e: KeyboardEvent) => { + const target = e.target as HTMLElement; + if (target.tagName === 'TEXTAREA' || target.tagName === 'INPUT' || target.isContentEditable) return; + + if (e.code === 'Space') { + e.preventDefault(); + toggleRecording(); + } + if ((e.ctrlKey || e.metaKey) && e.code === 'Enter' && state === 'done') { + e.preventDefault(); + saveToRNotes(); + } + }; + window.addEventListener('keydown', handler); + return () => window.removeEventListener('keydown', handler); + }, [toggleRecording, saveToRNotes, state]); + + // --- Render --- + + const hasLiveText = liveText || interimText || segments.length > 0; + const hasTranscript = state === 'done' && finalTranscript.trim().length > 0; + + return ( +
+ {/* Header */} +
+
+
+ + + + +
+
+

rVoice

+

Voice notes for rNotes

+
+
+
+ {streaming && ( + + + Live + + )} + {getSpeechRecognition() && state === 'recording' && !streaming && ( + + + Local + + )} +
+
+ + {/* Main content */} +
+ + {/* Record button + timer */} +
+ + +
+ {formatTime(state === 'done' ? duration : elapsed)} +
+ +

+ {state === 'idle' && 'Tap to record or press Space'} + {state === 'recording' && 'Recording... tap to stop'} + {state === 'processing' && (offlineProgress?.message || 'Processing...')} + {state === 'done' && 'Recording complete'} +

+
+ + {/* Offline model progress bar */} + {offlineProgress && offlineProgress.status === 'downloading' && ( +
+
{offlineProgress.message}
+
+
+
+
+ )} + + {/* Live transcript (while recording) */} + {state === 'recording' && hasLiveText && ( +
+
Live transcript
+
+ {segments.length > 0 && ( +
+ {segments.map((seg) => ( +

{seg.text}

+ ))} +
+ )} + {segments.length === 0 && liveText && ( +

{liveText}

+ )} + {interimText && ( +

{interimText}

+ )} +
+
+ )} + + {/* Audio player + transcript (after recording) */} + {(state === 'done' || state === 'processing') && audioUrl && ( +
+