diff --git a/public/manifest.json b/public/manifest.json index 127797d..f3916a3 100644 --- a/public/manifest.json +++ b/public/manifest.json @@ -35,5 +35,33 @@ "type": "image/png", "purpose": "maskable" } + ], + "shortcuts": [ + { + "name": "Voice Note", + "short_name": "Voice", + "description": "Record a voice note with live transcription", + "url": "/voice", + "icons": [ + { + "src": "/icon-192.png", + "sizes": "192x192", + "type": "image/png" + } + ] + }, + { + "name": "New Note", + "short_name": "Note", + "description": "Create a new note", + "url": "/notes/new", + "icons": [ + { + "src": "/icon-192.png", + "sizes": "192x192", + "type": "image/png" + } + ] + } ] } diff --git a/src/app/voice/page.tsx b/src/app/voice/page.tsx new file mode 100644 index 0000000..90c892a --- /dev/null +++ b/src/app/voice/page.tsx @@ -0,0 +1,747 @@ +'use client'; + +import { useState, useRef, useCallback, useEffect } from 'react'; +import { useRouter } from 'next/navigation'; +import { authFetch } from '@/lib/authFetch'; + +// --- Types --- + +interface Segment { + id: number; + text: string; + start: number; + end: number; +} + +interface WhisperProgress { + status: 'checking' | 'downloading' | 'loading' | 'transcribing' | 'done' | 'error'; + progress?: number; + message?: string; +} + +interface NotebookOption { + id: string; + title: string; +} + +type RecorderState = 'idle' | 'recording' | 'processing' | 'done'; + +// --- Constants --- + +const VOICE_WS_URL = + process.env.NEXT_PUBLIC_VOICE_WS_URL || 'wss://voice.jeffemmett.com'; + +// Web Speech API types +interface ISpeechRecognition extends EventTarget { + continuous: boolean; + interimResults: boolean; + lang: string; + onresult: ((event: any) => void) | null; + onerror: ((event: any) => void) | null; + onend: (() => void) | null; + start(): void; + stop(): void; +} + +function getSpeechRecognition(): (new () => ISpeechRecognition) | null { + if (typeof window === 'undefined') return null; + return (window as 
any).SpeechRecognition || (window as any).webkitSpeechRecognition || null; +} + +// --- Component --- + +export default function VoicePage() { + const router = useRouter(); + + // Recording state + const [state, setState] = useState('idle'); + const [elapsed, setElapsed] = useState(0); + const [streaming, setStreaming] = useState(false); + + // Transcript + const [segments, setSegments] = useState([]); + const [liveText, setLiveText] = useState(''); + const [interimText, setInterimText] = useState(''); + const [finalTranscript, setFinalTranscript] = useState(''); + const [isEditing, setIsEditing] = useState(false); + + // Audio + const [audioUrl, setAudioUrl] = useState(null); + const [duration, setDuration] = useState(0); + + // Upload state + const [uploadedFileUrl, setUploadedFileUrl] = useState(''); + const [uploadedMimeType, setUploadedMimeType] = useState(''); + const [uploadedFileSize, setUploadedFileSize] = useState(0); + + // UI + const [notebooks, setNotebooks] = useState([]); + const [notebookId, setNotebookId] = useState(''); + const [status, setStatus] = useState<{ message: string; type: 'success' | 'error' | 'loading' } | null>(null); + const [offlineProgress, setOfflineProgress] = useState(null); + const [saving, setSaving] = useState(false); + + // Refs + const mediaRecorderRef = useRef(null); + const chunksRef = useRef([]); + const audioBlobRef = useRef(null); + const timerRef = useRef | null>(null); + const startTimeRef = useRef(0); + const recognitionRef = useRef(null); + const liveTextRef = useRef(''); + const segmentsRef = useRef([]); + const wsRef = useRef(null); + const audioContextRef = useRef(null); + const workletNodeRef = useRef(null); + const sourceNodeRef = useRef(null); + const transcriptRef = useRef(null); + const editRef = useRef(null); + + // Load notebooks + useEffect(() => { + authFetch('/api/notebooks') + .then((res) => res.json()) + .then((data) => { + if (Array.isArray(data)) { + setNotebooks(data.map((nb: any) => ({ id: nb.id, 
title: nb.title })));
        }
      })
      .catch(() => {});
  }, []);

  // Per-recording cleanup. Because the effect is keyed on `audioUrl`, the
  // cleanup runs each time the URL changes and once more on unmount: it stops
  // the elapsed-time interval and revokes the blob URL this render captured.
  useEffect(() => {
    return () => {
      if (timerRef.current) clearInterval(timerRef.current);
      if (audioUrl) URL.revokeObjectURL(audioUrl);
    };
  }, [audioUrl]);

  // Unmount-only teardown of live capture resources. Without it, leaving the
  // page mid-recording keeps the microphone tracks, the speech recogniser, the
  // AudioContext and the WebSocket alive. Only refs are read, so the empty
  // dependency list is intentional.
  useEffect(() => {
    return () => {
      // Clear the ref before stopping: the recogniser's `onend` handler
      // restarts recognition for as long as the ref still points at it.
      const recognition = recognitionRef.current;
      recognitionRef.current = null;
      if (recognition) {
        try { recognition.stop(); } catch { /* recogniser was not running */ }
      }

      const recorder = mediaRecorderRef.current;
      if (recorder) {
        if (recorder.state !== 'inactive') {
          try { recorder.stop(); } catch { /* recorder already stopping */ }
        }
        // Stopping the tracks is what actually releases the microphone.
        recorder.stream.getTracks().forEach((track) => track.stop());
      }

      workletNodeRef.current?.disconnect();
      sourceNodeRef.current?.disconnect();
      const liveCtx = audioContextRef.current;
      if (liveCtx && liveCtx.state !== 'closed') liveCtx.close().catch(() => {});

      const socket = wsRef.current;
      if (socket && (socket.readyState === WebSocket.CONNECTING || socket.readyState === WebSocket.OPEN)) {
        socket.close();
      }
    };
  }, []);

  // Auto-scroll transcript: pin the transcript container to its bottom edge
  // whenever segments, final live text or interim text change.
  useEffect(() => {
    if (transcriptRef.current) {
      transcriptRef.current.scrollTop = transcriptRef.current.scrollHeight;
    }
  }, [segments, liveText, interimText]);

  /** Formats a whole number of seconds as zero-padded `mm:ss`. */
  const formatTime = (s: number) => {
    const m = Math.floor(s / 60).toString().padStart(2, '0');
    const sec = (s % 60).toString().padStart(2, '0');
    return `${m}:${sec}`;
  };

  // --- WebSocket live streaming ---

  /**
   * Opens the streaming socket at `${VOICE_WS_URL}/api/voice/stream` and pipes
   * whatever the `pcm-processor` worklet posts into it.
   *
   * Every failure (connect timeout after 5 s, socket error, worklet load
   * error) is caught inside; the function never rejects.
   *
   * @param stream Microphone stream that feeds the AudioContext source node.
   */
  const setupWebSocket = useCallback(async (stream: MediaStream) => {
    try {
      const ws = new WebSocket(`${VOICE_WS_URL}/api/voice/stream`);
      wsRef.current = ws;

      // Wait for the socket to open, giving up after 5 seconds.
      await new Promise<void>((resolve, reject) => {
        const timeout = setTimeout(() => { ws.close(); reject(new Error('timeout')); }, 5000);
        ws.onopen = () => { clearTimeout(timeout); resolve(); };
        ws.onerror = () => { clearTimeout(timeout); reject(new Error('failed')); };
      });

      // Append every `segment` message to the ref (source of truth for
      // callbacks) and mirror it into state for rendering. Frames that are
      // not valid JSON are ignored.
      ws.onmessage = (event) => {
        try {
          const data = JSON.parse(event.data);
          if (data.type === 'segment') {
            const seg = { id: data.id, text: data.text, start: data.start, end: data.end };
            segmentsRef.current = [...segmentsRef.current, seg];
            setSegments([...segmentsRef.current]);
          }
        } catch {}
      };

      // AudioWorklet for PCM16 streaming at 16kHz
      const audioCtx = new AudioContext({ sampleRate: 16000 });
      audioContextRef.current = audioCtx;
      const source = audioCtx.createMediaStreamSource(stream);
      sourceNodeRef.current = source;

      await audioCtx.audioWorklet.addModule('/pcm-processor.js');
      const workletNode = new AudioWorkletNode(audioCtx, 'pcm-processor');
      workletNodeRef.current = workletNode;

      // Forward each worklet message to the server while the socket is open.
      workletNode.port.onmessage = (e) => {
        if (ws.readyState === WebSocket.OPEN) ws.send(e.data as ArrayBuffer);
      };

      source.connect(workletNode);
setStreaming(true);
    } catch {
      setStreaming(false);
    }
  }, []);

  // --- Web Speech API (live local) ---

  /**
   * Starts browser speech recognition (continuous, with interim results,
   * `en-US`) and mirrors its output into `liveText` / `interimText`.
   * Does nothing when the browser exposes no SpeechRecognition constructor.
   *
   * The recogniser is restarted whenever it ends on its own. The results list
   * only covers the current session, so text finalised before a restart is
   * carried in `committedText`; otherwise each restart would erase the
   * transcript captured so far.
   */
  const startSpeechRecognition = useCallback(() => {
    const SpeechRecognition = getSpeechRecognition();
    if (!SpeechRecognition) return;

    const recognition = new SpeechRecognition();
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = 'en-US';

    // Errors after which a restart would only fail again in the same way.
    const fatalErrors = ['not-allowed', 'service-not-allowed', 'audio-capture', 'language-not-supported'];

    let committedText = ''; // finalised text from sessions that already ended
    let sessionText = '';   // finalised text of the session currently running
    const combinedText = () => [committedText, sessionText].filter(Boolean).join(' ');

    recognition.onresult = (event: any) => {
      let finalized = '';
      let interim = '';
      for (let i = 0; i < event.results.length; i++) {
        const result = event.results[i];
        if (result.isFinal) {
          finalized += result[0].transcript.trim() + ' ';
        } else {
          interim += result[0].transcript;
        }
      }
      sessionText = finalized.trim();
      const combined = combinedText();
      liveTextRef.current = combined;
      setLiveText(combined);
      setInterimText(interim.trim());
    };

    recognition.onerror = (event: any) => {
      // Dropping the ref disables the auto-restart in `onend`, so a denied
      // permission or missing microphone cannot cause a start/error/end loop.
      if (fatalErrors.includes(event?.error) && recognitionRef.current === recognition) {
        recognitionRef.current = null;
      }
    };
    recognition.onend = () => {
      // Auto-restart (Chrome stops after ~60s silence)
      if (recognitionRef.current !== recognition) return;
      // Bank this session's text before the new session resets the results.
      committedText = combinedText();
      sessionText = '';
      try { recognition.start(); } catch {}
    };

    recognitionRef.current = recognition;
    try { recognition.start(); } catch {}
  }, []);

  /**
   * Stops the active recogniser, if any, and clears the interim text.
   * The ref is cleared before `stop()` so the `onend` handler does not
   * restart recognition.
   */
  const stopSpeechRecognition = useCallback(() => {
    if (recognitionRef.current) {
      const ref = recognitionRef.current;
      recognitionRef.current = null;
      try { ref.stop(); } catch {}
    }
    setInterimText('');
  }, []);

  // --- Cleanup streaming ---

  /**
   * Releases the streaming pipeline: disconnects the worklet and source
   * nodes, closes the AudioContext and the WebSocket, and clears the
   * `streaming` flag. Safe to call when nothing is set up.
   */
  const cleanupStreaming = useCallback(() => {
    if (workletNodeRef.current) { workletNodeRef.current.disconnect(); workletNodeRef.current = null; }
    if (sourceNodeRef.current) { sourceNodeRef.current.disconnect(); sourceNodeRef.current = null; }
    if (audioContextRef.current) {
      if (audioContextRef.current.state !== 'closed') {
        audioContextRef.current.close().catch(() => {});
      }
      audioContextRef.current = null;
    }
    if (wsRef.current) {
      // A socket that is still CONNECTING must be closed too; otherwise it can
      // open after recording has stopped and set up audio streaming anyway.
      const { readyState } = wsRef.current;
      if (readyState === WebSocket.CONNECTING || readyState === WebSocket.OPEN) {
        wsRef.current.close();
      }
      wsRef.current = null;
    }
    setStreaming(false);
  }, []);

  // --- Start recording ---

  const
startRecording = useCallback(async () => {
    // Starts a new capture: resets transcript and status state, requests the
    // microphone, starts the MediaRecorder and the elapsed-time ticker, then
    // launches both live transcription paths. Failures are reported through
    // `status`; nothing is thrown to the caller.
    setSegments([]);
    segmentsRef.current = [];
    setLiveText('');
    liveTextRef.current = '';
    setInterimText('');
    setFinalTranscript('');
    setIsEditing(false);
    setStatus(null);
    setOfflineProgress(null);
    // Drop the previous recording's upload result. Otherwise a failed upload
    // of the new recording would save the note with the old file URL.
    setUploadedFileUrl('');
    setUploadedMimeType('');
    setUploadedFileSize(0);

    let stream: MediaStream | undefined;
    try {
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus')
        ? 'audio/webm;codecs=opus'
        : 'audio/webm';

      const recorder = new MediaRecorder(stream, { mimeType });
      chunksRef.current = [];
      recorder.ondataavailable = (e) => { if (e.data.size > 0) chunksRef.current.push(e.data); };
      recorder.start(1000); // emit a chunk every second
      mediaRecorderRef.current = recorder;

      startTimeRef.current = Date.now();
      setState('recording');
      setElapsed(0);
      timerRef.current = setInterval(() => {
        setElapsed(Math.floor((Date.now() - startTimeRef.current) / 1000));
      }, 1000);

      // Start both transcription methods in parallel. setupWebSocket handles
      // its own failures, so it is deliberately not awaited.
      void setupWebSocket(stream);
      startSpeechRecognition();

    } catch (err) {
      // Release the microphone if anything failed after it was granted.
      stream?.getTracks().forEach((track) => track.stop());
      setStatus({ message: err instanceof Error ? err.message : 'Microphone access denied', type: 'error' });
    }
  }, [setupWebSocket, startSpeechRecognition]);

  // --- Stop recording ---

  /**
   * Ends the current capture and resolves the best available transcript.
   *
   * Order of work:
   *  1. Stop the timer and the MediaRecorder immediately. This ends audio
   *     capture at the moment of the request and makes the `inactive` guard
   *     reject a second call while the first is still finishing.
   *  2. Stop local speech recognition, ask the socket for its final text
   *     (waiting at most 5 s) and tear the streaming pipeline down.
   *  3. Publish the audio blob and run the transcript cascade: batch API,
   *     then live text (socket or Web Speech), then the offline model.
   *
   * Does nothing when no recorder is active.
   */
  const stopRecording = useCallback(async () => {
    const recorder = mediaRecorderRef.current;
    if (!recorder || recorder.state === 'inactive') return;

    if (timerRef.current) { clearInterval(timerRef.current); timerRef.current = null; }
    setDuration(Math.floor((Date.now() - startTimeRef.current) / 1000));

    // Stop the recorder first; the blob is collected further down.
    const blobReady = new Promise<Blob>((resolve) => {
      recorder.onstop = () => {
        recorder.stream.getTracks().forEach((t) => t.stop());
        resolve(new Blob(chunksRef.current, { type: recorder.mimeType }));
      };
      recorder.stop();
    });
    setState('processing');

    // Capture live text before stopping
    const capturedLive = liveTextRef.current;
    stopSpeechRecognition();

    // Get WS final text
    let wsFullText = '';
    const ws = wsRef.current;
    if (ws && ws.readyState === WebSocket.OPEN) {
      try {
        wsFullText = await new Promise<string>((resolve) => {
          const finish = (text: string) => {
            clearTimeout(timeout);
            ws.removeEventListener('message', onDone);
            resolve(text);
          };
          // Only `done` is handled here. `segment` messages are already
          // appended by the socket's own `onmessage` handler; handling them
          // here as well would record every late segment twice.
          const onDone = (event: MessageEvent) => {
            try {
              const data = JSON.parse(event.data);
              if (data.type === 'done') finish(data.fullText || '');
            } catch { /* ignore frames that are not JSON */ }
          };
          const timeout = setTimeout(() => finish(''), 5000);
          ws.addEventListener('message', onDone);
          try {
            ws.send(JSON.stringify({ type: 'end' }));
          } catch {
            finish('');
          }
        });
      } catch { /* fall back to segments or local text below */ }
    }
    cleanupStreaming();

    const blob = await blobReady;
    audioBlobRef.current = blob;

    if (audioUrl) URL.revokeObjectURL(audioUrl);
    const url = URL.createObjectURL(blob);
    setAudioUrl(url);

    // --- Three-tier transcription cascade ---

    // Show immediate live text while we process
    const immediateLive = wsFullText || (segmentsRef.current.length > 0
      ? segmentsRef.current.map((s) => s.text).join(' ')
      : capturedLive);
    if (immediateLive) setFinalTranscript(immediateLive);

    // Tier 1a: upload the recording. A failure here is tolerated, and it no
    // longer prevents the batch transcription below from being attempted.
    try {
      setStatus({ message: 'Uploading recording...', type: 'loading' });
      const uploadForm = new FormData();
      uploadForm.append('file', blob, 'voice-note.webm');
      const uploadRes = await authFetch('/api/uploads', { method: 'POST', body: uploadForm });
      if (uploadRes.ok) {
        const uploadResult = await uploadRes.json();
        setUploadedFileUrl(uploadResult.url);
        setUploadedMimeType(uploadResult.mimeType);
        setUploadedFileSize(uploadResult.size);
      }
    } catch (err) {
      console.warn('Tier 1 (upload) failed', err);
    }

    // Tier 1b: batch transcription API
    let bestTranscript = '';
    try {
      setStatus({ message: 'Transcribing...', type: 'loading' });
      const tForm = new FormData();
      tForm.append('audio', blob, 'voice-note.webm');
      const tRes = await authFetch('/api/voice/transcribe', { method: 'POST', body: tForm });
      if (tRes.ok) {
        const tResult = await tRes.json();
        bestTranscript = tResult.text || '';
      }
    } catch (err) {
      console.warn('Tier 1 (batch API) failed', err);
    }

    // Tier 2: WebSocket / Web Speech API (already captured)
    if (!bestTranscript) bestTranscript = immediateLive || '';

    // Tier 3: Offline Parakeet.js
    if (!bestTranscript) {
      try {
        setStatus({ message: 'Loading offline model...', type: 'loading' });
        const { transcribeOffline } = await import('@/lib/parakeetOffline');
        bestTranscript = await transcribeOffline(blob, (p) => setOfflineProgress(p));
      } catch (err) {
        console.warn('Tier 3 (offline model) failed', err);
      } finally {
        setOfflineProgress(null);
      }
    }

    setFinalTranscript(bestTranscript);
    setStatus(null);
    setState('done');
  }, [audioUrl, stopSpeechRecognition, cleanupStreaming]);

  // --- Toggle ---

  /**
   * Starts a recording from `idle` or `done`, stops it from `recording`, and
   * ignores the request while `processing`.
   */
  const toggleRecording = useCallback(() => {
    if (state === 'idle' || state === 'done') void startRecording();
    else if (state === 'recording') void stopRecording();
  }, [state, startRecording, stopRecording]);

  // --- Save ---

  const saveToRNotes = useCallback(async () =>
{ + setSaving(true); + setStatus({ message: 'Saving...', type: 'loading' }); + + const now = new Date(); + const timeStr = now.toLocaleString('en-US', { + month: 'short', day: 'numeric', hour: 'numeric', minute: '2-digit', hour12: true + }); + + const transcript = finalTranscript.trim(); + const body: Record = { + title: `Voice note - ${timeStr}`, + content: transcript + ? `

${transcript.replace(/\n/g, '

')}

` + : '

Voice recording (no transcript)

', + type: 'AUDIO', + mimeType: uploadedMimeType || 'audio/webm', + fileUrl: uploadedFileUrl, + fileSize: uploadedFileSize, + duration, + tags: ['voice'], + }; + if (notebookId) body.notebookId = notebookId; + + // If upload failed earlier, try uploading now + if (!uploadedFileUrl && audioBlobRef.current) { + try { + const form = new FormData(); + form.append('file', audioBlobRef.current, 'voice-note.webm'); + const res = await authFetch('/api/uploads', { method: 'POST', body: form }); + if (res.ok) { + const result = await res.json(); + body.fileUrl = result.url; + body.mimeType = result.mimeType; + body.fileSize = result.size; + } + } catch {} + } + + try { + const res = await authFetch('/api/notes', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + }); + + if (!res.ok) throw new Error('Save failed'); + + const note = await res.json(); + setStatus({ message: 'Saved!', type: 'success' }); + setTimeout(() => router.push(`/notes/${note.id}`), 1000); + } catch (err) { + setStatus({ message: err instanceof Error ? 
err.message : 'Save failed', type: 'error' });
    } finally {
      setSaving(false);
    }
  }, [finalTranscript, uploadedFileUrl, uploadedMimeType, uploadedFileSize, duration, notebookId, router]);

  // --- Copy ---

  /**
   * Copies the final transcript to the clipboard and shows "Copied!" for two
   * seconds. Does nothing when the transcript is blank.
   */
  const copyTranscript = useCallback(async () => {
    if (!finalTranscript.trim()) return;
    try {
      await navigator.clipboard.writeText(finalTranscript);
      setStatus({ message: 'Copied!', type: 'success' });
      // Clear only our own message, so a newer status shown within the two
      // seconds (for example a save error) is not wiped.
      setTimeout(() => {
        setStatus((current) => (current?.message === 'Copied!' ? null : current));
      }, 2000);
    } catch {
      setStatus({ message: 'Copy failed', type: 'error' });
    }
  }, [finalTranscript]);

  // --- Reset ---

  /**
   * Returns the page to `idle`: clears transcript, timing, status and upload
   * state, revokes the audio blob URL and drops the stored blob.
   */
  const discard = useCallback(() => {
    setState('idle');
    setSegments([]);
    segmentsRef.current = [];
    setLiveText('');
    liveTextRef.current = '';
    setInterimText('');
    setFinalTranscript('');
    setIsEditing(false);
    setElapsed(0);
    setDuration(0);
    setStatus(null);
    setOfflineProgress(null);
    setUploadedFileUrl('');
    setUploadedMimeType('');
    setUploadedFileSize(0);
    if (audioUrl) { URL.revokeObjectURL(audioUrl); setAudioUrl(null); }
    audioBlobRef.current = null;
  }, [audioUrl]);

  // --- Keyboard ---

  // Global shortcuts: Space toggles recording; Ctrl/Cmd+Enter saves once the
  // recording is done.
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      // A held key auto-repeats; acting on repeats would toggle or save
      // several times for one key press.
      if (e.repeat) return;

      const target = e.target as HTMLElement;
      if (target.tagName === 'TEXTAREA' || target.tagName === 'INPUT' || target.isContentEditable) return;

      if (e.code === 'Space') {
        // Leave Space to focused controls that activate on it natively.
        // Otherwise a focused button could run its click handler as well as
        // this shortcut.
        const tag = target.tagName;
        if (tag === 'BUTTON' || tag === 'SELECT' || tag === 'A') return;
        e.preventDefault();
        toggleRecording();
      }
      // `!saving` stops the shortcut from starting a second save request
      // while one is still in flight.
      if ((e.ctrlKey || e.metaKey) && e.code === 'Enter' && state === 'done' && !saving) {
        e.preventDefault();
        void saveToRNotes();
      }
    };
    window.addEventListener('keydown', handler);
    return () => window.removeEventListener('keydown', handler);
  }, [toggleRecording, saveToRNotes, state, saving]);

  // --- Render ---

  // True when any live text exists to show while recording.
  const hasLiveText = Boolean(liveText || interimText) || segments.length > 0;
  // True once a finished recording has a non-blank transcript.
  const hasTranscript = state === 'done' && finalTranscript.trim().length > 0;

  return (
+ {/* Header */} +
+
+
+ + + + +
+
+

rVoice

+

Voice notes for rNotes

+
+
+
+ {streaming && ( + + + Live + + )} + {getSpeechRecognition() && state === 'recording' && !streaming && ( + + + Local + + )} +
+
+ + {/* Main content */} +
+ + {/* Record button + timer */} +
+ + +
+ {formatTime(state === 'done' ? duration : elapsed)} +
+ +

+ {state === 'idle' && 'Tap to record or press Space'} + {state === 'recording' && 'Recording... tap to stop'} + {state === 'processing' && (offlineProgress?.message || 'Processing...')} + {state === 'done' && 'Recording complete'} +

+
+ + {/* Offline model progress bar */} + {offlineProgress && offlineProgress.status === 'downloading' && ( +
+
{offlineProgress.message}
+
+
+
+
+ )} + + {/* Live transcript (while recording) */} + {state === 'recording' && hasLiveText && ( +
+
Live transcript
+
+ {segments.length > 0 && ( +
+ {segments.map((seg) => ( +

{seg.text}

+ ))} +
+ )} + {segments.length === 0 && liveText && ( +

{liveText}

+ )} + {interimText && ( +

{interimText}

+ )} +
+
+ )} + + {/* Audio player + transcript (after recording) */} + {(state === 'done' || state === 'processing') && audioUrl && ( +
+