/**
 * <folk-voice-recorder> — Standalone voice recorder web component.
 *
 * Full-page recorder with MediaRecorder, SpeechDictation (live),
 * and three-tier transcription cascade:
 *   1. Server (voice-command-api)
 *   2. Live (Web Speech API captured during recording)
 *   3. Offline (Parakeet TDT 0.6B in-browser)
 *
 * Saves AUDIO notes to rNotes via REST API with Tiptap-JSON formatted
 * timestamped transcript segments.
 */

import { SpeechDictation } from '../../../lib/speech-dictation';
import { transcribeOffline, isModelCached } from '../../../lib/parakeet-offline';
import type { TranscriptionProgress } from '../../../lib/parakeet-offline';
import type { TranscriptSegment } from '../../../lib/folk-transcription';
import { getAccessToken } from '../../../shared/components/rstack-identity';

type RecorderState = 'idle' | 'recording' | 'processing' | 'done';

/** A notebook entry as shown in the notebook picker. */
interface NotebookOption {
  id: string;
  title: string;
}

/** Recorder container formats, in order of preference. */
const MIME_CANDIDATES = ['audio/webm;codecs=opus', 'audio/webm', 'audio/mp4'] as const;

/** How long an informational toast stays visible. */
const TOAST_MS = 2000;

/** How long an error toast stays visible. */
const ERROR_TOAST_MS = 4000;

const HTML_ESCAPES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/** Escape a string for use in HTML text content or a quoted attribute value. */
function escapeHtml(value: string): string {
  return value.replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch] ?? ch);
}

/** Extract `{ id, title }` pairs from the `notebooks` array of a response body. */
function parseNotebooks(data: unknown): NotebookOption[] {
  const list = (data as { notebooks?: unknown } | null)?.notebooks;
  if (!Array.isArray(list)) return [];
  return list
    .filter((nb): nb is { id: unknown; title?: unknown } => typeof nb === 'object' && nb !== null && 'id' in nb)
    .map((nb) => ({ id: String(nb.id), title: String(nb.title ?? '') }));
}

/** Pick an upload filename whose extension matches the recorded container. */
function recordingFileName(mimeType: string): string {
  if (mimeType.includes('mp4')) return 'recording.mp4';
  if (mimeType.includes('ogg')) return 'recording.ogg';
  return 'recording.webm';
}

/**
 * Custom element that records audio, transcribes it and saves it as a note.
 *
 * The UI is a small state machine: `idle` → `recording` → `processing` → `done`,
 * and back to `idle` after saving or discarding. Every state change re-renders
 * the whole shadow root; live transcript updates patch only their container.
 */
class FolkVoiceRecorder extends HTMLElement {
  private readonly shadow: ShadowRoot;
  private space = '';
  private state: RecorderState = 'idle';
  private mediaRecorder: MediaRecorder | null = null;
  private audioChunks: Blob[] = [];
  private dictation: SpeechDictation | null = null;
  private segments: TranscriptSegment[] = [];
  private liveTranscript = '';
  private finalTranscript = '';
  private recordingStartTime = 0;
  private durationTimer: ReturnType<typeof setInterval> | null = null;
  private elapsedSeconds = 0;
  private audioBlob: Blob | null = null;
  private audioUrl: string | null = null;
  private progressMessage = '';
  private selectedNotebookId = '';
  private notebooks: NotebookOption[] = [];
  private tags = '';

  /** Pending auto-dismiss timer for the toast, if one is showing. */
  private toastTimer: ReturnType<typeof setTimeout> | null = null;
  /** True once the user has typed into the transcript textarea. */
  private transcriptEdited = false;
  /** Re-entrancy guards for the two async button handlers. */
  private starting = false;
  private saving = false;

  constructor() {
    super();
    this.shadow = this.attachShadow({ mode: 'open' });
  }

  connectedCallback(): void {
    this.space = this.getAttribute('space') || 'demo';
    void this.loadNotebooks();
    this.render();
  }

  disconnectedCallback(): void {
    this.cleanup();
    // Leave no half-finished session behind if the element is attached again.
    this.resetSession();
  }

  /**
   * Release everything that outlives a render: timers, the dictation
   * instance, the recorder with its microphone tracks, and the object URL.
   */
  private cleanup(): void {
    this.stopDurationTimer();
    this.clearToastTimer();
    this.dictation?.destroy();
    this.dictation = null;

    const recorder = this.mediaRecorder;
    this.mediaRecorder = null;
    if (recorder) {
      // Detach the handlers first: stopping must not start processRecording().
      recorder.ondataavailable = null;
      recorder.onstop = null;
      if (recorder.state !== 'inactive') recorder.stop();
      // onstop normally stops the tracks; it is detached, so do it here.
      recorder.stream.getTracks().forEach((track) => track.stop());
    }

    if (this.audioUrl) {
      URL.revokeObjectURL(this.audioUrl);
      this.audioUrl = null;
    }
  }

  /** Reset all per-recording fields to their initial values (does not render). */
  private resetSession(): void {
    this.state = 'idle';
    this.finalTranscript = '';
    this.liveTranscript = '';
    this.segments = [];
    this.audioChunks = [];
    this.audioBlob = null;
    this.audioUrl = null;
    this.elapsedSeconds = 0;
    this.progressMessage = '';
    this.transcriptEdited = false;
  }

  /** Path prefix up to and including `/rnotes`, or '' when the page is not under it. */
  private getApiBase(): string {
    const path = window.location.pathname;
    const match = path.match(/^(\/[^/]+)?\/rnotes/);
    return match ? match[0] : '';
  }

  /** Merge `extra` with a bearer `Authorization` header when a token is available. */
  private authHeaders(extra?: Record<string, string>): Record<string, string> {
    const headers: Record<string, string> = { ...extra };
    const token = getAccessToken();
    if (token) headers['Authorization'] = `Bearer ${token}`;
    return headers;
  }

  /** Fetch the notebook list and preselect the first entry if nothing is selected. */
  private async loadNotebooks(): Promise<void> {
    try {
      const res = await fetch(`${this.getApiBase()}/api/notebooks`, { headers: this.authHeaders() });
      if (!res.ok) return; // keep whatever list we already have
      const data: unknown = await res.json();
      this.notebooks = parseNotebooks(data);
      const first = this.notebooks[0];
      if (first && !this.selectedNotebookId) {
        this.selectedNotebookId = first.id;
      }
      this.render();
    } catch (err) {
      // Best effort: the picker simply stays empty.
      console.warn('Failed to load notebooks:', err);
    }
  }

  /** Ask for the microphone and start recording, live dictation and the timer. */
  private async startRecording(): Promise<void> {
    if (this.starting || this.state !== 'idle') return;
    this.starting = true;
    let stream: MediaStream | null = null;
    try {
      stream = await this.acquireMicrophone();
      if (!stream) return;

      const tracks = stream.getTracks();
      // Fall back to the browser default when none of the candidates is supported.
      const mimeType = MIME_CANDIDATES.find((type) => MediaRecorder.isTypeSupported(type));
      const recorder = new MediaRecorder(stream, mimeType ? { mimeType } : undefined);

      this.audioChunks = [];
      this.segments = [];
      this.liveTranscript = '';

      recorder.ondataavailable = (e) => {
        if (e.data.size > 0) this.audioChunks.push(e.data);
      };
      recorder.onstop = () => {
        tracks.forEach((track) => track.stop());
        this.audioBlob = new Blob(this.audioChunks, { type: mimeType ?? recorder.mimeType });
        if (this.audioUrl) URL.revokeObjectURL(this.audioUrl);
        this.audioUrl = URL.createObjectURL(this.audioBlob);
        void this.processRecording();
      };

      this.mediaRecorder = recorder;
      recorder.start(1000); // 1s timeslice

      this.startDictation();
      this.startDurationTimer();

      this.state = 'recording';
      this.render();
    } catch (err) {
      console.error('Failed to start recording:', err);
      // Tear down whatever was started, then make sure the mic is released
      // even if the recorder was never created.
      this.cleanup();
      stream?.getTracks().forEach((track) => track.stop());
      this.state = 'idle';
      this.showToast('Could not start recording.', ERROR_TOAST_MS);
    } finally {
      this.starting = false;
    }
  }

  /**
   * Request an audio stream.
   * @returns the stream, or null when access failed or the element was
   *          removed from the document while the request was pending.
   */
  private async acquireMicrophone(): Promise<MediaStream | null> {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      if (!this.isConnected) {
        stream.getTracks().forEach((track) => track.stop());
        return null;
      }
      return stream;
    } catch (err) {
      console.error('Failed to start recording:', err);
      this.showToast('Could not access the microphone.', ERROR_TOAST_MS);
      return null;
    }
  }

  /** Start live transcription with segment tracking, when SpeechDictation is supported. */
  private startDictation(): void {
    // Drop the instance left over from a previous recording before replacing it.
    this.dictation?.destroy();
    this.dictation = null;
    if (!SpeechDictation.isSupported()) return;

    this.dictation = new SpeechDictation({
      onInterim: (text: string) => {
        this.upsertSegment(text, false);
        this.renderTranscriptSegments();
      },
      onFinal: (text: string) => {
        this.upsertSegment(text, true);
        this.liveTranscript = this.segments
          .filter((s) => s.isFinal)
          .map((s) => s.text)
          .join(' ');
        this.renderTranscriptSegments();
      },
    });
    this.dictation.start();
  }

  /** Update the pending (non-final) segment if there is one, else append a new segment. */
  private upsertSegment(text: string, isFinal: boolean): void {
    const pendingIdx = this.segments.findIndex((s) => !s.isFinal);
    if (pendingIdx >= 0) {
      this.segments[pendingIdx] = { ...this.segments[pendingIdx], text, isFinal };
      return;
    }
    this.segments.push({
      id: crypto.randomUUID(),
      text,
      timestamp: this.elapsedSeconds,
      isFinal,
    });
  }

  /** Start the once-per-second clock that drives the on-screen timer. */
  private startDurationTimer(): void {
    this.stopDurationTimer();
    this.recordingStartTime = Date.now();
    this.elapsedSeconds = 0;
    this.durationTimer = setInterval(() => {
      this.updateElapsed();
      const timerEl = this.shadow.querySelector('.recording-timer');
      if (timerEl) timerEl.textContent = this.formatTime(this.elapsedSeconds);
    }, 1000);
  }

  /** Recompute `elapsedSeconds` from the wall clock. */
  private updateElapsed(): void {
    this.elapsedSeconds = Math.floor((Date.now() - this.recordingStartTime) / 1000);
  }

  /** Stop capturing; the recorder's `onstop` handler continues with processing. */
  private stopRecording(): void {
    if (this.durationTimer) this.updateElapsed(); // the last tick may be up to 1s stale
    this.stopDurationTimer();
    this.dictation?.stop();
    if (this.mediaRecorder?.state === 'recording') {
      this.mediaRecorder.stop();
    }
  }

  private stopDurationTimer(): void {
    if (this.durationTimer) {
      clearInterval(this.durationTimer);
      this.durationTimer = null;
    }
  }

  private clearToastTimer(): void {
    if (this.toastTimer) {
      clearTimeout(this.toastTimer);
      this.toastTimer = null;
    }
  }

  /** Show `message` as a toast and remove it again after `durationMs`. */
  private showToast(message: string, durationMs: number = TOAST_MS): void {
    this.clearToastTimer();
    this.progressMessage = message;
    this.render();
    this.toastTimer = setTimeout(() => {
      this.toastTimer = null;
      // Don't wipe a message that something else has set in the meantime.
      if (this.progressMessage !== message) return;
      this.progressMessage = '';
      this.render();
    }, durationMs);
  }

  /** Update the processing-screen message; ignored outside the processing state. */
  private setProgress(message: string): void {
    if (this.state !== 'processing') return;
    this.progressMessage = message;
    this.render();
  }

  /** Targeted DOM update of transcript segments container (avoids full re-render). */
  private renderTranscriptSegments(): void {
    const container = this.shadow.querySelector('.live-transcript-segments');
    if (!container) return;

    container.innerHTML = this.segments
      .map(
        (seg) => `
        <div class="transcript-segment${seg.isFinal ? '' : ' interim'}">
          <span class="segment-time">[${this.formatTime(seg.timestamp)}]</span>
          <span class="segment-text">${escapeHtml(seg.text)}</span>
        </div>
      `,
      )
      .join('');

    // Auto-scroll to bottom
    container.scrollTop = container.scrollHeight;
  }

  /** Convert final segments to Tiptap JSON document with timestamped paragraphs. */
  private segmentsToTiptapJSON(): object {
    const finalSegments = this.segments.filter((s) => s.isFinal);
    if (finalSegments.length === 0) return { type: 'doc', content: [{ type: 'paragraph' }] };

    return {
      type: 'doc',
      content: finalSegments.map((seg) => ({
        type: 'paragraph',
        content: [
          { type: 'text', marks: [{ type: 'code' }], text: `[${this.formatTime(seg.timestamp)}]` },
          { type: 'text', text: ` ${seg.text}` },
        ],
      })),
    };
  }

  /**
   * Run the transcription cascade (server → live → offline) on the recorded
   * blob and move to the `done` state with whatever transcript was obtained.
   */
  private async processRecording(): Promise<void> {
    this.clearToastTimer();
    this.state = 'processing';
    this.transcriptEdited = false;
    this.progressMessage = 'Processing recording...';
    this.render();

    const blob = this.audioBlob;
    let transcript = '';
    let failureNote = '';

    // Tier 1: Server transcription
    if (blob && this.space !== 'demo') {
      transcript = await this.transcribeOnServer(blob);
    }

    // Tier 2: Live transcript from segments
    if (!transcript) {
      transcript = this.liveTranscript.trim();
    }

    // Tier 3: Offline Parakeet transcription
    if (!transcript && blob) {
      try {
        transcript = await transcribeOffline(blob, (p: TranscriptionProgress) => {
          this.setProgress(p.message || 'Processing...');
        });
      } catch (err) {
        console.warn('Offline transcription failed:', err);
        failureNote = 'Transcription failed. You can still save the recording.';
      }
    }

    // The session may have been torn down or replaced while we were awaiting.
    if (this.state !== 'processing' || this.audioBlob !== blob) return;

    this.finalTranscript = transcript;
    this.state = 'done';
    this.progressMessage = '';
    if (failureNote) {
      this.showToast(failureNote, ERROR_TOAST_MS);
    } else {
      this.render();
    }
  }

  /**
   * POST the audio to the transcription endpoint.
   * @returns the transcript, or '' on any failure so the caller can fall through.
   */
  private async transcribeOnServer(blob: Blob): Promise<string> {
    try {
      this.setProgress('Sending to server for transcription...');
      const formData = new FormData();
      formData.append('file', blob, recordingFileName(blob.type));
      const res = await fetch(`${this.getApiBase()}/api/voice/transcribe`, {
        method: 'POST',
        headers: this.authHeaders(),
        body: formData,
      });
      if (!res.ok) return '';
      const data = (await res.json()) as { text?: unknown; transcript?: unknown } | null;
      const text = data?.text || data?.transcript;
      return typeof text === 'string' ? text : '';
    } catch (err) {
      console.warn('Server transcription failed:', err);
      return '';
    }
  }

  /**
   * Upload the audio file.
   * @returns the uploaded file's URL, or '' when the upload failed (the note
   *          is then saved without an attachment).
   */
  private async uploadAudio(base: string, blob: Blob): Promise<string> {
    try {
      const formData = new FormData();
      formData.append('file', blob, recordingFileName(blob.type));
      const res = await fetch(`${base}/api/uploads`, {
        method: 'POST',
        headers: this.authHeaders(),
        body: formData,
      });
      if (!res.ok) return '';
      const data = (await res.json()) as { url?: unknown } | null;
      return typeof data?.url === 'string' ? data.url : '';
    } catch (err) {
      console.warn('Audio upload failed:', err);
      return '';
    }
  }

  /** Upload the audio, create the note, and return to the idle state on success. */
  private async saveNote(): Promise<void> {
    if (this.saving) return; // ignore double clicks while a save is in flight
    const blob = this.audioBlob;
    if (!blob) return;
    if (!this.selectedNotebookId) {
      this.showToast('Select a notebook before saving', ERROR_TOAST_MS);
      return;
    }

    this.saving = true;
    try {
      const base = this.getApiBase();
      const fileUrl = await this.uploadAudio(base, blob);

      // Build content: Tiptap JSON with segments if available, else raw text.
      // Once the user has edited the transcript, their text is what gets saved.
      const useSegments = !this.transcriptEdited && this.segments.some((s) => s.isFinal);
      const content = useSegments ? JSON.stringify(this.segmentsToTiptapJSON()) : this.finalTranscript || '';
      const contentFormat = useSegments ? 'tiptap-json' : undefined;

      const tagList = this.tags
        .split(',')
        .map((t) => t.trim())
        .filter(Boolean);
      tagList.push('voice');

      const res = await fetch(`${base}/api/notes`, {
        method: 'POST',
        headers: this.authHeaders({ 'Content-Type': 'application/json' }),
        body: JSON.stringify({
          notebook_id: this.selectedNotebookId,
          title: `Voice Note — ${new Date().toLocaleDateString()}`,
          content,
          content_format: contentFormat,
          type: 'AUDIO',
          tags: tagList,
          file_url: fileUrl,
          mime_type: blob.type,
          duration: this.elapsedSeconds,
        }),
      });
      if (!res.ok) throw new Error(`Saving the note failed with status ${res.status}`);

      // Only reset if this is still the session we saved.
      if (this.audioBlob === blob) {
        this.cleanup();
        this.resetSession();
        this.showToast('Note saved!');
      }
    } catch (err) {
      console.error('Failed to save note:', err);
      this.showToast('Failed to save note', ERROR_TOAST_MS);
    } finally {
      this.saving = false;
    }
  }

  /** Throw away the current recording and return to the idle screen. */
  private discard(): void {
    this.cleanup();
    this.resetSession();
    this.render();
  }

  /** Format a number of seconds as `m:ss`. */
  private formatTime(s: number): string {
    const total = Number.isFinite(s) ? Math.max(0, Math.floor(s)) : 0;
    const m = Math.floor(total / 60);
    const sec = total % 60;
    return `${m}:${String(sec).padStart(2, '0')}`;
  }

  /** Rebuild the whole shadow DOM for the current state and re-bind listeners. */
  private render(): void {
    // The processing screen shows progressMessage inline, so no toast there.
    const toast =
      this.progressMessage && this.state !== 'processing'
        ? `<div class="toast" role="status">${escapeHtml(this.progressMessage)}</div>`
        : '';

    this.shadow.innerHTML = `
      <style>${this.getStyles()}</style>
      <div class="voice-recorder">${this.renderBody()}</div>
      ${toast}
    `;
    this.attachListeners();

    // Re-render segments after DOM is in place (recording state)
    if (this.state === 'recording' && this.segments.length > 0) {
      this.renderTranscriptSegments();
    }
  }

  /** Markup for the current state. */
  private renderBody(): string {
    switch (this.state) {
      case 'idle':
        return this.renderIdle();
      case 'recording':
        return `
          <div class="recorder-recording">
            <div class="recording-pulse"></div>
            <div class="recording-timer">${this.formatTime(this.elapsedSeconds)}</div>
            <p class="recording-status">Recording...</p>
            <div class="live-transcript-segments"></div>
            <button type="button" class="stop-btn" id="btn-stop">Stop</button>
          </div>
        `;
      case 'processing':
        return `
          <div class="recorder-processing">
            <div class="processing-spinner"></div>
            <p>${escapeHtml(this.progressMessage)}</p>
          </div>
        `;
      case 'done':
        return `
          <div class="recorder-done">
            <h3>Recording Complete</h3>
            ${this.audioUrl ? `<audio class="result-audio" src="${escapeHtml(this.audioUrl)}" controls></audio>` : ''}
            <div class="result-duration">Duration: ${this.formatTime(this.elapsedSeconds)}</div>
            <div class="transcript-section">
              <label for="transcript-edit">Transcript</label>
              <textarea id="transcript-edit" class="transcript-textarea">${escapeHtml(this.finalTranscript)}</textarea>
            </div>
            <div class="result-actions">
              <button type="button" class="save-btn" id="btn-save">Save Note</button>
              <button type="button" class="copy-btn" id="btn-copy">Copy</button>
              <button type="button" class="discard-btn" id="btn-discard">Discard</button>
            </div>
          </div>
        `;
    }
  }

  /** Markup for the idle screen: notebook picker, tags field and the record button. */
  private renderIdle(): string {
    const options = this.notebooks
      .map((nb) => {
        const selected = nb.id === this.selectedNotebookId ? ' selected' : '';
        return `<option value="${escapeHtml(nb.id)}"${selected}>${escapeHtml(nb.title)}</option>`;
      })
      .join('');

    return `
      <div class="recorder-icon" aria-hidden="true">
        <svg width="48" height="48" viewBox="0 0 24 24" fill="none" stroke="currentColor"
             stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
          <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
          <path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
          <line x1="12" y1="19" x2="12" y2="23"></line>
          <line x1="8" y1="23" x2="16" y2="23"></line>
        </svg>
      </div>
      <h2>Voice Recorder</h2>
      <p class="recorder-subtitle">Record voice notes with automatic transcription</p>
      <div class="recorder-config">
        <label>
          Notebook
          <select id="notebook-select">${options}</select>
        </label>
        <label>
          Tags
          <input id="tags-input" type="text" value="${escapeHtml(this.tags)}">
        </label>
      </div>
      <button type="button" class="record-btn" id="btn-start">Start Recording</button>
      ${isModelCached() ? '<div class="model-status">Offline model cached</div>' : ''}
    `;
  }

  /** Bind handlers to the freshly rendered controls (ids absent in this state are skipped). */
  private attachListeners(): void {
    const byId = <T extends HTMLElement>(id: string): T | null => this.shadow.getElementById(id) as T | null;

    byId('btn-start')?.addEventListener('click', () => void this.startRecording());
    byId('btn-stop')?.addEventListener('click', () => this.stopRecording());
    byId('btn-save')?.addEventListener('click', () => void this.saveNote());
    byId('btn-discard')?.addEventListener('click', () => this.discard());

    const transcriptEdit = byId<HTMLTextAreaElement>('transcript-edit');
    byId('btn-copy')?.addEventListener('click', () => {
      if (!transcriptEdit) return;
      navigator.clipboard.writeText(transcriptEdit.value).catch((err: unknown) => {
        console.warn('Copy to clipboard failed:', err);
      });
    });
    transcriptEdit?.addEventListener('input', () => {
      this.finalTranscript = transcriptEdit.value;
      this.transcriptEdited = true;
    });

    const nbSelect = byId<HTMLSelectElement>('notebook-select');
    nbSelect?.addEventListener('change', () => {
      this.selectedNotebookId = nbSelect.value;
    });

    const tagsInput = byId<HTMLInputElement>('tags-input');
    tagsInput?.addEventListener('input', () => {
      this.tags = tagsInput.value;
    });
  }

  /** Stylesheet injected into the shadow root on every render. */
  private getStyles(): string {
    return `
      :host { display: block; font-family: system-ui, -apple-system, sans-serif; color: var(--rs-text-primary); }
      * { box-sizing: border-box; }
      .voice-recorder { max-width: 600px; margin: 0 auto; padding: 40px 20px; display: flex; flex-direction: column; align-items: center; text-align: center; }
      h2 { font-size: 24px; font-weight: 700; margin: 16px 0 4px; }
      h3 { font-size: 18px; font-weight: 600; margin: 0 0 16px; }
      .recorder-subtitle { color: var(--rs-text-muted); margin: 0 0 24px; }
      .recorder-icon { color: var(--rs-primary); margin-bottom: 8px; }
      .recorder-config { display: flex; flex-direction: column; gap: 12px; width: 100%; max-width: 400px; margin-bottom: 24px; text-align: left; }
      .recorder-config label { font-size: 13px; color: var(--rs-text-secondary); display: flex; flex-direction: column; gap: 4px; }
      .recorder-config select, .recorder-config input { padding: 8px 12px; border-radius: 6px; border: 1px solid var(--rs-input-border); background: var(--rs-input-bg); color: var(--rs-input-text); font-size: 14px; font-family: inherit; }
      .record-btn { padding: 14px 36px; border-radius: 50px; border: none; background: var(--rs-error, #ef4444); color: #fff; font-size: 16px; font-weight: 600; cursor: pointer; transition: all 0.2s; }
      .record-btn:hover { transform: scale(1.05); filter: brightness(1.1); }
      .model-status { font-size: 11px; color: var(--rs-text-muted); margin-top: 12px; }

      /* Recording state */
      .recorder-recording { display: flex; flex-direction: column; align-items: center; gap: 16px; }
      .recording-pulse { width: 80px; height: 80px; border-radius: 50%; background: var(--rs-error, #ef4444); animation: pulse 1.5s infinite; }
      @keyframes pulse {
        0% { transform: scale(1); opacity: 1; box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4); }
        70% { transform: scale(1.05); opacity: 0.8; box-shadow: 0 0 0 20px rgba(239, 68, 68, 0); }
        100% { transform: scale(1); opacity: 1; box-shadow: 0 0 0 0 rgba(239, 68, 68, 0); }
      }
      .recording-timer { font-size: 48px; font-weight: 700; font-variant-numeric: tabular-nums; }
      .recording-status { color: var(--rs-error, #ef4444); font-weight: 500; }

      /* Live transcript segments */
      .live-transcript-segments { width: 100%; max-width: 500px; max-height: 250px; overflow-y: auto; text-align: left; padding: 8px 0; }
      .transcript-segment { display: flex; gap: 8px; padding: 4px 12px; border-radius: 4px; font-size: 14px; line-height: 1.6; }
      .transcript-segment.interim { font-style: italic; color: var(--rs-text-muted); background: var(--rs-bg-surface-raised); }
      .segment-time { flex-shrink: 0; font-family: 'JetBrains Mono', 'Fira Code', monospace; font-size: 12px; color: var(--rs-text-muted); padding-top: 2px; }
      .segment-text { flex: 1; }
      .stop-btn { padding: 12px 32px; border-radius: 50px; border: none; background: var(--rs-text-primary); color: var(--rs-bg-surface); font-size: 15px; font-weight: 600; cursor: pointer; }

      /* Processing */
      .recorder-processing { display: flex; flex-direction: column; align-items: center; gap: 16px; padding: 40px; }
      .processing-spinner { width: 48px; height: 48px; border: 3px solid var(--rs-border); border-top-color: var(--rs-primary); border-radius: 50%; animation: spin 0.8s linear infinite; }
      @keyframes spin { to { transform: rotate(360deg); } }

      /* Done */
      .recorder-done { display: flex; flex-direction: column; align-items: center; gap: 12px; width: 100%; }
      .result-audio { width: 100%; max-width: 500px; height: 40px; margin-bottom: 8px; }
      .result-duration { font-size: 13px; color: var(--rs-text-muted); }
      .transcript-section { width: 100%; max-width: 500px; text-align: left; }
      .transcript-section label { font-size: 12px; font-weight: 600; color: var(--rs-text-muted); text-transform: uppercase; letter-spacing: 0.05em; }
      .transcript-textarea { width: 100%; min-height: 120px; padding: 12px; margin-top: 4px; border-radius: 8px; border: 1px solid var(--rs-input-border); background: var(--rs-input-bg); color: var(--rs-input-text); font-size: 14px; font-family: inherit; line-height: 1.6; resize: vertical; }
      .result-actions { display: flex; gap: 8px; margin-top: 8px; }
      .save-btn { padding: 10px 24px; border-radius: 8px; border: none; background: var(--rs-primary); color: #fff; font-weight: 600; cursor: pointer; }
      .copy-btn, .discard-btn { padding: 10px 20px; border-radius: 8px; font-weight: 500; cursor: pointer; border: 1px solid var(--rs-border); background: transparent; color: var(--rs-text-secondary); }
      .discard-btn { color: var(--rs-error, #ef4444); border-color: var(--rs-error, #ef4444); }
      .toast { position: fixed; bottom: 20px; left: 50%; transform: translateX(-50%); padding: 10px 20px; border-radius: 8px; background: var(--rs-primary); color: #fff; font-size: 13px; font-weight: 500; z-index: 100; }
    `;
  }
}

// Registering the same tag name twice throws, so guard the definition.
if (!customElements.get('folk-voice-recorder')) {
  customElements.define('folk-voice-recorder', FolkVoiceRecorder);
}