From d7a2372a56d053a5f70c99c27594ffa31c4a435a Mon Sep 17 00:00:00 2001 From: Jeff Emmett Date: Sun, 15 Feb 2026 08:34:50 -0700 Subject: [PATCH] feat: add AUDIO note type with voice recording and transcription - Add AUDIO to NoteType enum, duration field to Note model - New VoiceRecorder component (MediaRecorder API, upload, transcribe) - New /api/voice/transcribe proxy route to voice-command-api container - Audio MIME types added to upload whitelist - Audio player + transcript display on note detail page - AUDIO type button on new note page with recorder UI Co-Authored-By: Claude Opus 4.6 --- docker-compose.yml | 1 + prisma/schema.prisma | 2 + src/app/api/notebooks/[id]/notes/route.ts | 3 +- src/app/api/notes/route.ts | 3 +- src/app/api/uploads/route.ts | 3 + src/app/api/voice/transcribe/route.ts | 42 ++++ src/app/notes/[id]/page.tsx | 12 ++ src/app/notes/new/page.tsx | 50 ++++- src/components/NoteCard.tsx | 1 + src/components/VoiceRecorder.tsx | 235 ++++++++++++++++++++++ 10 files changed, 341 insertions(+), 11 deletions(-) create mode 100644 src/app/api/voice/transcribe/route.ts create mode 100644 src/components/VoiceRecorder.tsx diff --git a/docker-compose.yml b/docker-compose.yml index 160c5e0..df5cc57 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,6 +11,7 @@ services: - RSPACE_INTERNAL_URL=${RSPACE_INTERNAL_URL:-http://rspace-online:3000} - NEXT_PUBLIC_ENCRYPTID_SERVER_URL=${NEXT_PUBLIC_ENCRYPTID_SERVER_URL:-https://encryptid.jeffemmett.com} - RSPACE_INTERNAL_KEY=${RSPACE_INTERNAL_KEY} + - VOICE_API_URL=${VOICE_API_URL:-http://voice-command-api:8000} volumes: - uploads_data:/app/uploads labels: diff --git a/prisma/schema.prisma b/prisma/schema.prisma index d86122e..4b9786e 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -79,6 +79,7 @@ model Note { mimeType String? fileUrl String? fileSize Int? + duration Int? isPinned Boolean @default(false) canvasShapeId String? 
sortOrder Int @default(0) @@ -100,6 +101,7 @@ enum NoteType { CODE IMAGE FILE + AUDIO } // ─── Tags ─────────────────────────────────────────────────────────── diff --git a/src/app/api/notebooks/[id]/notes/route.ts b/src/app/api/notebooks/[id]/notes/route.ts index f3206b6..1a08b15 100644 --- a/src/app/api/notebooks/[id]/notes/route.ts +++ b/src/app/api/notebooks/[id]/notes/route.ts @@ -37,7 +37,7 @@ export async function POST( } const body = await request.json(); - const { title, content, type, url, language, tags, fileUrl, mimeType, fileSize } = body; + const { title, content, type, url, language, tags, fileUrl, mimeType, fileSize, duration } = body; if (!title?.trim()) { return NextResponse.json({ error: 'Title is required' }, { status: 400 }); @@ -73,6 +73,7 @@ export async function POST( fileUrl: fileUrl || null, mimeType: mimeType || null, fileSize: fileSize || null, + duration: duration || null, tags: { create: tagRecords.map((tag) => ({ tagId: tag.id, diff --git a/src/app/api/notes/route.ts b/src/app/api/notes/route.ts index 27daa51..c7f8f7c 100644 --- a/src/app/api/notes/route.ts +++ b/src/app/api/notes/route.ts @@ -43,7 +43,7 @@ export async function POST(request: NextRequest) { if (!isAuthed(auth)) return auth; const { user } = auth; const body = await request.json(); - const { title, content, type, notebookId, url, language, tags, fileUrl, mimeType, fileSize } = body; + const { title, content, type, notebookId, url, language, tags, fileUrl, mimeType, fileSize, duration } = body; if (!title?.trim()) { return NextResponse.json({ error: 'Title is required' }, { status: 400 }); @@ -79,6 +79,7 @@ export async function POST(request: NextRequest) { fileUrl: fileUrl || null, mimeType: mimeType || null, fileSize: fileSize || null, + duration: duration || null, tags: { create: tagRecords.map((tag) => ({ tagId: tag.id, diff --git a/src/app/api/uploads/route.ts b/src/app/api/uploads/route.ts index defb67d..d5e94db 100644 --- a/src/app/api/uploads/route.ts +++ 
b/src/app/api/uploads/route.ts
@@ -19,6 +19,9 @@ const ALLOWED_MIME_TYPES = new Set([
   // Code
   'text/javascript', 'text/typescript', 'text/html', 'text/css',
   'application/x-python-code', 'text/x-python',
+  // Audio
+  'audio/webm', 'audio/mpeg', 'audio/wav', 'audio/ogg',
+  'audio/mp4', 'audio/x-m4a', 'audio/aac', 'audio/flac',
 ]);
 
 function sanitizeFilename(name: string): string {
diff --git a/src/app/api/voice/transcribe/route.ts b/src/app/api/voice/transcribe/route.ts
new file mode 100644
index 0000000..9b030b8
--- /dev/null
+++ b/src/app/api/voice/transcribe/route.ts
@@ -0,0 +1,42 @@
+import { NextRequest, NextResponse } from 'next/server';
+import { requireAuth, isAuthed } from '@/lib/auth';
+
+const VOICE_API_URL = process.env.VOICE_API_URL || 'http://voice-command-api:8000';
+
+/** Authenticated proxy: forwards the multipart `audio` file to the voice API and relays its JSON reply. */
+export async function POST(request: NextRequest) {
+  try {
+    const auth = await requireAuth(request);
+    if (!isAuthed(auth)) return auth;
+
+    const formData = await request.formData();
+    const audio = formData.get('audio');
+
+    // FormData entries are File | string; a text field named `audio` is a client error, not a 500.
+    if (!audio || typeof audio === 'string') {
+      return NextResponse.json({ error: 'No audio file provided' }, { status: 400 });
+    }
+
+    // Forward to voice-command API
+    const proxyForm = new FormData();
+    proxyForm.append('audio', audio, audio.name || 'recording.webm');
+
+    const res = await fetch(`${VOICE_API_URL}/api/voice/transcribe`, {
+      method: 'POST',
+      body: proxyForm,
+    });
+
+    if (!res.ok) {
+      // Log the upstream body server-side only; the client gets a generic message.
+      const err = await res.text();
+      console.error('Voice API error:', res.status, err);
+      return NextResponse.json({ error: 'Transcription failed' }, { status: res.status });
+    }
+
+    const result = await res.json();
+    return NextResponse.json(result);
+  } catch (error) {
+    console.error('Transcribe proxy error:', error);
+    return NextResponse.json({ error: 'Transcription failed' }, { status: 500 });
+  }
+}
diff --git a/src/app/notes/[id]/page.tsx b/src/app/notes/[id]/page.tsx
index ae9f62e..10685a8 100644
--- a/src/app/notes/[id]/page.tsx
+++ b/src/app/notes/[id]/page.tsx
@@
-15,6 +15,7 @@ const TYPE_COLORS: Record = { CODE: 'bg-green-500/20 text-green-400', IMAGE: 'bg-pink-500/20 text-pink-400', FILE: 'bg-slate-500/20 text-slate-400', + AUDIO: 'bg-red-500/20 text-red-400', }; interface NoteData { @@ -28,6 +29,7 @@ interface NoteData { fileUrl: string | null; mimeType: string | null; fileSize: number | null; + duration: number | null; isPinned: boolean; canvasShapeId: string | null; createdAt: string; @@ -248,6 +250,16 @@ export default function NoteDetailPage() { )} + {note.fileUrl && note.type === 'AUDIO' && ( +
+
+ )} {/* Content */} {editing ? ( diff --git a/src/app/notes/new/page.tsx b/src/app/notes/new/page.tsx index f45f85a..05c1c75 100644 --- a/src/app/notes/new/page.tsx +++ b/src/app/notes/new/page.tsx @@ -5,6 +5,7 @@ import { useRouter, useSearchParams } from 'next/navigation'; import Link from 'next/link'; import { NoteEditor } from '@/components/NoteEditor'; import { FileUpload } from '@/components/FileUpload'; +import { VoiceRecorder } from '@/components/VoiceRecorder'; import { UserMenu } from '@/components/UserMenu'; import { authFetch } from '@/lib/authFetch'; @@ -15,6 +16,7 @@ const NOTE_TYPES = [ { value: 'CODE', label: 'Code', desc: 'Code snippet' }, { value: 'IMAGE', label: 'Image', desc: 'Upload image' }, { value: 'FILE', label: 'File', desc: 'Upload file' }, + { value: 'AUDIO', label: 'Audio', desc: 'Voice recording' }, ]; interface NotebookOption { @@ -51,6 +53,7 @@ function NewNoteForm() { const [fileUrl, setFileUrl] = useState(''); const [mimeType, setMimeType] = useState(''); const [fileSize, setFileSize] = useState(0); + const [duration, setDuration] = useState(0); const [notebookId, setNotebookId] = useState(preselectedNotebook || ''); const [notebooks, setNotebooks] = useState([]); const [saving, setSaving] = useState(false); @@ -80,6 +83,7 @@ function NewNoteForm() { if (fileUrl) body.fileUrl = fileUrl; if (mimeType) body.mimeType = mimeType; if (fileSize) body.fileSize = fileSize; + if (duration) body.duration = duration; const endpoint = notebookId ? `/api/notebooks/${notebookId}/notes` @@ -105,6 +109,7 @@ function NewNoteForm() { const showUrl = ['CLIP', 'BOOKMARK'].includes(type); const showUpload = ['IMAGE', 'FILE'].includes(type); const showLanguage = type === 'CODE'; + const showRecorder = type === 'AUDIO'; return (
@@ -234,16 +239,43 @@ function NewNoteForm() {
)} + {/* Voice recorder */} + {showRecorder && ( +
+ + { + setFileUrl(result.fileUrl); + setMimeType(result.mimeType); + setFileSize(result.fileSize); + setDuration(result.duration); + setContent(result.transcript); + if (!title) setTitle(`Voice note ${new Date().toLocaleDateString()}`); + }} + /> + {content && ( +
+ +
+ {content} +
+
+ )} +
+ )} + {/* Content */} -
- - -
+ {!showRecorder && ( +
+ + +
+ )} {/* Notebook */}
diff --git a/src/components/NoteCard.tsx b/src/components/NoteCard.tsx index 51d1284..d2e7f7c 100644 --- a/src/components/NoteCard.tsx +++ b/src/components/NoteCard.tsx @@ -10,6 +10,7 @@ const TYPE_COLORS: Record = { CODE: 'bg-green-500/20 text-green-400', IMAGE: 'bg-pink-500/20 text-pink-400', FILE: 'bg-slate-500/20 text-slate-400', + AUDIO: 'bg-red-500/20 text-red-400', }; interface NoteCardProps { diff --git a/src/components/VoiceRecorder.tsx b/src/components/VoiceRecorder.tsx new file mode 100644 index 0000000..8fa4673 --- /dev/null +++ b/src/components/VoiceRecorder.tsx @@ -0,0 +1,235 @@ +'use client'; + +import { useState, useRef, useCallback, useEffect } from 'react'; +import { authFetch } from '@/lib/authFetch'; + +interface VoiceRecorderResult { + fileUrl: string; + mimeType: string; + fileSize: number; + duration: number; + transcript: string; +} + +interface VoiceRecorderProps { + onResult: (result: VoiceRecorderResult) => void; + className?: string; +} + +export function VoiceRecorder({ onResult, className }: VoiceRecorderProps) { + const [recording, setRecording] = useState(false); + const [processing, setProcessing] = useState(false); + const [processingStep, setProcessingStep] = useState(''); + const [elapsed, setElapsed] = useState(0); + const [error, setError] = useState(null); + const [audioUrl, setAudioUrl] = useState(null); + + const mediaRecorderRef = useRef(null); + const chunksRef = useRef([]); + const timerRef = useRef | null>(null); + const startTimeRef = useRef(0); + + useEffect(() => { + return () => { + if (timerRef.current) clearInterval(timerRef.current); + if (audioUrl) URL.revokeObjectURL(audioUrl); + }; + }, [audioUrl]); + + const formatTime = (seconds: number) => { + const m = Math.floor(seconds / 60).toString().padStart(2, '0'); + const s = (seconds % 60).toString().padStart(2, '0'); + return `${m}:${s}`; + }; + + const startRecording = useCallback(async () => { + setError(null); + try { + const stream = await 
navigator.mediaDevices.getUserMedia({ audio: true }); + const mediaRecorder = new MediaRecorder(stream, { + mimeType: MediaRecorder.isTypeSupported('audio/webm;codecs=opus') + ? 'audio/webm;codecs=opus' + : 'audio/webm', + }); + + chunksRef.current = []; + mediaRecorder.ondataavailable = (e) => { + if (e.data.size > 0) chunksRef.current.push(e.data); + }; + + mediaRecorder.onstop = () => { + stream.getTracks().forEach((t) => t.stop()); + }; + + mediaRecorder.start(1000); + mediaRecorderRef.current = mediaRecorder; + startTimeRef.current = Date.now(); + setRecording(true); + setElapsed(0); + + timerRef.current = setInterval(() => { + setElapsed(Math.floor((Date.now() - startTimeRef.current) / 1000)); + }, 1000); + } catch (err) { + setError(err instanceof Error ? err.message : 'Microphone access denied'); + } + }, []); + + const stopRecording = useCallback(async () => { + const mediaRecorder = mediaRecorderRef.current; + if (!mediaRecorder || mediaRecorder.state === 'inactive') return; + + if (timerRef.current) { + clearInterval(timerRef.current); + timerRef.current = null; + } + + const duration = Math.floor((Date.now() - startTimeRef.current) / 1000); + setRecording(false); + setProcessing(true); + + // Wait for final data + const blob = await new Promise((resolve) => { + mediaRecorder.onstop = () => { + mediaRecorder.stream.getTracks().forEach((t) => t.stop()); + resolve(new Blob(chunksRef.current, { type: mediaRecorder.mimeType })); + }; + mediaRecorder.stop(); + }); + + // Preview URL + const previewUrl = URL.createObjectURL(blob); + setAudioUrl(previewUrl); + + try { + // Upload audio file + setProcessingStep('Uploading audio...'); + const uploadForm = new FormData(); + uploadForm.append('file', blob, 'recording.webm'); + + const uploadRes = await authFetch('/api/uploads', { + method: 'POST', + body: uploadForm, + }); + + if (!uploadRes.ok) { + const data = await uploadRes.json(); + throw new Error(data.error || 'Upload failed'); + } + + const uploadResult = 
await uploadRes.json(); + + // Transcribe + setProcessingStep('Transcribing...'); + const transcribeForm = new FormData(); + transcribeForm.append('audio', blob, 'recording.webm'); + + const transcribeRes = await authFetch('/api/voice/transcribe', { + method: 'POST', + body: transcribeForm, + }); + + let transcript = ''; + if (transcribeRes.ok) { + const transcribeResult = await transcribeRes.json(); + transcript = transcribeResult.text || ''; + } else { + console.warn('Transcription failed, saving audio without transcript'); + } + + onResult({ + fileUrl: uploadResult.url, + mimeType: uploadResult.mimeType, + fileSize: uploadResult.size, + duration, + transcript, + }); + } catch (err) { + setError(err instanceof Error ? err.message : 'Processing failed'); + } finally { + setProcessing(false); + setProcessingStep(''); + } + }, [onResult]); + + const discard = useCallback(() => { + if (audioUrl) { + URL.revokeObjectURL(audioUrl); + setAudioUrl(null); + } + setElapsed(0); + setError(null); + }, [audioUrl]); + + return ( +
+
+ {/* Recording controls */} +
+ {!recording && !processing && !audioUrl && ( + <> + +

Tap to start recording

+ + )} + + {recording && ( + <> +
+ + {formatTime(elapsed)} +
+ +

Tap to stop

+ + )} + + {processing && ( +
+ + + + +

{processingStep}

+
+ )} + + {audioUrl && !processing && ( +
+
+ )} +
+ + {error && ( +

{error}

+ )} +
+
+ ); +}