feat: add AUDIO note type with voice recording and transcription

- Add AUDIO to NoteType enum, duration field to Note model
- New VoiceRecorder component (MediaRecorder API, upload, transcribe)
- New /api/voice/transcribe proxy route to voice-command-api container
- Audio MIME types added to upload whitelist
- Audio player + transcript display on note detail page
- AUDIO type button on new note page with recorder UI

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jeff Emmett 2026-02-15 08:34:50 -07:00
parent e450381e2f
commit d7a2372a56
10 changed files with 341 additions and 11 deletions

View File

@@ -11,6 +11,7 @@ services:
- RSPACE_INTERNAL_URL=${RSPACE_INTERNAL_URL:-http://rspace-online:3000}
- NEXT_PUBLIC_ENCRYPTID_SERVER_URL=${NEXT_PUBLIC_ENCRYPTID_SERVER_URL:-https://encryptid.jeffemmett.com}
- RSPACE_INTERNAL_KEY=${RSPACE_INTERNAL_KEY}
- VOICE_API_URL=${VOICE_API_URL:-http://voice-command-api:8000}
volumes:
- uploads_data:/app/uploads
labels:

View File

@@ -79,6 +79,7 @@ model Note {
mimeType String?
fileUrl String?
fileSize Int?
duration Int?
isPinned Boolean @default(false)
canvasShapeId String?
sortOrder Int @default(0)
@@ -100,6 +101,7 @@ enum NoteType {
CODE
IMAGE
FILE
AUDIO
}
// ─── Tags ───────────────────────────────────────────────────────────

View File

@@ -37,7 +37,7 @@ export async function POST(
}
const body = await request.json();
const { title, content, type, url, language, tags, fileUrl, mimeType, fileSize } = body;
const { title, content, type, url, language, tags, fileUrl, mimeType, fileSize, duration } = body;
if (!title?.trim()) {
return NextResponse.json({ error: 'Title is required' }, { status: 400 });
@@ -73,6 +73,7 @@ export async function POST(
fileUrl: fileUrl || null,
mimeType: mimeType || null,
fileSize: fileSize || null,
duration: duration || null,
tags: {
create: tagRecords.map((tag) => ({
tagId: tag.id,

View File

@@ -43,7 +43,7 @@ export async function POST(request: NextRequest) {
if (!isAuthed(auth)) return auth;
const { user } = auth;
const body = await request.json();
const { title, content, type, notebookId, url, language, tags, fileUrl, mimeType, fileSize } = body;
const { title, content, type, notebookId, url, language, tags, fileUrl, mimeType, fileSize, duration } = body;
if (!title?.trim()) {
return NextResponse.json({ error: 'Title is required' }, { status: 400 });
@@ -79,6 +79,7 @@ export async function POST(request: NextRequest) {
fileUrl: fileUrl || null,
mimeType: mimeType || null,
fileSize: fileSize || null,
duration: duration || null,
tags: {
create: tagRecords.map((tag) => ({
tagId: tag.id,

View File

@@ -19,6 +19,9 @@ const ALLOWED_MIME_TYPES = new Set([
// Code
'text/javascript', 'text/typescript', 'text/html', 'text/css',
'application/x-python-code', 'text/x-python',
// Audio
'audio/webm', 'audio/mpeg', 'audio/wav', 'audio/ogg',
'audio/mp4', 'audio/x-m4a', 'audio/aac', 'audio/flac',
]);
function sanitizeFilename(name: string): string {

View File

@@ -0,0 +1,42 @@
import { NextRequest, NextResponse } from 'next/server';
import { requireAuth, isAuthed } from '@/lib/auth';
// Internal address of the transcription service (docker-compose service name by default).
const VOICE_API_URL = process.env.VOICE_API_URL || 'http://voice-command-api:8000';

/**
 * POST /api/voice/transcribe
 *
 * Authenticated proxy to the voice-command API container. Accepts multipart
 * form data with an `audio` field, forwards it upstream, and relays the
 * upstream JSON result. Upstream failures are surfaced with the upstream
 * status code but a generic error body; unexpected errors return 500.
 */
export async function POST(request: NextRequest) {
  try {
    const auth = await requireAuth(request);
    if (!isAuthed(auth)) return auth;

    const incoming = await request.formData();
    const audioFile = incoming.get('audio') as File | null;
    if (!audioFile) {
      return NextResponse.json({ error: 'No audio file provided' }, { status: 400 });
    }

    // Re-wrap the file in a fresh multipart body for the upstream service.
    const upstreamForm = new FormData();
    upstreamForm.append('audio', audioFile, audioFile.name || 'recording.webm');

    const upstream = await fetch(`${VOICE_API_URL}/api/voice/transcribe`, {
      method: 'POST',
      body: upstreamForm,
    });

    if (!upstream.ok) {
      const detail = await upstream.text();
      console.error('Voice API error:', upstream.status, detail);
      // Propagate the status, but do not leak the upstream error body to clients.
      return NextResponse.json(
        { error: 'Transcription failed' },
        { status: upstream.status }
      );
    }

    return NextResponse.json(await upstream.json());
  } catch (error) {
    console.error('Transcribe proxy error:', error);
    return NextResponse.json({ error: 'Transcription failed' }, { status: 500 });
  }
}

View File

@@ -15,6 +15,7 @@ const TYPE_COLORS: Record<string, string> = {
CODE: 'bg-green-500/20 text-green-400',
IMAGE: 'bg-pink-500/20 text-pink-400',
FILE: 'bg-slate-500/20 text-slate-400',
AUDIO: 'bg-red-500/20 text-red-400',
};
interface NoteData {
@@ -28,6 +29,7 @@ interface NoteData {
fileUrl: string | null;
mimeType: string | null;
fileSize: number | null;
duration: number | null;
isPinned: boolean;
canvasShapeId: string | null;
createdAt: string;
@@ -248,6 +250,16 @@ export default function NoteDetailPage() {
</a>
</div>
)}
{note.fileUrl && note.type === 'AUDIO' && (
<div className="mb-6 p-4 bg-slate-800/50 border border-slate-700 rounded-lg space-y-3">
<audio controls src={note.fileUrl} className="w-full" />
<div className="flex items-center gap-3 text-xs text-slate-500">
{note.duration != null && <span>{Math.floor(note.duration / 60)}:{(note.duration % 60).toString().padStart(2, '0')}</span>}
{note.mimeType && <span>{note.mimeType}</span>}
{note.fileSize && <span>{(note.fileSize / 1024).toFixed(1)} KB</span>}
</div>
</div>
)}
{/* Content */}
{editing ? (

View File

@@ -5,6 +5,7 @@ import { useRouter, useSearchParams } from 'next/navigation';
import Link from 'next/link';
import { NoteEditor } from '@/components/NoteEditor';
import { FileUpload } from '@/components/FileUpload';
import { VoiceRecorder } from '@/components/VoiceRecorder';
import { UserMenu } from '@/components/UserMenu';
import { authFetch } from '@/lib/authFetch';
@@ -15,6 +16,7 @@ const NOTE_TYPES = [
{ value: 'CODE', label: 'Code', desc: 'Code snippet' },
{ value: 'IMAGE', label: 'Image', desc: 'Upload image' },
{ value: 'FILE', label: 'File', desc: 'Upload file' },
{ value: 'AUDIO', label: 'Audio', desc: 'Voice recording' },
];
interface NotebookOption {
@@ -51,6 +53,7 @@ function NewNoteForm() {
const [fileUrl, setFileUrl] = useState('');
const [mimeType, setMimeType] = useState('');
const [fileSize, setFileSize] = useState(0);
const [duration, setDuration] = useState(0);
const [notebookId, setNotebookId] = useState(preselectedNotebook || '');
const [notebooks, setNotebooks] = useState<NotebookOption[]>([]);
const [saving, setSaving] = useState(false);
@@ -80,6 +83,7 @@ function NewNoteForm() {
if (fileUrl) body.fileUrl = fileUrl;
if (mimeType) body.mimeType = mimeType;
if (fileSize) body.fileSize = fileSize;
if (duration) body.duration = duration;
const endpoint = notebookId
? `/api/notebooks/${notebookId}/notes`
@@ -105,6 +109,7 @@
const showUrl = ['CLIP', 'BOOKMARK'].includes(type);
const showUpload = ['IMAGE', 'FILE'].includes(type);
const showLanguage = type === 'CODE';
const showRecorder = type === 'AUDIO';
return (
<div className="min-h-screen bg-[#0a0a0a]">
@@ -234,16 +239,43 @@
</div>
)}
{/* Voice recorder */}
{showRecorder && (
<div>
<label className="block text-sm font-medium text-slate-300 mb-2">Recording</label>
<VoiceRecorder
onResult={(result) => {
setFileUrl(result.fileUrl);
setMimeType(result.mimeType);
setFileSize(result.fileSize);
setDuration(result.duration);
setContent(result.transcript);
if (!title) setTitle(`Voice note ${new Date().toLocaleDateString()}`);
}}
/>
{content && (
<div className="mt-4">
<label className="block text-sm font-medium text-slate-300 mb-2">Transcript</label>
<div className="p-4 bg-slate-800/50 border border-slate-700 rounded-lg text-slate-300 text-sm leading-relaxed">
{content}
</div>
</div>
)}
</div>
)}
{/* Content */}
<div>
<label className="block text-sm font-medium text-slate-300 mb-2">Content</label>
<NoteEditor
value={content}
onChange={setContent}
type={type}
placeholder={type === 'CODE' ? 'Paste your code here...' : 'Write in Markdown...'}
/>
</div>
{!showRecorder && (
<div>
<label className="block text-sm font-medium text-slate-300 mb-2">Content</label>
<NoteEditor
value={content}
onChange={setContent}
type={type}
placeholder={type === 'CODE' ? 'Paste your code here...' : 'Write in Markdown...'}
/>
</div>
)}
{/* Notebook */}
<div>

View File

@@ -10,6 +10,7 @@ const TYPE_COLORS: Record<string, string> = {
CODE: 'bg-green-500/20 text-green-400',
IMAGE: 'bg-pink-500/20 text-pink-400',
FILE: 'bg-slate-500/20 text-slate-400',
AUDIO: 'bg-red-500/20 text-red-400',
};
interface NoteCardProps {

View File

@@ -0,0 +1,235 @@
'use client';
import { useState, useRef, useCallback, useEffect } from 'react';
import { authFetch } from '@/lib/authFetch';
/**
 * Payload delivered to `onResult` once a finished recording has been
 * uploaded and (best-effort) transcribed.
 */
interface VoiceRecorderResult {
  fileUrl: string;    // URL of the stored audio returned by /api/uploads
  mimeType: string;   // MIME type reported by the upload endpoint
  fileSize: number;   // size in bytes reported by the upload endpoint
  duration: number;   // recording length in whole seconds, measured client-side
  transcript: string; // transcription text; empty string when transcription failed
}

/** Props for the VoiceRecorder component. */
interface VoiceRecorderProps {
  onResult: (result: VoiceRecorderResult) => void; // fires after upload + transcription complete
  className?: string; // optional wrapper class for layout
}
// Container formats to request from MediaRecorder, most preferred first.
// Safari has no WebM recording support, so audio/mp4 is included as a
// fallback; if nothing matches we omit the mimeType option entirely and let
// the browser choose its default (passing an unsupported mimeType makes the
// MediaRecorder constructor throw NotSupportedError).
const RECORDER_MIME_CANDIDATES = ['audio/webm;codecs=opus', 'audio/webm', 'audio/mp4'];

// Probe for the first recording format this browser supports.
function pickRecorderMimeType(): string | undefined {
  return RECORDER_MIME_CANDIDATES.find((t) => MediaRecorder.isTypeSupported(t));
}

// Derive an upload filename whose extension matches the recorded container,
// instead of hard-coding .webm for blobs that may actually be MP4 (Safari).
function recordingFilename(blob: Blob): string {
  return blob.type.includes('mp4') ? 'recording.mp4' : 'recording.webm';
}

/**
 * Voice recorder widget: captures microphone audio with MediaRecorder,
 * uploads the finished clip to /api/uploads, requests a transcript from
 * /api/voice/transcribe (best effort — the recording is still reported if
 * transcription fails), then hands everything to the parent via `onResult`.
 */
export function VoiceRecorder({ onResult, className }: VoiceRecorderProps) {
  const [recording, setRecording] = useState(false);
  const [processing, setProcessing] = useState(false);
  const [processingStep, setProcessingStep] = useState('');
  const [elapsed, setElapsed] = useState(0); // seconds recorded so far
  const [error, setError] = useState<string | null>(null);
  const [audioUrl, setAudioUrl] = useState<string | null>(null); // local preview blob URL

  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
  const startTimeRef = useRef<number>(0);

  // Revoke the preview URL when it is replaced or on unmount, and make sure
  // the elapsed-time ticker never outlives the component.
  useEffect(() => {
    return () => {
      if (timerRef.current) clearInterval(timerRef.current);
      if (audioUrl) URL.revokeObjectURL(audioUrl);
    };
  }, [audioUrl]);

  /** Format a second count as MM:SS. */
  const formatTime = (seconds: number) => {
    const m = Math.floor(seconds / 60).toString().padStart(2, '0');
    const s = (seconds % 60).toString().padStart(2, '0');
    return `${m}:${s}`;
  };

  /** Request mic access and begin recording, ticking `elapsed` once per second. */
  const startRecording = useCallback(async () => {
    setError(null);
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      // Fix: the previous code always passed an 'audio/webm' mimeType, which
      // throws NotSupportedError in browsers without WebM recording support
      // (notably Safari). Probe for a supported type and fall back to the
      // browser default when none match.
      const mimeType = pickRecorderMimeType();
      const mediaRecorder = mimeType
        ? new MediaRecorder(stream, { mimeType })
        : new MediaRecorder(stream);

      chunksRef.current = [];
      mediaRecorder.ondataavailable = (e) => {
        if (e.data.size > 0) chunksRef.current.push(e.data);
      };
      mediaRecorder.onstop = () => {
        // Release the microphone if recording ends without stopRecording
        // (stopRecording replaces this handler with its own).
        stream.getTracks().forEach((t) => t.stop());
      };

      mediaRecorder.start(1000); // flush a chunk every second
      mediaRecorderRef.current = mediaRecorder;
      startTimeRef.current = Date.now();
      setRecording(true);
      setElapsed(0);
      timerRef.current = setInterval(() => {
        setElapsed(Math.floor((Date.now() - startTimeRef.current) / 1000));
      }, 1000);
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Microphone access denied');
    }
  }, []);

  /**
   * Stop recording, then upload the clip and request a transcript.
   * Transcription failure is non-fatal: the audio is still reported to the
   * parent with an empty transcript.
   */
  const stopRecording = useCallback(async () => {
    const mediaRecorder = mediaRecorderRef.current;
    if (!mediaRecorder || mediaRecorder.state === 'inactive') return;

    if (timerRef.current) {
      clearInterval(timerRef.current);
      timerRef.current = null;
    }
    const duration = Math.floor((Date.now() - startTimeRef.current) / 1000);
    setRecording(false);
    setProcessing(true);

    // Replace onstop so we can await the final dataavailable flush before
    // assembling the blob (MediaRecorder delivers the last chunk before
    // firing the stop event).
    const blob = await new Promise<Blob>((resolve) => {
      mediaRecorder.onstop = () => {
        mediaRecorder.stream.getTracks().forEach((t) => t.stop());
        resolve(new Blob(chunksRef.current, { type: mediaRecorder.mimeType }));
      };
      mediaRecorder.stop();
    });

    // Local playback preview while (and after) we process.
    setAudioUrl(URL.createObjectURL(blob));

    const filename = recordingFilename(blob);
    try {
      // 1) Persist the audio file.
      setProcessingStep('Uploading audio...');
      const uploadForm = new FormData();
      uploadForm.append('file', blob, filename);
      const uploadRes = await authFetch('/api/uploads', {
        method: 'POST',
        body: uploadForm,
      });
      if (!uploadRes.ok) {
        const data = await uploadRes.json();
        throw new Error(data.error || 'Upload failed');
      }
      const uploadResult = await uploadRes.json();

      // 2) Best-effort transcription.
      setProcessingStep('Transcribing...');
      const transcribeForm = new FormData();
      transcribeForm.append('audio', blob, filename);
      const transcribeRes = await authFetch('/api/voice/transcribe', {
        method: 'POST',
        body: transcribeForm,
      });
      let transcript = '';
      if (transcribeRes.ok) {
        const transcribeResult = await transcribeRes.json();
        transcript = transcribeResult.text || '';
      } else {
        console.warn('Transcription failed, saving audio without transcript');
      }

      onResult({
        fileUrl: uploadResult.url,
        mimeType: uploadResult.mimeType,
        fileSize: uploadResult.size,
        duration,
        transcript,
      });
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Processing failed');
    } finally {
      setProcessing(false);
      setProcessingStep('');
    }
  }, [onResult]);

  /** Throw away the preview and return to the idle (record) state. */
  const discard = useCallback(() => {
    if (audioUrl) {
      URL.revokeObjectURL(audioUrl);
      setAudioUrl(null);
    }
    chunksRef.current = []; // release recorded data
    setElapsed(0);
    setError(null);
  }, [audioUrl]);

  return (
    <div className={className}>
      <div className="border border-slate-700 rounded-lg p-6 bg-slate-800/30">
        {/* Recording controls */}
        <div className="flex flex-col items-center gap-4">
          {!recording && !processing && !audioUrl && (
            <>
              <button
                type="button"
                onClick={startRecording}
                className="w-20 h-20 rounded-full bg-red-500 hover:bg-red-400 transition-colors flex items-center justify-center"
              >
                <svg className="w-8 h-8 text-white" fill="currentColor" viewBox="0 0 24 24">
                  <path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3zm-1-9c0-.55.45-1 1-1s1 .45 1 1v6c0 .55-.45 1-1 1s-1-.45-1-1V5z" />
                  <path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z" />
                </svg>
              </button>
              <p className="text-sm text-slate-400">Tap to start recording</p>
            </>
          )}
          {recording && (
            <>
              <div className="flex items-center gap-3">
                <span className="w-3 h-3 rounded-full bg-red-500 animate-pulse" />
                <span className="text-2xl font-mono text-white">{formatTime(elapsed)}</span>
              </div>
              <button
                type="button"
                onClick={stopRecording}
                className="w-20 h-20 rounded-full bg-slate-700 hover:bg-slate-600 transition-colors flex items-center justify-center border-2 border-red-500"
              >
                <div className="w-7 h-7 rounded bg-red-500" />
              </button>
              <p className="text-sm text-slate-400">Tap to stop</p>
            </>
          )}
          {processing && (
            <div className="flex flex-col items-center gap-3 py-4">
              <svg className="animate-spin h-8 w-8 text-amber-400" viewBox="0 0 24 24">
                <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" fill="none" />
                <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
              </svg>
              <p className="text-sm text-slate-400">{processingStep}</p>
            </div>
          )}
          {audioUrl && !processing && (
            <div className="w-full space-y-3">
              <audio controls src={audioUrl} className="w-full" />
              <div className="flex items-center justify-between">
                <span className="text-sm text-slate-400">{formatTime(elapsed)} recorded</span>
                <button
                  type="button"
                  onClick={discard}
                  className="text-sm text-slate-400 hover:text-red-400 transition-colors"
                >
                  Discard &amp; re-record
                </button>
              </div>
            </div>
          )}
        </div>
        {error && (
          <p className="text-red-400 text-sm mt-4 text-center">{error}</p>
        )}
      </div>
    </div>
  );
}