feat: add AUDIO note type with voice recording and transcription

- Add AUDIO to NoteType enum, duration field to Note model
- New VoiceRecorder component (MediaRecorder API, upload, transcribe)
- New /api/voice/transcribe proxy route to voice-command-api container
- Audio MIME types added to upload whitelist
- Audio player + transcript display on note detail page
- AUDIO type button on new note page with recorder UI

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jeff Emmett 2026-02-15 08:34:50 -07:00
parent e450381e2f
commit d7a2372a56
10 changed files with 341 additions and 11 deletions

View File

@@ -11,6 +11,7 @@ services:
- RSPACE_INTERNAL_URL=${RSPACE_INTERNAL_URL:-http://rspace-online:3000}
- NEXT_PUBLIC_ENCRYPTID_SERVER_URL=${NEXT_PUBLIC_ENCRYPTID_SERVER_URL:-https://encryptid.jeffemmett.com}
- RSPACE_INTERNAL_KEY=${RSPACE_INTERNAL_KEY}
- VOICE_API_URL=${VOICE_API_URL:-http://voice-command-api:8000}
volumes:
- uploads_data:/app/uploads
labels:

View File

@@ -79,6 +79,7 @@ model Note {
mimeType String?
fileUrl String?
fileSize Int?
duration Int?
isPinned Boolean @default(false)
canvasShapeId String?
sortOrder Int @default(0)
@@ -100,6 +101,7 @@ enum NoteType {
CODE
IMAGE
FILE
AUDIO
}
// ─── Tags ───────────────────────────────────────────────────────────

View File

@@ -37,7 +37,7 @@ export async function POST(
}
const body = await request.json();
const { title, content, type, url, language, tags, fileUrl, mimeType, fileSize } = body;
const { title, content, type, url, language, tags, fileUrl, mimeType, fileSize, duration } = body;
if (!title?.trim()) {
return NextResponse.json({ error: 'Title is required' }, { status: 400 });
@@ -73,6 +73,7 @@ export async function POST(
fileUrl: fileUrl || null,
mimeType: mimeType || null,
fileSize: fileSize || null,
duration: duration || null,
tags: {
create: tagRecords.map((tag) => ({
tagId: tag.id,

View File

@@ -43,7 +43,7 @@ export async function POST(request: NextRequest) {
if (!isAuthed(auth)) return auth;
const { user } = auth;
const body = await request.json();
const { title, content, type, notebookId, url, language, tags, fileUrl, mimeType, fileSize } = body;
const { title, content, type, notebookId, url, language, tags, fileUrl, mimeType, fileSize, duration } = body;
if (!title?.trim()) {
return NextResponse.json({ error: 'Title is required' }, { status: 400 });
@@ -79,6 +79,7 @@ export async function POST(request: NextRequest) {
fileUrl: fileUrl || null,
mimeType: mimeType || null,
fileSize: fileSize || null,
duration: duration || null,
tags: {
create: tagRecords.map((tag) => ({
tagId: tag.id,

View File

@@ -19,6 +19,9 @@ const ALLOWED_MIME_TYPES = new Set([
// Code
'text/javascript', 'text/typescript', 'text/html', 'text/css',
'application/x-python-code', 'text/x-python',
// Audio
'audio/webm', 'audio/mpeg', 'audio/wav', 'audio/ogg',
'audio/mp4', 'audio/x-m4a', 'audio/aac', 'audio/flac',
]);
function sanitizeFilename(name: string): string {

View File

@@ -0,0 +1,42 @@
import { NextRequest, NextResponse } from 'next/server';
import { requireAuth, isAuthed } from '@/lib/auth';
// Internal address of the transcription service (docker-compose service name by default).
const VOICE_API_URL = process.env.VOICE_API_URL || 'http://voice-command-api:8000';

/**
 * POST /api/voice/transcribe
 *
 * Authenticated proxy to the voice-command API container. Accepts multipart
 * form data with an `audio` field, forwards it upstream, and relays the
 * upstream JSON result. Upstream failures are surfaced with the upstream
 * status code but a generic error body; unexpected errors return 500.
 */
export async function POST(request: NextRequest) {
  try {
    const auth = await requireAuth(request);
    if (!isAuthed(auth)) return auth;

    const incoming = await request.formData();
    const audioFile = incoming.get('audio') as File | null;
    if (!audioFile) {
      return NextResponse.json({ error: 'No audio file provided' }, { status: 400 });
    }

    // Re-wrap the file in a fresh multipart body for the upstream service.
    const upstreamForm = new FormData();
    upstreamForm.append('audio', audioFile, audioFile.name || 'recording.webm');

    const upstream = await fetch(`${VOICE_API_URL}/api/voice/transcribe`, {
      method: 'POST',
      body: upstreamForm,
    });

    if (!upstream.ok) {
      const detail = await upstream.text();
      console.error('Voice API error:', upstream.status, detail);
      // Propagate the status, but do not leak the upstream error body to clients.
      return NextResponse.json(
        { error: 'Transcription failed' },
        { status: upstream.status }
      );
    }

    return NextResponse.json(await upstream.json());
  } catch (error) {
    console.error('Transcribe proxy error:', error);
    return NextResponse.json({ error: 'Transcription failed' }, { status: 500 });
  }
}

View File

@@ -15,6 +15,7 @@ const TYPE_COLORS: Record<string, string> = {
CODE: 'bg-green-500/20 text-green-400',
IMAGE: 'bg-pink-500/20 text-pink-400',
FILE: 'bg-slate-500/20 text-slate-400',
AUDIO: 'bg-red-500/20 text-red-400',
};
interface NoteData {
@@ -28,6 +29,7 @@ interface NoteData {
fileUrl: string | null;
mimeType: string | null;
fileSize: number | null;
duration: number | null;
isPinned: boolean;
canvasShapeId: string | null;
createdAt: string;
@@ -248,6 +250,16 @@ export default function NoteDetailPage() {
</a>
</div>
)}
{note.fileUrl && note.type === 'AUDIO' && (
<div className="mb-6 p-4 bg-slate-800/50 border border-slate-700 rounded-lg space-y-3">
<audio controls src={note.fileUrl} className="w-full" />
<div className="flex items-center gap-3 text-xs text-slate-500">
{note.duration != null && <span>{Math.floor(note.duration / 60)}:{(note.duration % 60).toString().padStart(2, '0')}</span>}
{note.mimeType && <span>{note.mimeType}</span>}
{note.fileSize && <span>{(note.fileSize / 1024).toFixed(1)} KB</span>}
</div>
</div>
)}
{/* Content */}
{editing ? (

View File

@@ -5,6 +5,7 @@ import { useRouter, useSearchParams } from 'next/navigation';
import Link from 'next/link';
import { NoteEditor } from '@/components/NoteEditor';
import { FileUpload } from '@/components/FileUpload';
import { VoiceRecorder } from '@/components/VoiceRecorder';
import { UserMenu } from '@/components/UserMenu';
import { authFetch } from '@/lib/authFetch';
@@ -15,6 +16,7 @@ const NOTE_TYPES = [
{ value: 'CODE', label: 'Code', desc: 'Code snippet' },
{ value: 'IMAGE', label: 'Image', desc: 'Upload image' },
{ value: 'FILE', label: 'File', desc: 'Upload file' },
{ value: 'AUDIO', label: 'Audio', desc: 'Voice recording' },
];
interface NotebookOption {
@@ -51,6 +53,7 @@ function NewNoteForm() {
const [fileUrl, setFileUrl] = useState('');
const [mimeType, setMimeType] = useState('');
const [fileSize, setFileSize] = useState(0);
const [duration, setDuration] = useState(0);
const [notebookId, setNotebookId] = useState(preselectedNotebook || '');
const [notebooks, setNotebooks] = useState<NotebookOption[]>([]);
const [saving, setSaving] = useState(false);
@@ -80,6 +83,7 @@ function NewNoteForm() {
if (fileUrl) body.fileUrl = fileUrl;
if (mimeType) body.mimeType = mimeType;
if (fileSize) body.fileSize = fileSize;
if (duration) body.duration = duration;
const endpoint = notebookId
? `/api/notebooks/${notebookId}/notes`
@@ -105,6 +109,7 @@
const showUrl = ['CLIP', 'BOOKMARK'].includes(type);
const showUpload = ['IMAGE', 'FILE'].includes(type);
const showLanguage = type === 'CODE';
const showRecorder = type === 'AUDIO';
return (
<div className="min-h-screen bg-[#0a0a0a]">
@@ -234,16 +239,43 @@
</div>
)}
{/* Voice recorder */}
{showRecorder && (
<div>
<label className="block text-sm font-medium text-slate-300 mb-2">Recording</label>
<VoiceRecorder
onResult={(result) => {
setFileUrl(result.fileUrl);
setMimeType(result.mimeType);
setFileSize(result.fileSize);
setDuration(result.duration);
setContent(result.transcript);
if (!title) setTitle(`Voice note ${new Date().toLocaleDateString()}`);
}}
/>
{content && (
<div className="mt-4">
<label className="block text-sm font-medium text-slate-300 mb-2">Transcript</label>
<div className="p-4 bg-slate-800/50 border border-slate-700 rounded-lg text-slate-300 text-sm leading-relaxed">
{content}
</div>
</div>
)}
</div>
)}
{/* Content */}
<div>
<label className="block text-sm font-medium text-slate-300 mb-2">Content</label>
<NoteEditor
value={content}
onChange={setContent}
type={type}
placeholder={type === 'CODE' ? 'Paste your code here...' : 'Write in Markdown...'}
/>
</div>
{!showRecorder && (
<div>
<label className="block text-sm font-medium text-slate-300 mb-2">Content</label>
<NoteEditor
value={content}
onChange={setContent}
type={type}
placeholder={type === 'CODE' ? 'Paste your code here...' : 'Write in Markdown...'}
/>
</div>
)}
{/* Notebook */}
<div>

View File

@@ -10,6 +10,7 @@ const TYPE_COLORS: Record<string, string> = {
CODE: 'bg-green-500/20 text-green-400',
IMAGE: 'bg-pink-500/20 text-pink-400',
FILE: 'bg-slate-500/20 text-slate-400',
AUDIO: 'bg-red-500/20 text-red-400',
};
interface NoteCardProps {

View File

@@ -0,0 +1,235 @@
'use client';
import { useState, useRef, useCallback, useEffect } from 'react';
import { authFetch } from '@/lib/authFetch';
/**
 * Payload delivered to `onResult` once a finished recording has been
 * uploaded and (best-effort) transcribed.
 */
interface VoiceRecorderResult {
  fileUrl: string;    // URL of the stored audio returned by /api/uploads
  mimeType: string;   // MIME type reported by the upload endpoint
  fileSize: number;   // size in bytes reported by the upload endpoint
  duration: number;   // recording length in whole seconds, measured client-side
  transcript: string; // transcription text; empty string when transcription failed
}

/** Props for the VoiceRecorder component. */
interface VoiceRecorderProps {
  onResult: (result: VoiceRecorderResult) => void; // fires after upload + transcription complete
  className?: string; // optional wrapper class for layout
}
// Container formats to request from MediaRecorder, most preferred first.
// Safari has no WebM recording support, so audio/mp4 is included as a
// fallback; if nothing matches we omit the mimeType option entirely and let
// the browser choose its default (passing an unsupported mimeType makes the
// MediaRecorder constructor throw NotSupportedError).
const RECORDER_MIME_CANDIDATES = ['audio/webm;codecs=opus', 'audio/webm', 'audio/mp4'];

// Probe for the first recording format this browser supports.
function pickRecorderMimeType(): string | undefined {
  return RECORDER_MIME_CANDIDATES.find((t) => MediaRecorder.isTypeSupported(t));
}

// Derive an upload filename whose extension matches the recorded container,
// instead of hard-coding .webm for blobs that may actually be MP4 (Safari).
function recordingFilename(blob: Blob): string {
  return blob.type.includes('mp4') ? 'recording.mp4' : 'recording.webm';
}

/**
 * Voice recorder widget: captures microphone audio with MediaRecorder,
 * uploads the finished clip to /api/uploads, requests a transcript from
 * /api/voice/transcribe (best effort — the recording is still reported if
 * transcription fails), then hands everything to the parent via `onResult`.
 */
export function VoiceRecorder({ onResult, className }: VoiceRecorderProps) {
  const [recording, setRecording] = useState(false);
  const [processing, setProcessing] = useState(false);
  const [processingStep, setProcessingStep] = useState('');
  const [elapsed, setElapsed] = useState(0); // seconds recorded so far
  const [error, setError] = useState<string | null>(null);
  const [audioUrl, setAudioUrl] = useState<string | null>(null); // local preview blob URL

  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const chunksRef = useRef<Blob[]>([]);
  const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
  const startTimeRef = useRef<number>(0);

  // Revoke the preview URL when it is replaced or on unmount, and make sure
  // the elapsed-time ticker never outlives the component.
  useEffect(() => {
    return () => {
      if (timerRef.current) clearInterval(timerRef.current);
      if (audioUrl) URL.revokeObjectURL(audioUrl);
    };
  }, [audioUrl]);

  /** Format a second count as MM:SS. */
  const formatTime = (seconds: number) => {
    const m = Math.floor(seconds / 60).toString().padStart(2, '0');
    const s = (seconds % 60).toString().padStart(2, '0');
    return `${m}:${s}`;
  };

  /** Request mic access and begin recording, ticking `elapsed` once per second. */
  const startRecording = useCallback(async () => {
    setError(null);
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      // Fix: the previous code always passed an 'audio/webm' mimeType, which
      // throws NotSupportedError in browsers without WebM recording support
      // (notably Safari). Probe for a supported type and fall back to the
      // browser default when none match.
      const mimeType = pickRecorderMimeType();
      const mediaRecorder = mimeType
        ? new MediaRecorder(stream, { mimeType })
        : new MediaRecorder(stream);

      chunksRef.current = [];
      mediaRecorder.ondataavailable = (e) => {
        if (e.data.size > 0) chunksRef.current.push(e.data);
      };
      mediaRecorder.onstop = () => {
        // Release the microphone if recording ends without stopRecording
        // (stopRecording replaces this handler with its own).
        stream.getTracks().forEach((t) => t.stop());
      };

      mediaRecorder.start(1000); // flush a chunk every second
      mediaRecorderRef.current = mediaRecorder;
      startTimeRef.current = Date.now();
      setRecording(true);
      setElapsed(0);
      timerRef.current = setInterval(() => {
        setElapsed(Math.floor((Date.now() - startTimeRef.current) / 1000));
      }, 1000);
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Microphone access denied');
    }
  }, []);

  /**
   * Stop recording, then upload the clip and request a transcript.
   * Transcription failure is non-fatal: the audio is still reported to the
   * parent with an empty transcript.
   */
  const stopRecording = useCallback(async () => {
    const mediaRecorder = mediaRecorderRef.current;
    if (!mediaRecorder || mediaRecorder.state === 'inactive') return;

    if (timerRef.current) {
      clearInterval(timerRef.current);
      timerRef.current = null;
    }
    const duration = Math.floor((Date.now() - startTimeRef.current) / 1000);
    setRecording(false);
    setProcessing(true);

    // Replace onstop so we can await the final dataavailable flush before
    // assembling the blob (MediaRecorder delivers the last chunk before
    // firing the stop event).
    const blob = await new Promise<Blob>((resolve) => {
      mediaRecorder.onstop = () => {
        mediaRecorder.stream.getTracks().forEach((t) => t.stop());
        resolve(new Blob(chunksRef.current, { type: mediaRecorder.mimeType }));
      };
      mediaRecorder.stop();
    });

    // Local playback preview while (and after) we process.
    setAudioUrl(URL.createObjectURL(blob));

    const filename = recordingFilename(blob);
    try {
      // 1) Persist the audio file.
      setProcessingStep('Uploading audio...');
      const uploadForm = new FormData();
      uploadForm.append('file', blob, filename);
      const uploadRes = await authFetch('/api/uploads', {
        method: 'POST',
        body: uploadForm,
      });
      if (!uploadRes.ok) {
        const data = await uploadRes.json();
        throw new Error(data.error || 'Upload failed');
      }
      const uploadResult = await uploadRes.json();

      // 2) Best-effort transcription.
      setProcessingStep('Transcribing...');
      const transcribeForm = new FormData();
      transcribeForm.append('audio', blob, filename);
      const transcribeRes = await authFetch('/api/voice/transcribe', {
        method: 'POST',
        body: transcribeForm,
      });
      let transcript = '';
      if (transcribeRes.ok) {
        const transcribeResult = await transcribeRes.json();
        transcript = transcribeResult.text || '';
      } else {
        console.warn('Transcription failed, saving audio without transcript');
      }

      onResult({
        fileUrl: uploadResult.url,
        mimeType: uploadResult.mimeType,
        fileSize: uploadResult.size,
        duration,
        transcript,
      });
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Processing failed');
    } finally {
      setProcessing(false);
      setProcessingStep('');
    }
  }, [onResult]);

  /** Throw away the preview and return to the idle (record) state. */
  const discard = useCallback(() => {
    if (audioUrl) {
      URL.revokeObjectURL(audioUrl);
      setAudioUrl(null);
    }
    chunksRef.current = []; // release recorded data
    setElapsed(0);
    setError(null);
  }, [audioUrl]);

  return (
    <div className={className}>
      <div className="border border-slate-700 rounded-lg p-6 bg-slate-800/30">
        {/* Recording controls */}
        <div className="flex flex-col items-center gap-4">
          {!recording && !processing && !audioUrl && (
            <>
              <button
                type="button"
                onClick={startRecording}
                className="w-20 h-20 rounded-full bg-red-500 hover:bg-red-400 transition-colors flex items-center justify-center"
              >
                <svg className="w-8 h-8 text-white" fill="currentColor" viewBox="0 0 24 24">
                  <path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3zm-1-9c0-.55.45-1 1-1s1 .45 1 1v6c0 .55-.45 1-1 1s-1-.45-1-1V5z" />
                  <path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z" />
                </svg>
              </button>
              <p className="text-sm text-slate-400">Tap to start recording</p>
            </>
          )}
          {recording && (
            <>
              <div className="flex items-center gap-3">
                <span className="w-3 h-3 rounded-full bg-red-500 animate-pulse" />
                <span className="text-2xl font-mono text-white">{formatTime(elapsed)}</span>
              </div>
              <button
                type="button"
                onClick={stopRecording}
                className="w-20 h-20 rounded-full bg-slate-700 hover:bg-slate-600 transition-colors flex items-center justify-center border-2 border-red-500"
              >
                <div className="w-7 h-7 rounded bg-red-500" />
              </button>
              <p className="text-sm text-slate-400">Tap to stop</p>
            </>
          )}
          {processing && (
            <div className="flex flex-col items-center gap-3 py-4">
              <svg className="animate-spin h-8 w-8 text-amber-400" viewBox="0 0 24 24">
                <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" fill="none" />
                <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
              </svg>
              <p className="text-sm text-slate-400">{processingStep}</p>
            </div>
          )}
          {audioUrl && !processing && (
            <div className="w-full space-y-3">
              <audio controls src={audioUrl} className="w-full" />
              <div className="flex items-center justify-between">
                <span className="text-sm text-slate-400">{formatTime(elapsed)} recorded</span>
                <button
                  type="button"
                  onClick={discard}
                  className="text-sm text-slate-400 hover:text-red-400 transition-colors"
                >
                  Discard &amp; re-record
                </button>
              </div>
            </div>
          )}
        </div>
        {error && (
          <p className="text-red-400 text-sm mt-4 text-center">{error}</p>
        )}
      </div>
    </div>
  );
}