From d7a2372a56d053a5f70c99c27594ffa31c4a435a Mon Sep 17 00:00:00 2001
From: Jeff Emmett <jeffemmett@gmail.com>
Date: Sun, 15 Feb 2026 08:34:50 -0700
Subject: [PATCH] feat: add AUDIO note type with voice recording and
 transcription

- Add AUDIO to NoteType enum, duration field to Note model
- New VoiceRecorder component (MediaRecorder API, upload, transcribe)
- New /api/voice/transcribe proxy route to voice-command-api container
- Audio MIME types added to upload whitelist
- Audio player + transcript display on note detail page
- AUDIO type button on new note page with recorder UI

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 docker-compose.yml                        |   1 +
 prisma/schema.prisma                      |   2 +
 src/app/api/notebooks/[id]/notes/route.ts |   3 +-
 src/app/api/notes/route.ts                |   3 +-
 src/app/api/uploads/route.ts              |   3 +
 src/app/api/voice/transcribe/route.ts     |  42 ++++
 src/app/notes/[id]/page.tsx               |  12 ++
 src/app/notes/new/page.tsx                |  50 ++++-
 src/components/NoteCard.tsx               |   1 +
 src/components/VoiceRecorder.tsx          | 235 ++++++++++++++++++++++
 10 files changed, 341 insertions(+), 11 deletions(-)
 create mode 100644 src/app/api/voice/transcribe/route.ts
 create mode 100644 src/components/VoiceRecorder.tsx

diff --git a/docker-compose.yml b/docker-compose.yml
index 160c5e0..df5cc57 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -11,6 +11,7 @@ services:
       - RSPACE_INTERNAL_URL=${RSPACE_INTERNAL_URL:-http://rspace-online:3000}
       - NEXT_PUBLIC_ENCRYPTID_SERVER_URL=${NEXT_PUBLIC_ENCRYPTID_SERVER_URL:-https://encryptid.jeffemmett.com}
       - RSPACE_INTERNAL_KEY=${RSPACE_INTERNAL_KEY}
+      - VOICE_API_URL=${VOICE_API_URL:-http://voice-command-api:8000}
     volumes:
       - uploads_data:/app/uploads
     labels:
diff --git a/prisma/schema.prisma b/prisma/schema.prisma
index d86122e..4b9786e 100644
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -79,6 +79,7 @@ model Note {
   mimeType      String?
   fileUrl       String?
   fileSize      Int?
+  duration      Int?
   isPinned      Boolean  @default(false)
   canvasShapeId String?
   sortOrder     Int      @default(0)
@@ -100,6 +101,7 @@ enum NoteType {
   CODE
   IMAGE
   FILE
+  AUDIO
 }
 
 // ─── Tags ───────────────────────────────────────────────────────────
diff --git a/src/app/api/notebooks/[id]/notes/route.ts b/src/app/api/notebooks/[id]/notes/route.ts
index f3206b6..1a08b15 100644
--- a/src/app/api/notebooks/[id]/notes/route.ts
+++ b/src/app/api/notebooks/[id]/notes/route.ts
@@ -37,7 +37,7 @@ export async function POST(
     }
 
     const body = await request.json();
-    const { title, content, type, url, language, tags, fileUrl, mimeType, fileSize } = body;
+    const { title, content, type, url, language, tags, fileUrl, mimeType, fileSize, duration } = body;
 
     if (!title?.trim()) {
       return NextResponse.json({ error: 'Title is required' }, { status: 400 });
@@ -73,6 +73,7 @@ export async function POST(
         fileUrl: fileUrl || null,
         mimeType: mimeType || null,
         fileSize: fileSize || null,
+        duration: duration || null,
         tags: {
           create: tagRecords.map((tag) => ({
             tagId: tag.id,
diff --git a/src/app/api/notes/route.ts b/src/app/api/notes/route.ts
index 27daa51..c7f8f7c 100644
--- a/src/app/api/notes/route.ts
+++ b/src/app/api/notes/route.ts
@@ -43,7 +43,7 @@ export async function POST(request: NextRequest) {
     if (!isAuthed(auth)) return auth;
     const { user } = auth;
     const body = await request.json();
-    const { title, content, type, notebookId, url, language, tags, fileUrl, mimeType, fileSize } = body;
+    const { title, content, type, notebookId, url, language, tags, fileUrl, mimeType, fileSize, duration } = body;
 
     if (!title?.trim()) {
       return NextResponse.json({ error: 'Title is required' }, { status: 400 });
@@ -79,6 +79,7 @@ export async function POST(request: NextRequest) {
         fileUrl: fileUrl || null,
         mimeType: mimeType || null,
         fileSize: fileSize || null,
+        duration: duration || null,
         tags: {
           create: tagRecords.map((tag) => ({
             tagId: tag.id,
diff --git a/src/app/api/uploads/route.ts b/src/app/api/uploads/route.ts
index defb67d..d5e94db 100644
--- a/src/app/api/uploads/route.ts
+++ b/src/app/api/uploads/route.ts
@@ -19,6 +19,9 @@ const ALLOWED_MIME_TYPES = new Set([
   // Code
   'text/javascript', 'text/typescript', 'text/html', 'text/css',
   'application/x-python-code', 'text/x-python',
+  // Audio
+  'audio/webm', 'audio/mpeg', 'audio/wav', 'audio/ogg',
+  'audio/mp4', 'audio/x-m4a', 'audio/aac', 'audio/flac',
 ]);
 
 function sanitizeFilename(name: string): string {
diff --git a/src/app/api/voice/transcribe/route.ts b/src/app/api/voice/transcribe/route.ts
new file mode 100644
index 0000000..9b030b8
--- /dev/null
+++ b/src/app/api/voice/transcribe/route.ts
@@ -0,0 +1,42 @@
+import { NextRequest, NextResponse } from 'next/server';
+import { requireAuth, isAuthed } from '@/lib/auth';
+
+const VOICE_API_URL = process.env.VOICE_API_URL || 'http://voice-command-api:8000';
+
+/** Authenticated proxy: forwards the uploaded `audio` form field to the voice-command API and relays its JSON reply. */
+export async function POST(request: NextRequest) {
+  try {
+    const auth = await requireAuth(request);
+    if (!isAuthed(auth)) return auth;
+
+    const formData = await request.formData();
+    const audio = formData.get('audio');
+
+    // A form entry can be a plain string; only a non-empty file is a valid recording (reject with 400, not 500).
+    if (!audio || typeof audio === 'string' || audio.size === 0) {
+      return NextResponse.json({ error: 'No audio file provided' }, { status: 400 });
+    }
+
+    // Forward to voice-command API
+    const proxyForm = new FormData();
+    proxyForm.append('audio', audio, audio.name || 'recording.webm');
+
+    const res = await fetch(`${VOICE_API_URL}/api/voice/transcribe`, {
+      method: 'POST',
+      body: proxyForm,
+    });
+
+    if (!res.ok) {
+      // Upstream detail is logged server-side only; the client receives a generic message.
+      const err = await res.text();
+      console.error('Voice API error:', res.status, err);
+      return NextResponse.json({ error: 'Transcription failed' }, { status: res.status });
+    }
+
+    const result = await res.json();
+    return NextResponse.json(result);
+  } catch (error) {
+    console.error('Transcribe proxy error:', error);
+    return NextResponse.json({ error: 'Transcription failed' }, { status: 500 });
+  }
+}
diff --git a/src/app/notes/[id]/page.tsx b/src/app/notes/[id]/page.tsx
index ae9f62e..10685a8 100644
--- a/src/app/notes/[id]/page.tsx
+++ b/src/app/notes/[id]/page.tsx
@@ -15,6 +15,7 @@ const TYPE_COLORS: Record<string, string> = {
   CODE: 'bg-green-500/20 text-green-400',
   IMAGE: 'bg-pink-500/20 text-pink-400',
   FILE: 'bg-slate-500/20 text-slate-400',
+  AUDIO: 'bg-red-500/20 text-red-400',
 };
 
 interface NoteData {
@@ -28,6 +29,7 @@ interface NoteData {
   fileUrl: string | null;
   mimeType: string | null;
   fileSize: number | null;
+  duration: number | null;
   isPinned: boolean;
   canvasShapeId: string | null;
   createdAt: string;
@@ -248,6 +250,16 @@ export default function NoteDetailPage() {
             </a>
           </div>
         )}
+        {note.fileUrl && note.type === 'AUDIO' && (
+          <div className="mb-6 p-4 bg-slate-800/50 border border-slate-700 rounded-lg space-y-3">
+            <audio controls src={note.fileUrl} className="w-full" />
+            <div className="flex items-center gap-3 text-xs text-slate-500">
+              {note.duration != null && <span>{Math.floor(note.duration / 60)}:{(note.duration % 60).toString().padStart(2, '0')}</span>}
+              {note.mimeType && <span>{note.mimeType}</span>}
+              {note.fileSize && <span>{(note.fileSize / 1024).toFixed(1)} KB</span>}
+            </div>
+          </div>
+        )}
 
         {/* Content */}
         {editing ? (
diff --git a/src/app/notes/new/page.tsx b/src/app/notes/new/page.tsx
index f45f85a..05c1c75 100644
--- a/src/app/notes/new/page.tsx
+++ b/src/app/notes/new/page.tsx
@@ -5,6 +5,7 @@ import { useRouter, useSearchParams } from 'next/navigation';
 import Link from 'next/link';
 import { NoteEditor } from '@/components/NoteEditor';
 import { FileUpload } from '@/components/FileUpload';
+import { VoiceRecorder } from '@/components/VoiceRecorder';
 import { UserMenu } from '@/components/UserMenu';
 import { authFetch } from '@/lib/authFetch';
 
@@ -15,6 +16,7 @@ const NOTE_TYPES = [
   { value: 'CODE', label: 'Code', desc: 'Code snippet' },
   { value: 'IMAGE', label: 'Image', desc: 'Upload image' },
   { value: 'FILE', label: 'File', desc: 'Upload file' },
+  { value: 'AUDIO', label: 'Audio', desc: 'Voice recording' },
 ];
 
 interface NotebookOption {
@@ -51,6 +53,7 @@ function NewNoteForm() {
   const [fileUrl, setFileUrl] = useState('');
   const [mimeType, setMimeType] = useState('');
   const [fileSize, setFileSize] = useState(0);
+  const [duration, setDuration] = useState(0);
   const [notebookId, setNotebookId] = useState(preselectedNotebook || '');
   const [notebooks, setNotebooks] = useState<NotebookOption[]>([]);
   const [saving, setSaving] = useState(false);
@@ -80,6 +83,7 @@ function NewNoteForm() {
       if (fileUrl) body.fileUrl = fileUrl;
       if (mimeType) body.mimeType = mimeType;
       if (fileSize) body.fileSize = fileSize;
+      if (duration) body.duration = duration;
 
       const endpoint = notebookId
         ? `/api/notebooks/${notebookId}/notes`
@@ -105,6 +109,7 @@ function NewNoteForm() {
   const showUrl = ['CLIP', 'BOOKMARK'].includes(type);
   const showUpload = ['IMAGE', 'FILE'].includes(type);
   const showLanguage = type === 'CODE';
+  const showRecorder = type === 'AUDIO';
 
   return (
     <div className="min-h-screen bg-[#0a0a0a]">
@@ -234,16 +239,43 @@ function NewNoteForm() {
             </div>
           )}
 
+          {/* Voice recorder */}
+          {showRecorder && (
+            <div>
+              <label className="block text-sm font-medium text-slate-300 mb-2">Recording</label>
+              <VoiceRecorder
+                onResult={(result) => {
+                  setFileUrl(result.fileUrl);
+                  setMimeType(result.mimeType);
+                  setFileSize(result.fileSize);
+                  setDuration(result.duration);
+                  setContent(result.transcript);
+                  setTitle((prev) => prev || `Voice note ${new Date().toLocaleDateString()}`); // functional update: the `title` captured by this closure may predate the async upload
+                }}
+              />
+              {content && (
+                <div className="mt-4">
+                  <label className="block text-sm font-medium text-slate-300 mb-2">Transcript</label>
+                  <div className="p-4 bg-slate-800/50 border border-slate-700 rounded-lg text-slate-300 text-sm leading-relaxed">
+                    {content}
+                  </div>
+                </div>
+              )}
+            </div>
+          )}
+
           {/* Content */}
-          <div>
-            <label className="block text-sm font-medium text-slate-300 mb-2">Content</label>
-            <NoteEditor
-              value={content}
-              onChange={setContent}
-              type={type}
-              placeholder={type === 'CODE' ? 'Paste your code here...' : 'Write in Markdown...'}
-            />
-          </div>
+          {!showRecorder && (
+            <div>
+              <label className="block text-sm font-medium text-slate-300 mb-2">Content</label>
+              <NoteEditor
+                value={content}
+                onChange={setContent}
+                type={type}
+                placeholder={type === 'CODE' ? 'Paste your code here...' : 'Write in Markdown...'}
+              />
+            </div>
+          )}
 
           {/* Notebook */}
           <div>
diff --git a/src/components/NoteCard.tsx b/src/components/NoteCard.tsx
index 51d1284..d2e7f7c 100644
--- a/src/components/NoteCard.tsx
+++ b/src/components/NoteCard.tsx
@@ -10,6 +10,7 @@ const TYPE_COLORS: Record<string, string> = {
   CODE: 'bg-green-500/20 text-green-400',
   IMAGE: 'bg-pink-500/20 text-pink-400',
   FILE: 'bg-slate-500/20 text-slate-400',
+  AUDIO: 'bg-red-500/20 text-red-400',
 };
 
 interface NoteCardProps {
diff --git a/src/components/VoiceRecorder.tsx b/src/components/VoiceRecorder.tsx
new file mode 100644
index 0000000..8fa4673
--- /dev/null
+++ b/src/components/VoiceRecorder.tsx
@@ -0,0 +1,235 @@
+'use client';
+
+import { useState, useRef, useCallback, useEffect } from 'react';
+import { authFetch } from '@/lib/authFetch';
+
+interface VoiceRecorderResult { // Payload handed to onResult once a recording has been uploaded
+  fileUrl: string; // `url` field of the /api/uploads response
+  mimeType: string; // `mimeType` field of the /api/uploads response
+  fileSize: number; // `size` field of the /api/uploads response
+  duration: number; // wall-clock recording length in whole seconds
+  transcript: string; // transcription text; '' when transcription failed or returned no text
+}
+
+interface VoiceRecorderProps { // Props accepted by the VoiceRecorder component
+  onResult: (result: VoiceRecorderResult) => void; // invoked after the audio upload succeeds (transcript may be empty)
+  className?: string; // applied to the component's outermost <div>
+}
+
+export function VoiceRecorder({ onResult, className }: VoiceRecorderProps) {
+  const [recording, setRecording] = useState(false);
+  const [processing, setProcessing] = useState(false);
+  const [processingStep, setProcessingStep] = useState('');
+  const [elapsed, setElapsed] = useState(0);
+  const [error, setError] = useState<string | null>(null);
+  const [audioUrl, setAudioUrl] = useState<string | null>(null);
+
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+  const chunksRef = useRef<Blob[]>([]);
+  const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
+  const startTimeRef = useRef<number>(0);
+
+  // Cleanup (unmount or preview change): stop the timer, end any live capture so the mic is freed, revoke the preview URL.
+  useEffect(() => () => {
+    if (timerRef.current) clearInterval(timerRef.current);
+    if (mediaRecorderRef.current?.state === 'recording') mediaRecorderRef.current?.stop();
+    if (audioUrl) URL.revokeObjectURL(audioUrl);
+  }, [audioUrl]);
+
+  // Render a number of seconds as zero-padded "MM:SS".
+  const formatTime = (seconds: number) => {
+    const pad = (value: number) => value.toString().padStart(2, '0');
+    return `${pad(Math.floor(seconds / 60))}:${pad(seconds % 60)}`;
+  };
+
+  // Request the microphone and start capturing; chunks collect in chunksRef until stopRecording runs.
+  const startRecording = useCallback(async () => {
+    setError(null);
+    let stream: MediaStream | null = null;
+    try {
+      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      const mediaRecorder = new MediaRecorder(stream, {
+        mimeType: MediaRecorder.isTypeSupported('audio/webm;codecs=opus')
+          ? 'audio/webm;codecs=opus'
+          : 'audio/webm',
+      });
+
+      chunksRef.current = [];
+      mediaRecorder.ondataavailable = (e) => {
+        if (e.data.size > 0) chunksRef.current.push(e.data);
+      };
+      mediaRecorder.onstop = () => mediaRecorder.stream.getTracks().forEach((t) => t.stop());
+
+      mediaRecorder.start(1000);
+      mediaRecorderRef.current = mediaRecorder;
+      startTimeRef.current = Date.now();
+      setRecording(true);
+      setElapsed(0);
+      timerRef.current = setInterval(() => {
+        setElapsed(Math.floor((Date.now() - startTimeRef.current) / 1000));
+      }, 1000);
+    } catch (err) {
+      // The MediaRecorder constructor or start() can throw after permission was granted: release the microphone.
+      stream?.getTracks().forEach((t) => t.stop());
+      setError(err instanceof Error ? err.message : 'Microphone access denied');
+    }
+  }, []);
+
+  // Stop capture, upload the audio, request a transcript, and hand everything to onResult.
+  const stopRecording = useCallback(async () => {
+    const mediaRecorder = mediaRecorderRef.current;
+    if (!mediaRecorder || mediaRecorder.state === 'inactive') return;
+
+    if (timerRef.current) {
+      clearInterval(timerRef.current);
+      timerRef.current = null;
+    }
+
+    const duration = Math.floor((Date.now() - startTimeRef.current) / 1000);
+    setRecording(false);
+    setProcessing(true);
+
+    // The recorder reports e.g. "audio/webm;codecs=opus"; keep only the bare type so the blob
+    // matches the plain "audio/webm" entry of the upload whitelist (presumed exact-match check).
+    const blobType = mediaRecorder.mimeType.split(';')[0] || 'audio/webm';
+
+    // Wait for final data
+    const blob = await new Promise<Blob>((resolve) => {
+      mediaRecorder.onstop = () => {
+        mediaRecorder.stream.getTracks().forEach((t) => t.stop());
+        resolve(new Blob(chunksRef.current, { type: blobType }));
+      };
+      mediaRecorder.stop();
+    });
+
+    // Preview URL
+    const previewUrl = URL.createObjectURL(blob);
+    setAudioUrl(previewUrl);
+
+    try {
+      // Upload audio file
+      setProcessingStep('Uploading audio...');
+      const uploadForm = new FormData();
+      uploadForm.append('file', blob, 'recording.webm');
+
+      const uploadRes = await authFetch('/api/uploads', { method: 'POST', body: uploadForm });
+      if (!uploadRes.ok) {
+        // The error body is not guaranteed to be JSON (e.g. a gateway error page).
+        const data = await uploadRes.json().catch(() => ({}));
+        throw new Error(data.error || 'Upload failed');
+      }
+      const uploadResult = await uploadRes.json();
+
+      // Transcribe (best effort: any failure here still saves the uploaded audio)
+      setProcessingStep('Transcribing...');
+      let transcript = '';
+      try {
+        const transcribeForm = new FormData();
+        transcribeForm.append('audio', blob, 'recording.webm');
+        const transcribeRes = await authFetch('/api/voice/transcribe', {
+          method: 'POST',
+          body: transcribeForm,
+        });
+        if (!transcribeRes.ok) throw new Error(`HTTP ${transcribeRes.status}`);
+        const transcribeResult = await transcribeRes.json();
+        transcript = transcribeResult.text || '';
+      } catch (err) {
+        console.warn('Transcription failed, saving audio without transcript', err);
+      }
+
+      onResult({
+        fileUrl: uploadResult.url,
+        mimeType: uploadResult.mimeType,
+        fileSize: uploadResult.size,
+        duration,
+        transcript,
+      });
+    } catch (err) {
+      setError(err instanceof Error ? err.message : 'Processing failed');
+    } finally {
+      setProcessing(false);
+      setProcessingStep('');
+    }
+  }, [onResult]);
+
+  // Forget the current take: reset the timer/error and drop the preview so the record button returns.
+  const discard = useCallback(() => {
+    setError(null);
+    setElapsed(0);
+    if (!audioUrl) return;
+    URL.revokeObjectURL(audioUrl);
+    setAudioUrl(null);
+  }, [audioUrl]);
+
+  return (
+    <div className={className}>
+      <div className="border border-slate-700 rounded-lg p-6 bg-slate-800/30">
+        {/* Recording controls */}
+        <div className="flex flex-col items-center gap-4">
+          {!recording && !processing && !audioUrl && (
+            <>
+              <button
+                type="button"
+                onClick={startRecording}
+                className="w-20 h-20 rounded-full bg-red-500 hover:bg-red-400 transition-colors flex items-center justify-center"
+              >
+                <svg className="w-8 h-8 text-white" fill="currentColor" viewBox="0 0 24 24">
+                  <path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3zm-1-9c0-.55.45-1 1-1s1 .45 1 1v6c0 .55-.45 1-1 1s-1-.45-1-1V5z" />
+                  <path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z" />
+                </svg>
+              </button>
+              <p className="text-sm text-slate-400">Tap to start recording</p>
+            </>
+          )}
+
+          {recording && (
+            <>
+              <div className="flex items-center gap-3">
+                <span className="w-3 h-3 rounded-full bg-red-500 animate-pulse" />
+                <span className="text-2xl font-mono text-white">{formatTime(elapsed)}</span>
+              </div>
+              <button
+                type="button"
+                onClick={stopRecording}
+                className="w-20 h-20 rounded-full bg-slate-700 hover:bg-slate-600 transition-colors flex items-center justify-center border-2 border-red-500"
+              >
+                <div className="w-7 h-7 rounded bg-red-500" />
+              </button>
+              <p className="text-sm text-slate-400">Tap to stop</p>
+            </>
+          )}
+
+          {processing && (
+            <div className="flex flex-col items-center gap-3 py-4">
+              <svg className="animate-spin h-8 w-8 text-amber-400" viewBox="0 0 24 24">
+                <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" fill="none" />
+                <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
+              </svg>
+              <p className="text-sm text-slate-400">{processingStep}</p>
+            </div>
+          )}
+
+          {audioUrl && !processing && (
+            <div className="w-full space-y-3">
+              <audio controls src={audioUrl} className="w-full" />
+              <div className="flex items-center justify-between">
+                <span className="text-sm text-slate-400">{formatTime(elapsed)} recorded</span>
+                <button
+                  type="button"
+                  onClick={discard}
+                  className="text-sm text-slate-400 hover:text-red-400 transition-colors"
+                >
+                  Discard &amp; re-record
+                </button>
+              </div>
+            </div>
+          )}
+        </div>
+
+        {error && (
+          <p className="text-red-400 text-sm mt-4 text-center">{error}</p>
+        )}
+      </div>
+    </div>
+  );
+}