rnotes-online/src/components/TranscriptionDemo.tsx

290 lines
10 KiB
TypeScript

'use client';
import { useState, useRef, useCallback, useEffect } from 'react';
/* Web Speech API types — not in default TS lib */
/** One recognition hypothesis: the recognised text plus a numeric confidence value. */
type RecognitionAlternative = { transcript: string; confidence: number };

/**
 * Array-like collection of alternatives for a single recognition result.
 * `isFinal` distinguishes finished results from interim ones; the component
 * below reads the alternative at index 0.
 */
interface SpeechRecognitionResult {
  readonly length: number;
  readonly isFinal: boolean;
  [index: number]: RecognitionAlternative;
  item(index: number): RecognitionAlternative;
}
/**
 * Array-like list of recognition results, addressable either by numeric
 * index or through `item(index)`.
 */
interface SpeechRecognitionResultList {
  [index: number]: SpeechRecognitionResult;
  item(index: number): SpeechRecognitionResult;
  readonly length: number;
}
/**
 * Event handed to `onresult`. `results` holds the result list and
 * `resultIndex` is the index the component starts iterating from.
 */
interface SpeechRecognitionEvent extends Event {
  readonly results: SpeechRecognitionResultList;
  readonly resultIndex: number;
}
/** Event handed to `onerror`; `error` carries the failure code as a string (e.g. `'aborted'`). */
interface SpeechRecognitionErrorEvent extends Event {
readonly error: string;
}
/**
 * Minimal local typing of a speech recognizer instance: the configuration
 * flags, lifecycle methods and event-handler slots this file uses.
 */
interface ISpeechRecognition extends EventTarget {
  // Configuration
  lang: string;
  continuous: boolean;
  interimResults: boolean;

  // Lifecycle
  start(): void;
  stop(): void;

  // Event-handler slots (null when no handler is attached)
  onresult: ((event: SpeechRecognitionEvent) => void) | null;
  onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
  onend: (() => void) | null;
}
/** Zero-argument constructor that produces a recognizer instance. */
type SpeechRecognitionCtor = {
  new (): ISpeechRecognition;
};
/** UI state of the demo: waiting, actively transcribing, or running in a browser without speech recognition. */
type DemoStatus =
  | 'idle'
  | 'listening'
  | 'unsupported';
/** One transcript segment as stored in component state and rendered in the list. */
interface TranscriptLine {
  /** Text shown for the segment. */
  text: string;
  /** Whether the segment is a finalized result. */
  final: boolean;
  /** Identifier used as the React list key. */
  id: number;
}
/**
 * Error codes after which the session keeps running (the `onend` handler will
 * restart recognition). Every other code ends the session, because restarting
 * would simply fail again and loop forever.
 */
const RECOVERABLE_SPEECH_ERRORS: ReadonlySet<string> = new Set(['no-speech', 'aborted']);

/** Outline microphone icon path shared by the header and the empty-state graphic. */
const MIC_OUTLINE_PATH =
  'M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a3 3 0 01-3-3V5a3 3 0 116 0v6a3 3 0 01-3 3z';

/** Formats a whole number of seconds as zero-padded `mm:ss`. */
function formatElapsedTime(totalSeconds: number): string {
  const minutes = Math.floor(totalSeconds / 60).toString().padStart(2, '0');
  const seconds = (totalSeconds % 60).toString().padStart(2, '0');
  return `${minutes}:${seconds}`;
}

/** Maps a speech-recognition error code to a short message shown to the user. */
function describeSpeechError(code: string): string {
  switch (code) {
    case 'not-allowed':
    case 'service-not-allowed':
      return 'Microphone access was blocked. Allow microphone access and try again.';
    case 'audio-capture':
      return 'No microphone was found.';
    case 'network':
      return 'Speech recognition could not reach the network.';
    default:
      return `Speech recognition stopped (${code}).`;
  }
}

/**
 * Interactive live-transcription demo built on the browser's speech
 * recognition API (`SpeechRecognition` / `webkitSpeechRecognition`).
 *
 * Finalized results are appended as transcript segments, interim text is
 * shown below them, and an elapsed-time counter runs while listening. The
 * recognizer is restarted automatically whenever it ends on its own, until
 * the user presses Stop, the component unmounts, or a non-recoverable error
 * occurs.
 */
export function TranscriptionDemo() {
  const [status, setStatus] = useState<DemoStatus>('idle');
  const [lines, setLines] = useState<TranscriptLine[]>([]);
  const [interim, setInterim] = useState('');
  const [elapsed, setElapsed] = useState(0);
  const [errorMessage, setErrorMessage] = useState<string | null>(null);

  // The active recognizer; `null` means "no session", which is also what
  // tells the `onend` handler not to restart.
  const recognitionRef = useRef<ISpeechRecognition | null>(null);
  const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
  const startTimeRef = useRef(0);
  const scrollRef = useRef<HTMLDivElement>(null);
  const lineIdRef = useRef(0);

  const supported =
    typeof window !== 'undefined' &&
    ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window);

  /** Releases the recognizer and the timer without touching React state. */
  const teardown = useCallback(() => {
    const active = recognitionRef.current;
    // Clear the ref BEFORE stopping: stop() leads to `onend`, and `onend`
    // restarts recognition whenever the ref still points at the recognizer.
    recognitionRef.current = null;
    if (active) {
      try {
        active.stop();
      } catch {
        /* recognizer was already stopped */
      }
    }
    if (timerRef.current !== null) {
      clearInterval(timerRef.current);
      timerRef.current = null;
    }
  }, []);

  // Release the microphone and the interval when the component unmounts.
  useEffect(() => {
    return () => {
      teardown();
    };
  }, [teardown]);

  // Keep the newest text in view.
  useEffect(() => {
    const pane = scrollRef.current;
    if (pane) {
      pane.scrollTop = pane.scrollHeight;
    }
  }, [lines, interim]);

  /** Ends the current session and returns the UI to idle, keeping the transcript. */
  const stop = useCallback(() => {
    teardown();
    setInterim('');
    setStatus('idle');
  }, [teardown]);

  /** Creates a recognizer, wires its handlers and begins a fresh session. */
  const start = useCallback(() => {
    // Ignore a second click that arrives before the UI has re-rendered.
    if (recognitionRef.current) return;

    if (!supported) {
      setStatus('unsupported');
      return;
    }
    const speechWindow = window as unknown as Record<string, SpeechRecognitionCtor | undefined>;
    const Ctor = speechWindow.SpeechRecognition ?? speechWindow.webkitSpeechRecognition;
    if (!Ctor) {
      setStatus('unsupported');
      return;
    }

    const recognition = new Ctor();
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = 'en-US';

    recognition.onresult = (event: SpeechRecognitionEvent) => {
      const finalized: string[] = [];
      let interimText = '';
      for (let i = event.resultIndex; i < event.results.length; i++) {
        const result = event.results[i];
        const transcript = result?.[0]?.transcript ?? '';
        if (result?.isFinal) {
          const text = transcript.trim();
          // Skip blank finals so no empty segment is rendered.
          if (text) finalized.push(text);
          interimText = '';
        } else {
          interimText += transcript;
        }
      }
      if (finalized.length > 0) {
        const appended = finalized.map((text) => ({
          id: ++lineIdRef.current,
          text,
          final: true,
        }));
        setLines((prev) => [...prev, ...appended]);
      }
      setInterim(interimText);
    };

    recognition.onerror = (event: SpeechRecognitionErrorEvent) => {
      if (event.error !== 'aborted') {
        console.warn('Speech recognition error:', event.error);
      }
      if (RECOVERABLE_SPEECH_ERRORS.has(event.error)) return;
      if (recognitionRef.current !== recognition) return;
      // Non-recoverable: end the session so `onend` does not restart into
      // the same failure over and over.
      stop();
      setErrorMessage(describeSpeechError(event.error));
    };

    recognition.onend = () => {
      // Only restart while this recognizer is still the active session.
      if (recognitionRef.current !== recognition) return;
      try {
        recognition.start();
      } catch (err: unknown) {
        console.warn('Speech recognition restart failed:', err);
        stop();
      }
    };

    setLines([]);
    setInterim('');
    setErrorMessage(null);
    lineIdRef.current = 0;
    setElapsed(0);

    recognitionRef.current = recognition;
    try {
      recognition.start();
    } catch (err: unknown) {
      // Nothing was started, so leave no ref or timer behind.
      console.warn('Speech recognition failed to start:', err);
      recognitionRef.current = null;
      setErrorMessage('Speech recognition could not be started.');
      return;
    }

    startTimeRef.current = Date.now();
    timerRef.current = setInterval(() => {
      setElapsed(Math.floor((Date.now() - startTimeRef.current) / 1000));
    }, 1000);
    setStatus('listening');
  }, [supported, stop]);

  /** Stops any session and clears the transcript, timer display and error. */
  const reset = useCallback(() => {
    stop();
    setLines([]);
    setElapsed(0);
    setErrorMessage(null);
  }, [stop]);

  return (
    <div className="w-full max-w-2xl mx-auto">
      <div className="rounded-2xl border border-slate-700/50 bg-slate-800/50 overflow-hidden">
        {/* Header bar */}
        <div className="flex items-center justify-between px-5 py-3 border-b border-slate-700/50 bg-slate-800/30">
          <div className="flex items-center gap-2">
            <svg className="w-4 h-4 text-amber-400" fill="none" viewBox="0 0 24 24" stroke="currentColor">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d={MIC_OUTLINE_PATH} />
            </svg>
            <span className="text-sm font-medium text-slate-300">Live Transcription</span>
            {status === 'listening' && (
              <span className="flex items-center gap-1.5 text-xs text-green-400">
                <span className="w-1.5 h-1.5 rounded-full bg-green-400 animate-pulse" />
                LIVE
              </span>
            )}
          </div>
          {status === 'listening' && (
            <span className="text-xs font-mono text-slate-400">{formatElapsedTime(elapsed)}</span>
          )}
        </div>
        {/* Transcript area */}
        <div
          ref={scrollRef}
          className="min-h-[120px] max-h-[200px] overflow-y-auto px-5 py-4"
        >
          {status === 'idle' && lines.length === 0 && (
            <div className="flex flex-col items-center justify-center h-[120px] text-center">
              <svg className="w-10 h-10 text-slate-600 mb-3" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d={MIC_OUTLINE_PATH} />
              </svg>
              <p className="text-sm text-slate-500">
                Tap the mic to start live transcription
              </p>
              <p className="text-xs text-slate-600 mt-1">
                Works in your browser no download needed
              </p>
            </div>
          )}
          {status === 'unsupported' && (
            <div className="flex flex-col items-center justify-center h-[120px] text-center">
              <p className="text-sm text-slate-400">
                Speech recognition requires Chrome, Edge, or Safari.
              </p>
              <p className="text-xs text-slate-500 mt-1">
                rNotes also supports offline transcription with Parakeet.js (NVIDIA) for full privacy.
              </p>
            </div>
          )}
          {errorMessage && (
            <p role="alert" className="text-sm text-red-400 px-3 py-2">
              {errorMessage}
            </p>
          )}
          {lines.length > 0 && (
            <div className="space-y-2">
              {lines.map((line) => (
                <div
                  key={line.id}
                  className="text-sm text-slate-200 px-3 py-2 bg-slate-900/50 rounded-lg border-l-2 border-amber-500/40"
                >
                  {line.text}
                </div>
              ))}
            </div>
          )}
          {interim && (
            <div className="text-sm text-slate-400 italic px-3 py-2 mt-2">
              {interim}
            </div>
          )}
          {status === 'idle' && lines.length > 0 && (
            <div className="mt-3 pt-3 border-t border-slate-700/30 flex items-center justify-between">
              <span className="text-xs text-slate-500">
                {lines.length} segment{lines.length !== 1 ? 's' : ''} transcribed
              </span>
              <button
                type="button"
                onClick={reset}
                className="text-xs text-slate-500 hover:text-slate-300 transition-colors"
              >
                Clear
              </button>
            </div>
          )}
        </div>
        {/* Controls */}
        <div className="flex items-center justify-center gap-3 px-5 py-4 border-t border-slate-700/50">
          {status === 'idle' ? (
            <button
              type="button"
              onClick={start}
              className="flex items-center gap-2 px-5 py-2.5 bg-gradient-to-r from-amber-500 to-orange-500 hover:from-amber-400 hover:to-orange-400 text-black font-medium text-sm rounded-full transition-all shadow-lg shadow-amber-900/20"
            >
              <svg className="w-4 h-4" fill="currentColor" viewBox="0 0 24 24">
                <path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3zm-1-9c0-.55.45-1 1-1s1 .45 1 1v6c0 .55-.45 1-1 1s-1-.45-1-1V5z" />
                <path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z" />
              </svg>
              Start Transcribing
            </button>
          ) : status === 'listening' ? (
            <button
              type="button"
              onClick={stop}
              className="flex items-center gap-2 px-5 py-2.5 bg-slate-700 hover:bg-slate-600 text-white font-medium text-sm rounded-full transition-colors border border-red-500/50"
            >
              <div className="w-3 h-3 rounded-sm bg-red-500" />
              Stop
            </button>
          ) : null}
        </div>
        {/* Capability badges */}
        <div className="flex flex-wrap items-center justify-center gap-2 px-5 pb-4 text-[11px] text-slate-500">
          <span className="px-2 py-0.5 rounded-full bg-slate-700/50 border border-slate-600/30">
            Live streaming
          </span>
          <span className="px-2 py-0.5 rounded-full bg-slate-700/50 border border-slate-600/30">
            Audio file upload
          </span>
          <span className="px-2 py-0.5 rounded-full bg-slate-700/50 border border-slate-600/30">
            Video transcription
          </span>
          <span className="px-2 py-0.5 rounded-full bg-slate-700/50 border border-slate-600/30">
            Offline (Parakeet.js)
          </span>
        </div>
      </div>
    </div>
  );
}