import { useState, useRef, useCallback, useEffect } from 'react'

// TypeScript declarations for Web Speech API
declare global {
  interface Window {
    SpeechRecognition: typeof SpeechRecognition
    webkitSpeechRecognition: typeof SpeechRecognition
  }

  interface SpeechRecognition extends EventTarget {
    continuous: boolean
    interimResults: boolean
    lang: string
    maxAlternatives: number
    start(): void
    stop(): void
    onstart: ((this: SpeechRecognition, ev: Event) => any) | null
    onresult: ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any) | null
    onerror: ((this: SpeechRecognition, ev: SpeechRecognitionErrorEvent) => any) | null
    onend: ((this: SpeechRecognition, ev: Event) => any) | null
  }

  interface SpeechRecognitionEvent extends Event {
    resultIndex: number
    results: SpeechRecognitionResultList
  }

  interface SpeechRecognitionErrorEvent extends Event {
    error: string
  }

  interface SpeechRecognitionResultList {
    readonly length: number
    item(index: number): SpeechRecognitionResult
    [index: number]: SpeechRecognitionResult
  }

  interface SpeechRecognitionResult {
    readonly length: number
    item(index: number): SpeechRecognitionAlternative
    [index: number]: SpeechRecognitionAlternative
    readonly isFinal: boolean
  }

  interface SpeechRecognitionAlternative {
    readonly transcript: string
    readonly confidence: number
  }

  var SpeechRecognition: {
    prototype: SpeechRecognition
    new(): SpeechRecognition
  }
}

interface UseWebSpeechTranscriptionOptions {
  onTranscriptUpdate?: (text: string) => void
  onError?: (error: Error) => void
  language?: string
  continuous?: boolean
  interimResults?: boolean
}

export const useWebSpeechTranscription = ({
  onTranscriptUpdate,
  onError,
  language = 'en-US',
  continuous = true,
  interimResults = true
}: UseWebSpeechTranscriptionOptions = {}) => {
  const [isRecording, setIsRecording] = useState(false)
  const [isTranscribing, setIsTranscribing] = useState(false)
  const [transcript, setTranscript] = useState('')
  const [interimTranscript, setInterimTranscript] = useState('')
  const [isSupported, setIsSupported] = useState(false)

  const recognitionRef = useRef<SpeechRecognition | null>(null)
  const finalTranscriptRef = useRef('')
  const interimTranscriptRef = useRef('')
  const lastSpeechTimeRef = useRef(0)
  const pauseTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)
  const lastConfidenceRef = useRef(0)
  const speakerChangeThreshold = 0.3 // Threshold for detecting speaker changes

  // Function to add line breaks after pauses and improve punctuation
  const processTranscript = useCallback((text: string, isFinal: boolean = false) => {
    if (!text.trim()) return text

    let processedText = text.trim()

    // Add punctuation if missing at the end
    if (isFinal && processedText && !/[.!?]$/.test(processedText)) {
      processedText += '.'
    }

    // Add a line break if there's been a pause (for final results)
    if (isFinal) {
      const now = Date.now()
      const timeSinceLastSpeech = now - lastSpeechTimeRef.current

      // If more than 3 seconds since the last speech, add a line break
      if (timeSinceLastSpeech > 3000 && lastSpeechTimeRef.current > 0) {
        processedText = '\n' + processedText
      }

      lastSpeechTimeRef.current = now
    }

    return processedText
  }, [])

  // Function to detect speaker changes based on confidence and timing
  const detectSpeakerChange = useCallback((confidence: number) => {
    if (lastConfidenceRef.current === 0) {
      lastConfidenceRef.current = confidence
      return false
    }

    const confidenceDiff = Math.abs(confidence - lastConfidenceRef.current)
    const now = Date.now()
    const timeSinceLastSpeech = now - lastSpeechTimeRef.current

    // Detect a speaker change if confidence shifts significantly and there's been a pause
    const isSpeakerChange = confidenceDiff > speakerChangeThreshold && timeSinceLastSpeech > 1000

    if (isSpeakerChange) {
      // Reduced debug logging
      lastConfidenceRef.current = confidence
      return true
    }

    lastConfidenceRef.current = confidence
    return false
  }, [speakerChangeThreshold])

  // Function to handle pause detection
  const handlePauseDetection = useCallback(() => {
    // Clear any existing timeout
    if (pauseTimeoutRef.current) {
      clearTimeout(pauseTimeoutRef.current)
    }

    // Set a new timeout for pause detection
    pauseTimeoutRef.current = setTimeout(() => {
      const now = Date.now()
      const timeSinceLastSpeech = now - lastSpeechTimeRef.current

      // If more than 2 seconds of silence, append a line break to the transcript
      if (timeSinceLastSpeech > 2000 && lastSpeechTimeRef.current > 0) {
        const currentTranscript = finalTranscriptRef.current + '\n'
        setTranscript(currentTranscript)
        onTranscriptUpdate?.(currentTranscript)
        // Reduced debug logging
      }
    }, 2000) // Check after 2 seconds of silence
  }, [onTranscriptUpdate])

  // Check if the Web Speech API is supported
  useEffect(() => {
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition

    if (SpeechRecognition) {
      setIsSupported(true)
      // Reduced debug logging
    } else {
      setIsSupported(false)
      onError?.(new Error('Web Speech API is not supported in this browser'))
    }
  }, [onError])

  // Initialize speech recognition
  const initializeRecognition = useCallback(() => {
    if (!isSupported) return null

    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition
    const recognition = new SpeechRecognition()

    recognition.continuous = continuous
    recognition.interimResults = interimResults
    recognition.lang = language
    recognition.maxAlternatives = 1

    recognition.onstart = () => {
      setIsRecording(true)
      setIsTranscribing(true)
    }

    recognition.onresult = (event) => {
      let interimTranscript = ''
      let finalTranscript = ''

      // Process all results
      for (let i = event.resultIndex; i < event.results.length; i++) {
        const result = event.results[i]
        const transcript = result[0].transcript

        if (result.isFinal) {
          finalTranscript += transcript
        } else {
          interimTranscript += transcript
        }
      }

      // Update the final transcript with processing
      if (finalTranscript) {
        // Get confidence from the most recent result
        const confidence = event.results[event.results.length - 1]?.[0]?.confidence || 0

        // Detect a speaker change
        const isSpeakerChange = detectSpeakerChange(confidence)

        // Add a speaker indicator if a change was detected
        let speakerPrefix = ''
        if (isSpeakerChange) {
          speakerPrefix = '\n[Speaker Change]\n'
        }

        const processedFinal = processTranscript(finalTranscript, true)
        const newText = speakerPrefix + processedFinal
        finalTranscriptRef.current += newText
        setTranscript(finalTranscriptRef.current)
        onTranscriptUpdate?.(newText) // Only send the new text portion

        // Trigger pause detection
        handlePauseDetection()
      }

      // Update the interim transcript
      if (interimTranscript) {
        const processedInterim = processTranscript(interimTranscript, false)
        interimTranscriptRef.current = processedInterim
        setInterimTranscript(processedInterim)
      }
    }

    recognition.onerror = (event) => {
      console.error('❌ Web Speech API error:', event.error)
      setIsRecording(false)
      setIsTranscribing(false)
      onError?.(new Error(`Speech recognition error: ${event.error}`))
    }

    recognition.onend = () => {
      setIsRecording(false)
      setIsTranscribing(false)
    }

    return recognition
  }, [
    isSupported,
    continuous,
    interimResults,
    language,
    detectSpeakerChange,
    processTranscript,
    handlePauseDetection,
    onTranscriptUpdate,
    onError
  ])

  // Start recording
  const startRecording = useCallback(() => {
    if (!isSupported) {
      onError?.(new Error('Web Speech API is not supported'))
      return
    }

    try {
      // Don't reset transcripts for continuous transcription - keep existing content
      // finalTranscriptRef.current = ''
      // interimTranscriptRef.current = ''
      // setTranscript('')
      // setInterimTranscript('')
      lastSpeechTimeRef.current = 0
      lastConfidenceRef.current = 0

      // Clear any existing pause timeout
      if (pauseTimeoutRef.current) {
        clearTimeout(pauseTimeoutRef.current)
        pauseTimeoutRef.current = null
      }

      // Initialize and start recognition
      const recognition = initializeRecognition()
      if (recognition) {
        recognitionRef.current = recognition
        recognition.start()
      }
    } catch (error) {
      console.error('❌ Error starting Web Speech API:', error)
      onError?.(error as Error)
    }
  }, [isSupported, initializeRecognition, onError])

  // Stop recording
  const stopRecording = useCallback(() => {
    if (recognitionRef.current) {
      recognitionRef.current.stop()
      recognitionRef.current = null
    }
  }, [])

  // Cleanup
  const cleanup = useCallback(() => {
    if (recognitionRef.current) {
      recognitionRef.current.stop()
      recognitionRef.current = null
    }

    // Clear the pause timeout
    if (pauseTimeoutRef.current) {
      clearTimeout(pauseTimeoutRef.current)
      pauseTimeoutRef.current = null
    }

    setIsRecording(false)
    setIsTranscribing(false)
  }, [])

  // Cleanup on unmount
  useEffect(() => {
    return cleanup
  }, [cleanup])

  return {
    isRecording,
    isTranscribing,
    transcript,
    interimTranscript,
    isSupported,
    startRecording,
    stopRecording,
    cleanup
  }
}

// Export as default for compatibility
export default useWebSpeechTranscription
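
// Example usage (a minimal sketch, kept as a comment so this module stays plain TS;
// the component name, import path, and rendering below are illustrative assumptions,
// not part of this hook):
//
// import { useWebSpeechTranscription } from './useWebSpeechTranscription'
//
// const TranscriptionPanel = () => {
//   const {
//     isRecording,
//     isSupported,
//     transcript,
//     interimTranscript,
//     startRecording,
//     stopRecording
//   } = useWebSpeechTranscription({
//     onTranscriptUpdate: (text) => console.log('New transcript text:', text),
//     onError: (error) => console.error(error),
//     language: 'en-US'
//   })
//
//   if (!isSupported) return <p>Speech recognition is not supported in this browser.</p>
//
//   return (
//     <div>
//       <button onClick={isRecording ? stopRecording : startRecording}>
//         {isRecording ? 'Stop' : 'Start'} recording
//       </button>
//       <pre>
//         {transcript}
//         {interimTranscript && ` ${interimTranscript}`}
//       </pre>
//     </div>
//   )
// }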