// rspace-online/lib/speech-dictation.ts
// Web Speech API types (not all browsers have these in their types)

/**
 * One recognized utterance: an array-like list of alternatives.
 * Mirrors the shape the browser engine hands to `onresult`.
 */
export interface SpeechRecognitionResult {
  readonly length: number;
  item(index: number): SpeechRecognitionAlternative;
  [index: number]: SpeechRecognitionAlternative;
  /** True once the engine will no longer revise this result. */
  readonly isFinal: boolean;
}
/** A single transcription hypothesis within a result. */
export interface SpeechRecognitionAlternative {
  readonly transcript: string;
  /** Engine-reported confidence score for this transcript. */
  readonly confidence: number;
}
/** Array-like collection of results delivered with a result event. */
export interface SpeechRecognitionResultList {
  readonly length: number;
  item(index: number): SpeechRecognitionResult;
  [index: number]: SpeechRecognitionResult;
}
/** Event payload for `onresult`. */
export interface SpeechRecognitionEvent extends Event {
  /** Index of the first result in `results` that changed in this event. */
  readonly resultIndex: number;
  readonly results: SpeechRecognitionResultList;
}
/** Event payload for `onerror`; `error` is a short error code string (e.g. "no-speech", "not-allowed"). */
export interface SpeechRecognitionErrorEvent extends Event {
  readonly error: string;
  readonly message: string;
}
/**
 * Minimal surface of a browser SpeechRecognition object used by this module.
 */
export interface SpeechRecognitionInstance extends EventTarget {
  /** Keep listening after the first final result. */
  continuous: boolean;
  /** Emit provisional results while the user is still speaking. */
  interimResults: boolean;
  /** BCP-47 language tag used for recognition. */
  lang: string;
  onresult: ((event: SpeechRecognitionEvent) => void) | null;
  onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
  /** Fires whenever the engine stops listening. */
  onend: (() => void) | null;
  start(): void;
  stop(): void;
}
/** Constructable form of the (possibly vendor-prefixed) SpeechRecognition global. */
interface SpeechRecognitionConstructor {
  new (): SpeechRecognitionInstance;
}
// Some browsers expose the API only under the webkit- prefix,
// so both properties are declared as optional on Window.
declare global {
  interface Window {
    SpeechRecognition?: SpeechRecognitionConstructor;
    webkitSpeechRecognition?: SpeechRecognitionConstructor;
  }
}
/** Callbacks and configuration accepted by SpeechDictation. */
export interface SpeechDictationOptions {
  /** Receives provisional transcript text that may still be revised. */
  onInterim?: (text: string) => void;
  /** Receives finalized transcript text. */
  onFinal?: (text: string) => void;
  /** Receives human-readable error descriptions. */
  onError?: (error: string) => void;
  /** Notified when recording starts (true) or stops (false). */
  onStateChange?: (recording: boolean) => void;
  /** BCP-47 language tag; defaults to "en-US". */
  lang?: string;
}
/**
 * Thin wrapper around the Web Speech API providing continuous dictation:
 * interim and final transcript callbacks, manual start/stop/toggle, and
 * automatic restart when the engine ends while the user is still recording.
 *
 * On unsupported browsers, construction succeeds but `start()` reports an
 * error via `onError` instead of silently doing nothing.
 */
export class SpeechDictation {
  /** Underlying recognizer, or null when the browser lacks support. */
  #recognition: SpeechRecognitionInstance | null = null;
  /** Whether the user intends to be recording; drives the onend auto-restart. */
  #recording = false;
  #opts: SpeechDictationOptions;

  constructor(opts: SpeechDictationOptions) {
    this.#opts = opts;
    this.#init();
  }

  /** True when this browser exposes a (possibly prefixed) SpeechRecognition. */
  static isSupported(): boolean {
    return !!(window.SpeechRecognition || window.webkitSpeechRecognition);
  }

  get isRecording(): boolean {
    return this.#recording;
  }

  /**
   * Begin dictation. No-op if already recording.
   * Reports through `onError` when recognition is unsupported or the
   * engine refuses to start (e.g. it is already running).
   */
  start(): void {
    if (this.#recording) return;
    if (!this.#recognition) {
      // Previously a silent no-op on unsupported browsers; surface it instead.
      this.#opts.onError?.("Failed to start recording");
      return;
    }
    try {
      this.#recognition.start();
      this.#recording = true;
      this.#opts.onStateChange?.(true);
    } catch {
      // start() throws (InvalidStateError) if the engine is already running.
      this.#opts.onError?.("Failed to start recording");
    }
  }

  /** Stop dictation. No-op if not recording. */
  stop(): void {
    if (!this.#recording) return;
    // Clear the flag first so the resulting onend does not auto-restart.
    this.#recording = false;
    this.#recognition?.stop();
    this.#opts.onStateChange?.(false);
  }

  /** Start if stopped, stop if recording. */
  toggle(): void {
    if (this.#recording) {
      this.stop();
    } else {
      this.start();
    }
  }

  /** Stop recording and detach all handlers; the instance is unusable afterwards. */
  destroy(): void {
    this.stop();
    if (this.#recognition) {
      this.#recognition.onresult = null;
      this.#recognition.onerror = null;
      this.#recognition.onend = null;
      this.#recognition = null;
    }
  }

  #init(): void {
    const Impl = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!Impl) return;
    const recognition = new Impl();
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = this.#opts.lang ?? "en-US";
    recognition.onresult = (event) => {
      // Only results from resultIndex onward changed in this event.
      for (let i = event.resultIndex; i < event.results.length; i++) {
        const result = event.results[i];
        const text = result[0].transcript;
        if (result.isFinal) {
          this.#opts.onFinal?.(text);
        } else {
          this.#opts.onInterim?.(text);
        }
      }
    };
    recognition.onerror = (event) => {
      // Fatal errors (mic permission denied, no audio device, service
      // blocked) cannot be recovered by restarting. Stop recording so the
      // onend handler below does not spin in an infinite restart/error loop.
      const fatal = ["not-allowed", "service-not-allowed", "audio-capture"];
      if (fatal.includes(event.error)) {
        this.stop();
        this.#opts.onError?.(`Recognition error: ${event.error}`);
      } else if (event.error !== "no-speech") {
        // "no-speech" fires routinely during pauses; keep it quiet.
        this.#opts.onError?.(`Recognition error: ${event.error}`);
      }
    };
    recognition.onend = () => {
      // The engine stops on its own after silence/timeouts; restart while
      // the user still intends to record.
      if (this.#recording && this.#recognition) {
        try {
          this.#recognition.start();
        } catch {
          // Could not restart — reflect reality in our state.
          this.#recording = false;
          this.#opts.onStateChange?.(false);
        }
      }
    };
    this.#recognition = recognition;
  }
}