// rspace-online/lib/speech-dictation.ts
// Web Speech API types (not all browsers have these in their types)

/**
 * One recognized utterance: an array-like list of alternatives.
 * Mirrors the shape the browser engine hands to `onresult`.
 */
export interface SpeechRecognitionResult {
  readonly length: number;
  item(index: number): SpeechRecognitionAlternative;
  [index: number]: SpeechRecognitionAlternative;
  /** True once the engine will no longer revise this result. */
  readonly isFinal: boolean;
}
/** A single transcription hypothesis within a result. */
export interface SpeechRecognitionAlternative {
  readonly transcript: string;
  /** Engine-reported confidence score for this transcript. */
  readonly confidence: number;
}
/** Array-like collection of results delivered with a result event. */
export interface SpeechRecognitionResultList {
  readonly length: number;
  item(index: number): SpeechRecognitionResult;
  [index: number]: SpeechRecognitionResult;
}
/** Event payload for `onresult`. */
export interface SpeechRecognitionEvent extends Event {
  /** Index of the first result in `results` that changed in this event. */
  readonly resultIndex: number;
  readonly results: SpeechRecognitionResultList;
}
/** Event payload for `onerror`; `error` is a short error code string (e.g. "no-speech", "not-allowed"). */
export interface SpeechRecognitionErrorEvent extends Event {
  readonly error: string;
  readonly message: string;
}
/**
 * Minimal surface of a browser SpeechRecognition object used by this module.
 */
export interface SpeechRecognitionInstance extends EventTarget {
  /** Keep listening after the first final result. */
  continuous: boolean;
  /** Emit provisional results while the user is still speaking. */
  interimResults: boolean;
  /** BCP-47 language tag used for recognition. */
  lang: string;
  onresult: ((event: SpeechRecognitionEvent) => void) | null;
  onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
  /** Fires whenever the engine stops listening. */
  onend: (() => void) | null;
  start(): void;
  stop(): void;
}
/** Constructable form of the (possibly vendor-prefixed) SpeechRecognition global. */
interface SpeechRecognitionConstructor {
  new (): SpeechRecognitionInstance;
}
// Some browsers expose the API only under the webkit- prefix,
// so both properties are declared as optional on Window.
declare global {
  interface Window {
    SpeechRecognition?: SpeechRecognitionConstructor;
    webkitSpeechRecognition?: SpeechRecognitionConstructor;
  }
}
/** Callbacks and configuration accepted by SpeechDictation. */
export interface SpeechDictationOptions {
  /** Receives provisional transcript text that may still be revised. */
  onInterim?: (text: string) => void;
  /** Receives finalized transcript text. */
  onFinal?: (text: string) => void;
  /** Receives human-readable error descriptions. */
  onError?: (error: string) => void;
  /** Notified when recording starts (true) or stops (false). */
  onStateChange?: (recording: boolean) => void;
  /** BCP-47 language tag; defaults to "en-US". */
  lang?: string;
}
/**
 * Thin wrapper around the Web Speech API providing continuous dictation:
 * interim and final transcript callbacks, manual start/stop/toggle, and
 * automatic restart when the engine ends while the user is still recording.
 *
 * On unsupported browsers, construction succeeds but `start()` reports an
 * error via `onError` instead of silently doing nothing.
 */
export class SpeechDictation {
  /** Underlying recognizer, or null when the browser lacks support. */
  #recognition: SpeechRecognitionInstance | null = null;
  /** Whether the user intends to be recording; drives the onend auto-restart. */
  #recording = false;
  #opts: SpeechDictationOptions;

  constructor(opts: SpeechDictationOptions) {
    this.#opts = opts;
    this.#init();
  }

  /** True when this browser exposes a (possibly prefixed) SpeechRecognition. */
  static isSupported(): boolean {
    return !!(window.SpeechRecognition || window.webkitSpeechRecognition);
  }

  get isRecording(): boolean {
    return this.#recording;
  }

  /**
   * Begin dictation. No-op if already recording.
   * Reports through `onError` when recognition is unsupported or the
   * engine refuses to start (e.g. it is already running).
   */
  start(): void {
    if (this.#recording) return;
    if (!this.#recognition) {
      // Previously a silent no-op on unsupported browsers; surface it instead.
      this.#opts.onError?.("Failed to start recording");
      return;
    }
    try {
      this.#recognition.start();
      this.#recording = true;
      this.#opts.onStateChange?.(true);
    } catch {
      // start() throws (InvalidStateError) if the engine is already running.
      this.#opts.onError?.("Failed to start recording");
    }
  }

  /** Stop dictation. No-op if not recording. */
  stop(): void {
    if (!this.#recording) return;
    // Clear the flag first so the resulting onend does not auto-restart.
    this.#recording = false;
    this.#recognition?.stop();
    this.#opts.onStateChange?.(false);
  }

  /** Start if stopped, stop if recording. */
  toggle(): void {
    if (this.#recording) {
      this.stop();
    } else {
      this.start();
    }
  }

  /** Stop recording and detach all handlers; the instance is unusable afterwards. */
  destroy(): void {
    this.stop();
    if (this.#recognition) {
      this.#recognition.onresult = null;
      this.#recognition.onerror = null;
      this.#recognition.onend = null;
      this.#recognition = null;
    }
  }

  #init(): void {
    const Impl = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!Impl) return;
    const recognition = new Impl();
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = this.#opts.lang ?? "en-US";
    recognition.onresult = (event) => {
      // Only results from resultIndex onward changed in this event.
      for (let i = event.resultIndex; i < event.results.length; i++) {
        const result = event.results[i];
        const text = result[0].transcript;
        if (result.isFinal) {
          this.#opts.onFinal?.(text);
        } else {
          this.#opts.onInterim?.(text);
        }
      }
    };
    recognition.onerror = (event) => {
      // Fatal errors (mic permission denied, no audio device, service
      // blocked) cannot be recovered by restarting. Stop recording so the
      // onend handler below does not spin in an infinite restart/error loop.
      const fatal = ["not-allowed", "service-not-allowed", "audio-capture"];
      if (fatal.includes(event.error)) {
        this.stop();
        this.#opts.onError?.(`Recognition error: ${event.error}`);
      } else if (event.error !== "no-speech") {
        // "no-speech" fires routinely during pauses; keep it quiet.
        this.#opts.onError?.(`Recognition error: ${event.error}`);
      }
    };
    recognition.onend = () => {
      // The engine stops on its own after silence/timeouts; restart while
      // the user still intends to record.
      if (this.#recording && this.#recognition) {
        try {
          this.#recognition.start();
        } catch {
          // Could not restart — reflect reality in our state.
          this.#recording = false;
          this.#opts.onStateChange?.(false);
        }
      }
    };
    this.#recognition = recognition;
  }
}