149 lines
3.5 KiB
TypeScript
149 lines
3.5 KiB
TypeScript
// Web Speech API types (declared locally because not every TypeScript environment ships them in its built-in DOM typings)
|
|
/**
 * One recognition result: an array-like collection of alternatives plus a
 * flag telling whether the recognizer considers the result final.
 */
export interface SpeechRecognitionResult {
  /** Number of alternatives held by this result. */
  readonly length: number;

  /** `true` for a final result, `false` for an interim one. */
  readonly isFinal: boolean;

  /** Returns the alternative stored at `index`. */
  item(index: number): SpeechRecognitionAlternative;

  /** Array-style access to the alternative stored at `index`. */
  [index: number]: SpeechRecognitionAlternative;
}
|
|
|
|
/** A single candidate transcription produced by the recognizer. */
export interface SpeechRecognitionAlternative {
  /** The recognized text. */
  readonly transcript: string;

  /** Confidence value the recognizer reports for this transcript. */
  readonly confidence: number;
}
|
|
|
|
/** Array-like collection of recognition results carried by a result event. */
export interface SpeechRecognitionResultList {
  /** Number of results in the list. */
  readonly length: number;

  /** Returns the result stored at `index`. */
  item(index: number): SpeechRecognitionResult;

  /** Array-style access to the result stored at `index`. */
  [index: number]: SpeechRecognitionResult;
}
|
|
|
|
/** Event delivered to `onresult` handlers when recognition results are available. */
export interface SpeechRecognitionEvent extends Event {
  /** Index into `results` at which consumers should start reading. */
  readonly resultIndex: number;

  /** The list of recognition results attached to this event. */
  readonly results: SpeechRecognitionResultList;
}
|
|
|
|
/** Event delivered to `onerror` handlers when recognition fails. */
export interface SpeechRecognitionErrorEvent extends Event {
  /** Short error code identifying the failure (for example `"no-speech"`). */
  readonly error: string;

  /** Additional message text accompanying the error code. */
  readonly message: string;
}
|
|
|
|
/**
 * The subset of a speech recognizer's surface that this module relies on:
 * three configuration properties, three handler slots, and start/stop.
 */
export interface SpeechRecognitionInstance extends EventTarget {
  /** Continuous-recognition setting of the recognizer. */
  continuous: boolean;

  /** Whether interim (non-final) results should be delivered. */
  interimResults: boolean;

  /** Language tag used for recognition, e.g. `"en-US"`. */
  lang: string;

  /** Handler invoked with recognition results, or `null` when unset. */
  onresult: ((event: SpeechRecognitionEvent) => void) | null;

  /** Handler invoked when recognition reports an error, or `null` when unset. */
  onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;

  /** Handler invoked when the recognition session ends, or `null` when unset. */
  onend: (() => void) | null;

  /** Begins a recognition session. */
  start(): void;

  /** Ends the current recognition session. */
  stop(): void;
}
|
|
|
|
/** Constructor signature shared by `SpeechRecognition` and its `webkit`-prefixed variant. */
interface SpeechRecognitionConstructor {
  /** Creates a new recognizer; takes no arguments. */
  new (): SpeechRecognitionInstance;
}
|
|
|
|
declare global {
  /**
   * Adds the optional speech-recognition constructors to `Window` so they can
   * be feature-detected with full typing.
   */
  interface Window {
    /** Unprefixed constructor; absent where the browser does not provide it. */
    SpeechRecognition?: SpeechRecognitionConstructor;

    /** `webkit`-prefixed constructor; absent where the browser does not provide it. */
    webkitSpeechRecognition?: SpeechRecognitionConstructor;
  }
}
|
|
|
|
/** Callbacks and settings accepted by `SpeechDictation`; every field is optional. */
export interface SpeechDictationOptions {
  /** Receives the transcript of each interim (non-final) result. */
  onInterim?: (text: string) => void;

  /** Receives the transcript of each final result. */
  onFinal?: (text: string) => void;

  /** Receives a human-readable message when starting or recognizing fails. */
  onError?: (error: string) => void;

  /** Receives `true` when recording starts and `false` when it stops. */
  onStateChange?: (recording: boolean) => void;

  /** Recognition language tag; `SpeechDictation` falls back to `"en-US"` when omitted. */
  lang?: string;
}
|
|
|
|
/**
 * Dictation helper built on the browser's `SpeechRecognition` constructor
 * (or its `webkitSpeechRecognition` variant).
 *
 * The recognizer is configured for continuous recognition with interim
 * results. While recording is active, the session is restarted automatically
 * whenever the recognizer ends. Transcripts, errors and recording-state
 * changes are reported through the callbacks in `SpeechDictationOptions`.
 *
 * When no recognizer constructor is available (including environments with
 * no `window` at all), the instance is inert: `start()` does nothing.
 */
export class SpeechDictation {
  /**
   * Error codes after which restarting cannot succeed (permission denied,
   * no capture device, unsupported language). Restarting on these would
   * produce an endless error/end/start cycle, so the session is ended instead.
   */
  static readonly #fatalErrors: ReadonlySet<string> = new Set([
    "not-allowed",
    "service-not-allowed",
    "audio-capture",
    "language-not-supported",
  ]);

  /** Underlying recognizer; `null` when unsupported or after `destroy()`. */
  #recognition: SpeechRecognitionInstance | null = null;

  /** Whether the caller currently wants dictation to be running. */
  #recording = false;

  /** Callbacks and settings supplied at construction time. */
  readonly #opts: SpeechDictationOptions;

  /**
   * @param opts Callbacks and language setting; all fields are optional.
   */
  constructor(opts: SpeechDictationOptions) {
    this.#opts = opts;
    this.#init();
  }

  /**
   * Reports whether a speech-recognition constructor is available.
   *
   * @returns `false` when there is no `window` or neither constructor exists.
   */
  static isSupported(): boolean {
    if (typeof window === "undefined") return false;
    return !!(window.SpeechRecognition ?? window.webkitSpeechRecognition);
  }

  /** `true` while dictation is active. */
  get isRecording(): boolean {
    return this.#recording;
  }

  /**
   * Starts dictation. Does nothing when already recording or when no
   * recognizer is available. A failure to start is reported through
   * `onError` rather than thrown.
   */
  start(): void {
    if (this.#recording || !this.#recognition) return;
    try {
      this.#recognition.start();
      this.#recording = true;
      this.#opts.onStateChange?.(true);
    } catch {
      this.#opts.onError?.("Failed to start recording");
    }
  }

  /** Stops dictation and notifies `onStateChange(false)`. No-op when not recording. */
  stop(): void {
    if (!this.#recording) return;
    // Clear the flag first so the `onend` handler does not auto-restart.
    this.#recording = false;
    this.#recognition?.stop();
    this.#opts.onStateChange?.(false);
  }

  /** Stops dictation when recording, starts it otherwise. */
  toggle(): void {
    if (this.#recording) {
      this.stop();
    } else {
      this.start();
    }
  }

  /**
   * Stops dictation, detaches all handlers and drops the recognizer.
   * Subsequent `start()` calls are no-ops.
   */
  destroy(): void {
    this.stop();
    if (this.#recognition) {
      this.#recognition.onresult = null;
      this.#recognition.onerror = null;
      this.#recognition.onend = null;
      this.#recognition = null;
    }
  }

  /** Marks the session as ended without touching the recognizer. */
  #markStopped(): void {
    if (!this.#recording) return;
    this.#recording = false;
    this.#opts.onStateChange?.(false);
  }

  /** Creates and wires up the recognizer when a constructor is available. */
  #init(): void {
    // Guard for non-browser environments where `window` is not defined.
    if (typeof window === "undefined") return;
    const Impl = window.SpeechRecognition ?? window.webkitSpeechRecognition;
    if (!Impl) return;

    const recognition = new Impl();
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = this.#opts.lang ?? "en-US";

    recognition.onresult = (event) => {
      for (let i = event.resultIndex; i < event.results.length; i++) {
        const result = event.results[i];
        // Only the first alternative is used; skip results that carry none.
        const best = result?.[0];
        if (!result || !best) continue;
        if (result.isFinal) {
          this.#opts.onFinal?.(best.transcript);
        } else {
          this.#opts.onInterim?.(best.transcript);
        }
      }
    };

    recognition.onerror = (event) => {
      // "no-speech" is deliberately not surfaced to the caller.
      if (event.error === "no-speech") return;
      this.#opts.onError?.(`Recognition error: ${event.error}`);
      if (SpeechDictation.#fatalErrors.has(event.error)) {
        // Prevent the upcoming `end` event from restarting a doomed session.
        this.#markStopped();
      }
    };

    recognition.onend = () => {
      // Auto-restart while still recording.
      if (!this.#recording || !this.#recognition) return;
      try {
        this.#recognition.start();
      } catch {
        // Keep the reported state truthful when the restart is rejected.
        this.#markStopped();
        this.#opts.onError?.("Failed to restart recording");
      }
    };

    this.#recognition = recognition;
  }
}
|