/**
 * MiVoiceBridge — TTS output via Edge TTS bridge + Web Speech Synthesis fallback.
 *
 * Connects to claude-voice.jeffemmett.com for high-quality neural voice synthesis.
 * Falls back to browser speechSynthesis if the bridge is unavailable.
 */

export type VoiceState = "idle" | "listening" | "thinking" | "speaking";

export interface MiVoiceBridgeOptions {
  /** Base HTTP(S) URL of the TTS bridge; defaults to the hosted bridge. */
  bridgeUrl?: string;
  /** Voice identifier sent with every TTS request. */
  voice?: string;
  /**
   * State-change listener. It is stored by the constructor, but this class
   * does not currently invoke it.
   */
  onStateChange?: (state: VoiceState) => void;
}

const DEFAULT_BRIDGE = "https://claude-voice.jeffemmett.com";
const WS_PATH = "/ws/audio";
const TTS_PATH = "/api/tts/speak";
/** How long to wait for the bridge WebSocket to open. */
const WS_CONNECT_TIMEOUT_MS = 5000;
/** How long to wait for an audio frame once the TTS POST has succeeded. */
const AUDIO_FRAME_TIMEOUT_MS = 15000;

/**
 * Splits a bridge audio frame into its playback gain and its audio payload.
 *
 * Frame format: [4B header_len big-endian][JSON header][MP3 bytes].
 * A numeric `volume` in the header is clamped to [0, 1] and used as the gain;
 * a missing or unparsable header yields a gain of 1.
 *
 * @throws Error if the frame is shorter than the length prefix, or if the
 *   declared header length runs past the end of the frame.
 */
function parseAudioFrame(frame: ArrayBuffer): { gain: number; audio: ArrayBuffer } {
  if (frame.byteLength < 4) {
    throw new Error("TTS audio frame too short");
  }
  const headerLen = new DataView(frame).getUint32(0, false);
  const audioStart = 4 + headerLen;
  if (audioStart > frame.byteLength) {
    throw new Error("TTS audio frame header exceeds frame length");
  }

  let gain = 1;
  try {
    const header: unknown = JSON.parse(new TextDecoder().decode(frame.slice(4, audioStart)));
    if (typeof header === "object" && header !== null) {
      const volume = (header as { volume?: unknown }).volume;
      if (typeof volume === "number") {
        gain = Math.max(0, Math.min(1, volume));
      }
    }
  } catch {
    /* ignore bad header — play at full gain */
  }

  // slice() copies, so the decoder gets its own buffer.
  return { gain, audio: frame.slice(audioStart) };
}

/** Returns the browser speech synthesizer, or undefined where there is none. */
function getSpeechSynthesis(): SpeechSynthesis | undefined {
  return typeof window !== "undefined" ? window.speechSynthesis : undefined;
}

export class MiVoiceBridge {
  #bridgeUrl: string;
  #voice: string;
  #onStateChange: ((s: VoiceState) => void) | null;
  #ws: WebSocket | null = null;
  #audioCtx: AudioContext | null = null;
  #currentSource: AudioBufferSourceNode | null = null;
  #speaking = false;
  #destroyed = false;
  /** Settles the in-flight utterance successfully; used by stop(). */
  #speakResolve: (() => void) | null = null;
  /** Bumped by every stop(); lets an in-flight speak() notice it was cancelled. */
  #generation = 0;
  /** Number of speak() calls that have started but not yet finished. */
  #activeSpeaks = 0;

  constructor(opts: MiVoiceBridgeOptions = {}) {
    this.#bridgeUrl = opts.bridgeUrl ?? DEFAULT_BRIDGE;
    this.#voice = opts.voice ?? "en-US-EmmaMultilingualNeural";
    this.#onStateChange = opts.onStateChange ?? null;
  }

  /** True while a speak() call is in progress and has not been stopped. */
  get isSpeaking(): boolean {
    return this.#speaking;
  }

  /** Changes the voice used for subsequent bridge requests. */
  setVoice(voice: string): void {
    this.#voice = voice;
  }

  /**
   * Speaks `text`, preferring the bridge and falling back to browser TTS when
   * the bridge fails. Resolves when playback ends or stop() is called.
   * Does nothing for blank text or after destroy().
   */
  async speak(text: string): Promise<void> {
    if (this.#destroyed || !text.trim()) return;

    const generation = this.#generation;
    this.#activeSpeaks += 1;
    this.#speaking = true;
    try {
      await this.#speakViaBridge(text, generation);
    } catch {
      // Bridge unavailable — fall back to browser TTS, unless the caller
      // stopped or destroyed us while the bridge attempt was in flight.
      if (!this.#destroyed && generation === this.#generation) {
        await this.#speakViaBrowser(text);
      }
    } finally {
      // Only the last outstanding call clears the flag, so an interrupted
      // utterance cannot mark a newer one as finished.
      this.#activeSpeaks -= 1;
      if (this.#activeSpeaks === 0) this.#speaking = false;
    }
  }

  /** Stops any current or pending speech and resolves the pending speak(). */
  stop(): void {
    // Invalidate speak() calls that have not produced audio yet.
    this.#generation += 1;

    // Stop AudioContext playback
    const source = this.#currentSource;
    this.#currentSource = null;
    if (source) {
      try {
        source.stop();
      } catch {
        /* already stopped */
      }
    }

    // Stop browser TTS, including utterances that are queued but not started.
    const synth = getSpeechSynthesis();
    if (synth && (synth.speaking || synth.pending)) {
      synth.cancel();
    }

    this.#speaking = false;

    const resolvePending = this.#speakResolve;
    this.#speakResolve = null;
    resolvePending?.();
  }

  /** Stops speech and releases the WebSocket and the AudioContext. */
  destroy(): void {
    this.#destroyed = true;
    this.stop();

    const ws = this.#ws;
    this.#ws = null;
    ws?.close();

    const ctx = this.#audioCtx;
    this.#audioCtx = null;
    if (ctx && ctx.state !== "closed") {
      // close() is async; swallow a rejection rather than leave it unhandled.
      void ctx.close().catch(() => {
        /* nothing useful to do during teardown */
      });
    }
  }

  // ── Bridge TTS ──

  /** Returns a usable AudioContext, creating one and resuming it as needed. */
  async #ensureAudioCtx(): Promise<AudioContext> {
    let ctx = this.#audioCtx;
    if (!ctx || ctx.state === "closed") {
      ctx = new AudioContext();
      this.#audioCtx = ctx;
    }
    if (ctx.state === "suspended") {
      try {
        await ctx.resume();
      } catch {
        /* gesture may be required */
      }
    }
    return ctx;
  }

  /**
   * Returns the open bridge WebSocket, connecting first if necessary.
   * Rejects on connection error, on close before open, after
   * WS_CONNECT_TIMEOUT_MS, or if the instance was destroyed meanwhile.
   */
  #connectWs(): Promise<WebSocket> {
    const existing = this.#ws;
    if (existing && existing.readyState === WebSocket.OPEN) {
      return Promise.resolve(existing);
    }

    return new Promise<WebSocket>((resolve, reject) => {
      const wsUrl = this.#bridgeUrl.replace(/^http/, "ws") + WS_PATH;
      const ws = new WebSocket(wsUrl);
      ws.binaryType = "arraybuffer";

      const timeout = setTimeout(() => {
        ws.close();
        reject(new Error("WS connect timeout"));
      }, WS_CONNECT_TIMEOUT_MS);

      ws.onopen = () => {
        clearTimeout(timeout);
        if (this.#destroyed) {
          // Opened too late: do not keep a socket nobody will ever close.
          ws.close();
          reject(new Error("Voice bridge destroyed"));
          return;
        }
        this.#ws = ws;
        resolve(ws);
      };
      ws.onerror = () => {
        clearTimeout(timeout);
        reject(new Error("WS connection failed"));
      };
      ws.onclose = () => {
        clearTimeout(timeout);
        if (this.#ws === ws) this.#ws = null;
        // No-op once the promise has settled; otherwise fail fast.
        reject(new Error("WS closed before opening"));
      };
    });
  }

  /** POSTs the synthesis request; throws when the bridge answers non-2xx. */
  async #postSpeakRequest(text: string): Promise<void> {
    const res = await fetch(`${this.#bridgeUrl}${TTS_PATH}`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        text,
        voice: this.#voice,
        volume: 0.25,
        rate: "-8%",
        pitch: "+0Hz",
      }),
    });
    if (!res.ok) {
      throw new Error(`TTS POST failed: ${res.status}`);
    }
  }

  /**
   * Decodes one audio frame and starts playing it.
   *
   * @param isCancelled polled after each await; playback is skipped once true
   * @param onEnded called when the started source finishes playing
   */
  async #playFrame(
    frame: ArrayBuffer,
    isCancelled: () => boolean,
    onEnded: () => void,
  ): Promise<void> {
    const { gain: gainValue, audio } = parseAudioFrame(frame);

    const ctx = await this.#ensureAudioCtx();
    if (isCancelled()) return;
    const audioBuffer = await ctx.decodeAudioData(audio);
    if (isCancelled()) return;

    if (ctx.state === "suspended") {
      try {
        await ctx.resume();
      } catch {
        /* ignore */
      }
      if (isCancelled()) return;
    }

    const source = ctx.createBufferSource();
    source.buffer = audioBuffer;
    const gain = ctx.createGain();
    gain.gain.value = gainValue;
    source.connect(gain);
    gain.connect(ctx.destination);

    source.onended = () => {
      if (this.#currentSource === source) this.#currentSource = null;
      onEnded();
    };
    this.#currentSource = source;
    source.start();
  }

  /**
   * Requests synthesis from the bridge and plays the first binary frame that
   * arrives on the WebSocket. Resolves when playback ends or stop() is
   * called; rejects when the bridge cannot deliver playable audio.
   */
  async #speakViaBridge(text: string, generation: number): Promise<void> {
    // Connect WS first so we're ready to receive audio
    const ws = await this.#connectWs();
    // stop()/destroy() during the connect: there is nothing left to speak.
    if (this.#destroyed || generation !== this.#generation) return;

    return new Promise<void>((resolve, reject) => {
      let settled = false;
      let frameReceived = false;
      let frameTimer: ReturnType<typeof setTimeout> | undefined;

      // Stop listening for a frame and disarm the no-frame timeout.
      const stopWaiting = (): void => {
        ws.removeEventListener("message", onMessage);
        if (frameTimer !== undefined) {
          clearTimeout(frameTimer);
          frameTimer = undefined;
        }
      };
      // Returns true exactly once, for whichever outcome gets there first.
      const settle = (): boolean => {
        if (settled) return false;
        settled = true;
        stopWaiting();
        if (this.#speakResolve === succeed) this.#speakResolve = null;
        return true;
      };
      const succeed = (): void => {
        if (settle()) resolve();
      };
      const fail = (err: unknown): void => {
        if (settle()) reject(err);
      };

      // Listen for the audio frame
      const onMessage = (ev: MessageEvent): void => {
        if (!(ev.data instanceof ArrayBuffer)) return;
        frameReceived = true;
        // The frame is here: the timeout must not fire during playback,
        // or the text would be spoken again by the fallback.
        stopWaiting();
        this.#playFrame(ev.data, () => settled, succeed).catch(fail);
      };

      // stop() settles through `succeed`, which also removes the listener.
      this.#speakResolve = succeed;
      ws.addEventListener("message", onMessage);

      // POST the TTS request
      this.#postSpeakRequest(text).then(
        () => {
          // Timeout: if no audio frame in 15s, reject
          if (!settled && !frameReceived) {
            frameTimer = setTimeout(
              () => fail(new Error("TTS audio timeout")),
              AUDIO_FRAME_TIMEOUT_MS,
            );
          }
        },
        (err: unknown) => {
          // Audio already in hand: a late POST failure must not trigger
          // the fallback on top of bridge playback.
          if (!frameReceived) fail(err);
        },
      );
    });
  }

  // ── Browser fallback ──

  /**
   * Speaks `text` with the Web Speech API. Resolves when the utterance ends,
   * errors, or is stopped; resolves immediately when there is no synthesizer.
   */
  #speakViaBrowser(text: string): Promise<void> {
    const synth = getSpeechSynthesis();
    if (!synth) return Promise.resolve();

    return new Promise<void>((resolve) => {
      const finish = (): void => {
        if (this.#speakResolve === finish) this.#speakResolve = null;
        resolve();
      };
      this.#speakResolve = finish;

      const utterance = new SpeechSynthesisUtterance(text);
      utterance.rate = 0.95;
      utterance.pitch = 1.0;
      utterance.volume = 0.25;
      utterance.onend = finish;
      utterance.onerror = finish;
      synth.speak(utterance);
    });
  }
}