feat(mi-voice): quieter+smoother voice, caveman terseness in voice mode

- Default voice en-US-AvaMultilingualNeural -> en-US-AndrewMultilingualNeural (smoother, less feminine timbre).
- Bridge TTS request: volume 0.55 -> 0.3, rate -8% -> -10%, pitch -2Hz -> -6Hz.
- Browser fallback matches: pitch 0.9 -> 0.85, volume 0.55 -> 0.3 (rate stays at 0.95).
- Client passes a voiceMode flag to /api/mi/ask; when it is set, the server appends a VOICE MODE section to the system prompt demanding at most 1-2 short sentences and no lists/markdown/emoji/preamble, because listening to long replies is tedious.
2026-04-16 15:44:11 -04:00 · 2026-04-16 15:44:11 -04:00 · 39fbd99897
parent 71782b1cf1
commit 39fbd99897
3 changed files with 16 additions and 6 deletions
--- a/lib/mi-voice-bridge.ts
+++ b/lib/mi-voice-bridge.ts
@@ -30,7 +30,7 @@ export class MiVoiceBridge {

 	constructor(opts: MiVoiceBridgeOptions = {}) {
 		this.#bridgeUrl = opts.bridgeUrl ?? DEFAULT_BRIDGE;
-		this.#voice = opts.voice ?? "en-US-AvaMultilingualNeural";
+		this.#voice = opts.voice ?? "en-US-AndrewMultilingualNeural";
 		this.#onStateChange = opts.onStateChange ?? null;
 	}

@@ -191,7 +191,7 @@ export class MiVoiceBridge {
 				const res = await fetch(`${this.#bridgeUrl}${TTS_PATH}`, {
 					method: "POST",
 					headers: { "Content-Type": "application/json" },
-					body: JSON.stringify({ text, voice: this.#voice, volume: 0.55, rate: "-8%", pitch: "-2Hz" }),
+					body: JSON.stringify({ text, voice: this.#voice, volume: 0.3, rate: "-10%", pitch: "-6Hz" }),
 				});
 				if (!res.ok) {
 					ws.removeEventListener("message", handler);
@@ -224,8 +224,8 @@ export class MiVoiceBridge {
 			this.#speakResolve = resolve;
 			const utterance = new SpeechSynthesisUtterance(text);
 			utterance.rate = 0.95;
-			utterance.pitch = 0.9;
-			utterance.volume = 0.55;
+			utterance.pitch = 0.85;
+			utterance.volume = 0.3;
 			utterance.onend = () => {
 				this.#speakResolve = null;
 				resolve();
--- a/server/mi-routes.ts
+++ b/server/mi-routes.ts
@@ -45,7 +45,7 @@ mi.get("/models", (c) => {
 // ── POST /ask — main MI chat ──

 mi.post("/ask", async (c) => {
-	const { query, messages = [], space, module: currentModule, context = {}, model: requestedModel } = await c.req.json();
+	const { query, messages = [], space, module: currentModule, context = {}, model: requestedModel, voiceMode = false } = await c.req.json();
 	if (!query) return c.json({ error: "Query required" }, 400);

 	// ── Resolve caller role + space access ──
@@ -304,7 +304,16 @@ Results will be provided in a follow-up message for you to incorporate into your

 ## Batch Actions
 [MI_ACTION:{"type":"batch","actions":[...actions...],"requireConfirm":true}]
-Use requireConfirm:true for destructive batches.`;
+Use requireConfirm:true for destructive batches.${voiceMode ? `
+
+## VOICE MODE — SPEAK LIKE A CAVEMAN
+User is hearing your reply read aloud. Be extremely terse.
+- Max 1-2 short sentences per reply. Fragments fine. Drop articles (a/an/the) and filler.
+- No lists, no headers, no markdown, no emoji, no code blocks — plain spoken prose only.
+- No preamble ("Sure!", "Of course,", "Great question"). Answer first, stop.
+- Never narrate what you're about to do. Do it. Report result in ≤1 sentence.
+- Action markers still allowed — they are silent and do not count toward length.
+- If a full answer would take longer than ~8 seconds to read aloud, give the headline only and offer to expand.` : ""}`;

 	// Build conversation
 	const miMessages: MiMessage[] = [
--- a/shared/components/rstack-mi.ts
+++ b/shared/components/rstack-mi.ts
@@ -726,6 +726,7 @@ export class RStackMi extends HTMLElement {
 					module: context.module,
 					context,
 					model: this.#preferredModel || undefined,
+					voiceMode: this.#voiceMode,
 				}),
 				signal: this.#abortController.signal,
 			});