rspace-online/lib/folk-transcription.ts

616 lines
14 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { FolkShape } from "./folk-shape";
import { css, html } from "./tags";
// Web Speech API types, declared locally because TypeScript's bundled DOM
// typings may not include them (the API itself is provided by the browser).

/**
 * One recognition result: an indexable list of alternatives.
 * This file only reads the first alternative (`result[0]`).
 */
interface SpeechRecognitionResult {
/** Number of alternatives in this result. */
readonly length: number;
/** Returns the alternative at `index`. */
item(index: number): SpeechRecognitionAlternative;
[index: number]: SpeechRecognitionAlternative;
/** True once the result is final; false while it is still an interim result. */
readonly isFinal: boolean;
}
/** A single candidate transcription for a recognition result. */
interface SpeechRecognitionAlternative {
/** The recognized text. */
readonly transcript: string;
/** Confidence reported by the recognizer (not read anywhere in this file). */
readonly confidence: number;
}
/** Indexable list of results carried by a `SpeechRecognitionEvent`. */
interface SpeechRecognitionResultList {
/** Number of results in the list. */
readonly length: number;
/** Returns the result at `index`. */
item(index: number): SpeechRecognitionResult;
[index: number]: SpeechRecognitionResult;
}
/** Event passed to `SpeechRecognition.onresult`. */
interface SpeechRecognitionEvent extends Event {
/** Index in `results` from which the handler starts reading. */
readonly resultIndex: number;
/** The results available for this event. */
readonly results: SpeechRecognitionResultList;
}
/** Event passed to `SpeechRecognition.onerror`. */
interface SpeechRecognitionErrorEvent extends Event {
/** Error code string, e.g. "no-speech" (the one code this file treats as benign). */
readonly error: string;
/** Additional message text (not read anywhere in this file). */
readonly message: string;
}
/**
 * The subset of the browser's speech recognizer that this file uses:
 * configuration flags, the three event handler slots, and start/stop.
 */
interface SpeechRecognition extends EventTarget {
/** Configuration flag; this file sets it to true. */
continuous: boolean;
/** Configuration flag; this file sets it to true and handles non-final results. */
interimResults: boolean;
/** Recognition language tag; this file sets "en-US". */
lang: string;
/** Handler invoked with recognition results. */
onresult: ((event: SpeechRecognitionEvent) => void) | null;
/** Handler invoked when recognition reports an error. */
onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
/** Handler invoked when recognition ends. */
onend: (() => void) | null;
/** Starts recognition. */
start(): void;
/** Stops recognition. */
stop(): void;
}
/** Constructor shape for the recognizer as exposed on `window`. */
interface SpeechRecognitionConstructor {
new (): SpeechRecognition;
}
// Augment `Window` with the standard and webkit-prefixed recognizer
// constructors. Both are optional so callers must feature-detect them.
declare global {
interface Window {
SpeechRecognition?: SpeechRecognitionConstructor;
webkitSpeechRecognition?: SpeechRecognitionConstructor;
}
}
/**
 * Component-specific styles for the transcription shape: header bar, record
 * button (with a pulsing ring while recording), status/duration readout,
 * transcript segments, action buttons and the error box.
 *
 * The class's static initializer reads these rules through `styles.cssRules`
 * and merges them with the base `FolkShape` rules into a single sheet.
 */
const styles = css`
:host {
background: white;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
min-width: 350px;
min-height: 400px;
}
.header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 8px 12px;
background: linear-gradient(135deg, #14b8a6, #06b6d4);
color: white;
border-radius: 8px 8px 0 0;
font-size: 12px;
font-weight: 600;
cursor: move;
}
.header-title {
display: flex;
align-items: center;
gap: 6px;
}
.header-actions {
display: flex;
gap: 4px;
}
.header-actions button {
background: transparent;
border: none;
color: white;
cursor: pointer;
padding: 2px 6px;
border-radius: 4px;
font-size: 14px;
}
.header-actions button:hover {
background: rgba(255, 255, 255, 0.2);
}
.content {
display: flex;
flex-direction: column;
height: calc(100% - 36px);
overflow: hidden;
}
.controls {
display: flex;
align-items: center;
justify-content: center;
gap: 16px;
padding: 16px;
border-bottom: 1px solid #e2e8f0;
}
.record-btn {
width: 64px;
height: 64px;
border-radius: 50%;
border: 4px solid #e2e8f0;
background: white;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
transition: all 0.2s;
}
.record-btn:hover {
border-color: #14b8a6;
}
.record-btn.recording {
border-color: #ef4444;
animation: pulse-ring 1.5s infinite;
}
.record-icon {
width: 24px;
height: 24px;
border-radius: 50%;
background: #ef4444;
transition: all 0.2s;
}
.record-btn.recording .record-icon {
border-radius: 4px;
width: 20px;
height: 20px;
}
@keyframes pulse-ring {
0% { box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4); }
70% { box-shadow: 0 0 0 10px rgba(239, 68, 68, 0); }
100% { box-shadow: 0 0 0 0 rgba(239, 68, 68, 0); }
}
.status {
font-size: 12px;
color: #64748b;
}
.status.recording {
color: #ef4444;
font-weight: 600;
}
.duration {
font-family: "Monaco", "Consolas", monospace;
font-size: 14px;
color: #1e293b;
}
.transcript-area {
flex: 1;
overflow-y: auto;
padding: 12px;
}
.placeholder {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
height: 100%;
color: #94a3b8;
text-align: center;
gap: 8px;
}
.placeholder-icon {
font-size: 48px;
opacity: 0.5;
}
.transcript {
font-size: 14px;
line-height: 1.6;
color: #1e293b;
}
.transcript-segment {
margin-bottom: 12px;
padding: 8px 12px;
background: #f8fafc;
border-radius: 8px;
border-left: 3px solid #14b8a6;
}
.segment-time {
font-size: 11px;
color: #64748b;
margin-bottom: 4px;
font-family: "Monaco", "Consolas", monospace;
}
.segment-text {
color: #1e293b;
}
.segment-text.interim {
color: #94a3b8;
font-style: italic;
}
.actions {
display: flex;
gap: 8px;
padding: 12px;
border-top: 1px solid #e2e8f0;
}
.action-btn {
flex: 1;
padding: 8px 12px;
border: 2px solid #e2e8f0;
border-radius: 6px;
background: white;
cursor: pointer;
font-size: 12px;
font-weight: 500;
color: #64748b;
transition: all 0.2s;
}
.action-btn:hover {
border-color: #14b8a6;
color: #14b8a6;
}
.error {
color: #ef4444;
padding: 12px;
background: #fef2f2;
border-radius: 6px;
font-size: 13px;
margin: 12px;
}
`;
/** One entry in the transcript list shown by `FolkTranscription`. */
export interface TranscriptSegment {
/** Unique identifier; the component generates it with `crypto.randomUUID()`. */
id: string;
/** Recognized text for this segment. */
text: string;
/** Value of the component's recording-duration counter (seconds) when the segment was created. */
timestamp: number;
/** False while the segment holds an interim result; true once it is final. */
isFinal: boolean;
}
// Register the tag name so typed DOM lookups such as
// `document.querySelector("folk-transcription")` resolve to `FolkTranscription`.
declare global {
interface HTMLElementTagNameMap {
"folk-transcription": FolkTranscription;
}
}
/**
 * `<folk-transcription>`: a shape that transcribes speech with the browser's
 * Web Speech API and shows the result as a list of timestamped segments.
 *
 * Events dispatched on the element (all `CustomEvent`s):
 * - `recording-start` — recognition was started successfully.
 * - `recording-stop` — recording ended; `detail.transcript` holds the final text.
 * - `copied` — the transcript was written to the clipboard.
 * - `close` — the close button was clicked (recording is stopped first).
 *
 * NOTE(review): nothing here stops recognition or the duration timer when the
 * element is removed from the DOM without using the close button. Consider
 * hooking the base class's disconnect lifecycle if it provides one.
 */
export class FolkTranscription extends FolkShape {
  static override tagName = "folk-transcription";

  /** Markup shown in the transcript area while there are no segments. */
  static readonly #placeholderMarkup = `
    <div class="placeholder">
      <span class="placeholder-icon">🎤</span>
      <span>Click the record button to start</span>
      <span style="font-size: 11px;">Uses your browser's speech recognition</span>
    </div>
  `;

  static {
    // Merge the base shape's rules and this component's rules into one sheet.
    const sheet = new CSSStyleSheet();
    const parentRules = Array.from(FolkShape.styles.cssRules)
      .map((r) => r.cssText)
      .join("\n");
    const childRules = Array.from(styles.cssRules)
      .map((r) => r.cssText)
      .join("\n");
    sheet.replaceSync(`${parentRules}\n${childRules}`);
    this.styles = sheet;
  }

  #segments: TranscriptSegment[] = [];
  #isRecording = false;
  /** Total recorded time in seconds; keeps counting across start/stop until cleared. */
  #duration = 0;
  #durationInterval: ReturnType<typeof setInterval> | null = null;
  #recognition: SpeechRecognition | null = null;
  #error: string | null = null;

  #recordBtn: HTMLElement | null = null;
  #statusEl: HTMLElement | null = null;
  #durationEl: HTMLElement | null = null;
  #transcriptArea: HTMLElement | null = null;

  /** All segments currently held, including a trailing interim one while recording. */
  get segments() {
    return this.#segments;
  }

  /** Text of all final segments joined with single spaces. */
  get transcript() {
    return this.#segments
      .filter((s) => s.isFinal)
      .map((s) => s.text)
      .join(" ");
  }

  /**
   * Builds the component UI inside the render root created by the base class,
   * wires up the buttons and initializes speech recognition.
   *
   * @returns The render root obtained from `super.createRenderRoot()`.
   */
  override createRenderRoot() {
    const root = super.createRenderRoot();

    const wrapper = document.createElement("div");
    wrapper.innerHTML = html`
      <div class="header">
        <span class="header-title">
          <span>🎤</span>
          <span>Transcription</span>
        </span>
        <div class="header-actions">
          <button class="close-btn" title="Close">×</button>
        </div>
      </div>
      <div class="content">
        <div class="controls">
          <button class="record-btn" title="Start/Stop Recording">
            <span class="record-icon"></span>
          </button>
          <div>
            <div class="status">Ready to record</div>
            <div class="duration">00:00</div>
          </div>
        </div>
        <div class="transcript-area"></div>
        <div class="actions">
          <button class="action-btn copy-btn">📋 Copy</button>
          <button class="action-btn clear-btn">🗑 Clear</button>
        </div>
      </div>
    `;

    // Replace the container div (slot's parent) with our wrapper. If the base
    // markup has no slot, still attach the wrapper so the UI is not lost.
    const containerDiv = root.querySelector("slot")?.parentElement;
    if (containerDiv) {
      containerDiv.replaceWith(wrapper);
    } else {
      root.append(wrapper);
    }

    this.#recordBtn = wrapper.querySelector<HTMLElement>(".record-btn");
    this.#statusEl = wrapper.querySelector<HTMLElement>(".status");
    this.#durationEl = wrapper.querySelector<HTMLElement>(".duration");
    this.#transcriptArea = wrapper.querySelector<HTMLElement>(".transcript-area");

    // Fill in the placeholder (there are no segments yet).
    this.#renderTranscript();

    // Clicks are stopped from propagating so they do not reach handlers on
    // ancestors of the buttons.
    this.#recordBtn?.addEventListener("click", (e) => {
      e.stopPropagation();
      this.#toggleRecording();
    });

    wrapper.querySelector<HTMLButtonElement>(".copy-btn")?.addEventListener("click", (e) => {
      e.stopPropagation();
      void this.#copyTranscript();
    });

    wrapper.querySelector<HTMLButtonElement>(".clear-btn")?.addEventListener("click", (e) => {
      e.stopPropagation();
      this.#clearTranscript();
    });

    wrapper.querySelector<HTMLButtonElement>(".close-btn")?.addEventListener("click", (e) => {
      e.stopPropagation();
      this.#stopRecording();
      this.dispatchEvent(new CustomEvent("close"));
    });

    // May replace the placeholder with an error if the API is unsupported.
    this.#initSpeechRecognition();

    return root;
  }

  /** Creates and configures the recognizer, or shows an error if the API is missing. */
  #initSpeechRecognition() {
    const SpeechRecognitionImpl = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognitionImpl) {
      this.#error = "Speech recognition not supported in this browser";
      this.#renderError();
      return;
    }

    const recognition = new SpeechRecognitionImpl();
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = "en-US";

    recognition.onresult = (event) => {
      // One event can carry several interim results; they are concatenated so
      // the interim segment shows the whole pending utterance rather than only
      // the last fragment.
      let interimText = "";
      let sawInterim = false;

      for (let i = event.resultIndex; i < event.results.length; i++) {
        const result = event.results[i];
        const best = result?.[0];
        if (!result || !best) continue;

        if (result.isFinal) {
          this.#commitFinalText(best.transcript);
        } else {
          interimText += best.transcript;
          sawInterim = true;
        }
      }

      if (sawInterim) {
        this.#setInterimText(interimText);
      }
      this.#renderTranscript();
    };

    recognition.onerror = (event) => {
      console.error("Speech recognition error:", event.error);
      // "no-speech" is benign: recognition ends and `onend` restarts it.
      if (event.error === "no-speech") return;

      // Any other error ends the session. Without this, `#isRecording` stays
      // true and `onend` would keep restarting a recognizer that keeps failing
      // (e.g. microphone permission denied) while the timer keeps running.
      // Stop first: stopping re-renders the transcript, which would otherwise
      // overwrite the error box.
      this.#stopRecording();
      this.#error = `Recognition error: ${event.error}`;
      this.#renderError();
    };

    recognition.onend = () => {
      // Restart if still supposed to be recording.
      if (!this.#isRecording) return;
      try {
        recognition.start();
      } catch (error) {
        console.error("Failed to restart speech recognition:", error);
        this.#stopRecording();
        this.#error = "Failed to start recording";
        this.#renderError();
      }
    };

    this.#recognition = recognition;
  }

  /** Turns the current interim segment into a final one, or appends a new final segment. */
  #commitFinalText(text: string) {
    const interimIdx = this.#segments.findIndex((s) => !s.isFinal);
    const interim = this.#segments[interimIdx];
    if (interim) {
      this.#segments[interimIdx] = { ...interim, text, isFinal: true };
      return;
    }
    this.#segments.push({
      id: crypto.randomUUID(),
      text,
      timestamp: this.#duration,
      isFinal: true,
    });
  }

  /** Updates the text of the current interim segment, or appends a new interim segment. */
  #setInterimText(text: string) {
    const interim = this.#segments.find((s) => !s.isFinal);
    if (interim) {
      interim.text = text;
      return;
    }
    this.#segments.push({
      id: crypto.randomUUID(),
      text,
      timestamp: this.#duration,
      isFinal: false,
    });
  }

  #toggleRecording() {
    if (this.#isRecording) {
      this.#stopRecording();
    } else {
      this.#startRecording();
    }
  }

  /** Starts recognition and the duration timer; shows an error if that is not possible. */
  #startRecording() {
    if (this.#isRecording) return;

    if (!this.#recognition) {
      this.#error = "Speech recognition not available";
      this.#renderError();
      return;
    }

    try {
      this.#recognition.start();
    } catch (error) {
      console.error("Failed to start speech recognition:", error);
      this.#error = "Failed to start recording";
      this.#renderError();
      return;
    }

    // Remove a previously displayed error now that recording works again.
    const hadError = this.#error !== null;
    this.#isRecording = true;
    this.#error = null;
    if (hadError) {
      this.#renderTranscript();
    }

    this.#recordBtn?.classList.add("recording");
    if (this.#statusEl) {
      this.#statusEl.textContent = "Recording...";
      this.#statusEl.classList.add("recording");
    }

    // Start the duration timer (never leave an older one running).
    this.#clearDurationTimer();
    this.#durationInterval = setInterval(() => {
      this.#duration++;
      this.#updateDuration();
    }, 1000);

    this.dispatchEvent(new CustomEvent("recording-start"));
  }

  /** Stops recognition and the timer, drops interim segments and announces the result. */
  #stopRecording() {
    if (!this.#isRecording) return;

    // Clear the flag before stopping so `onend` does not restart recognition.
    this.#isRecording = false;
    this.#recognition?.stop();

    this.#recordBtn?.classList.remove("recording");
    if (this.#statusEl) {
      this.#statusEl.textContent = "Stopped";
      this.#statusEl.classList.remove("recording");
    }

    this.#clearDurationTimer();

    // Remove any interim segments; they never became final.
    this.#segments = this.#segments.filter((s) => s.isFinal);
    this.#renderTranscript();

    this.dispatchEvent(new CustomEvent("recording-stop", { detail: { transcript: this.transcript } }));
  }

  #clearDurationTimer() {
    if (this.#durationInterval !== null) {
      clearInterval(this.#durationInterval);
      this.#durationInterval = null;
    }
  }

  /** Writes the current duration into the duration readout as `MM:SS`. */
  #updateDuration() {
    if (!this.#durationEl) return;
    this.#durationEl.textContent = this.#formatTime(this.#duration);
  }

  /** Renders all segments, or the placeholder when there are none. */
  #renderTranscript() {
    const area = this.#transcriptArea;
    if (!area) return;

    if (this.#segments.length === 0) {
      area.innerHTML = FolkTranscription.#placeholderMarkup;
      return;
    }

    area.innerHTML = this.#segments
      .map(
        (segment) => `
          <div class="transcript-segment">
            <div class="segment-time">${this.#formatTime(segment.timestamp)}</div>
            <div class="segment-text ${segment.isFinal ? "" : "interim"}">${this.#escapeHtml(segment.text)}</div>
          </div>
        `
      )
      .join("");

    // Scroll to bottom so the newest segment stays visible.
    area.scrollTop = area.scrollHeight;
  }

  /** Replaces the transcript area content with the current error message, if any. */
  #renderError() {
    if (!this.#transcriptArea || !this.#error) return;
    this.#transcriptArea.innerHTML = `<div class="error">${this.#escapeHtml(this.#error)}</div>`;
  }

  /**
   * Formats a number of seconds as zero-padded `MM:SS`.
   *
   * @param seconds Whole seconds to format.
   */
  #formatTime(seconds: number): string {
    const mins = Math.floor(seconds / 60)
      .toString()
      .padStart(2, "0");
    const secs = (seconds % 60).toString().padStart(2, "0");
    return `${mins}:${secs}`;
  }

  /** Copies the final transcript to the clipboard and dispatches `copied` on success. */
  async #copyTranscript(): Promise<void> {
    try {
      await navigator.clipboard.writeText(this.transcript);
      this.dispatchEvent(new CustomEvent("copied"));
    } catch (error) {
      console.error("Failed to copy transcript", error);
    }
  }

  /** Removes all segments and resets the duration counter and readout. */
  #clearTranscript() {
    this.#segments = [];
    this.#duration = 0;
    this.#updateDuration();
    this.#renderTranscript();
  }

  /** Escapes `text` for safe insertion into `innerHTML`. */
  #escapeHtml(text: string): string {
    const div = document.createElement("div");
    div.textContent = text;
    return div.innerHTML;
  }

  /** Serializes the shape, adding the final transcript and a copy of every segment. */
  override toJSON() {
    return {
      ...super.toJSON(),
      type: "folk-transcription",
      transcript: this.transcript,
      segments: this.segments.map((s) => ({
        ...s,
      })),
    };
  }
}