rspace-online/lib/folk-transcription.ts

616 lines
14 KiB
TypeScript

import { FolkShape } from "./folk-shape";
import { css, html } from "./tags";
// Minimal local declarations of the Web Speech API shapes used in this file
// (TypeScript's bundled DOM typings do not always include them).

/** One recognition result: an indexable list of alternatives plus a finality flag. */
interface SpeechRecognitionResult {
readonly length: number;
item(index: number): SpeechRecognitionAlternative;
[index: number]: SpeechRecognitionAlternative;
// False while the recognizer may still revise this result (interim result).
readonly isFinal: boolean;
}
/** A single transcription hypothesis for a result. */
interface SpeechRecognitionAlternative {
readonly transcript: string;
readonly confidence: number;
}
/** Indexable list of the results delivered with a result event. */
interface SpeechRecognitionResultList {
readonly length: number;
item(index: number): SpeechRecognitionResult;
[index: number]: SpeechRecognitionResult;
}
/** Payload of the `onresult` callback. */
interface SpeechRecognitionEvent extends Event {
// Index in `results` from which the handler in this file starts reading.
readonly resultIndex: number;
readonly results: SpeechRecognitionResultList;
}
/** Payload of the `onerror` callback; `error` is a short code such as "no-speech". */
interface SpeechRecognitionErrorEvent extends Event {
readonly error: string;
readonly message: string;
}
/** The subset of the recognizer object that this file configures and drives. */
interface SpeechRecognition extends EventTarget {
continuous: boolean;
interimResults: boolean;
lang: string;
onresult: ((event: SpeechRecognitionEvent) => void) | null;
onerror: ((event: SpeechRecognitionErrorEvent) => void) | null;
onend: (() => void) | null;
start(): void;
stop(): void;
}
/** Constructor signature of the recognizer (called with no arguments). */
interface SpeechRecognitionConstructor {
new (): SpeechRecognition;
}
// Both the standard and the webkit-prefixed constructors are declared optional:
// callers must check for their presence before use.
declare global {
interface Window {
SpeechRecognition?: SpeechRecognitionConstructor;
webkitSpeechRecognition?: SpeechRecognitionConstructor;
}
}
// Component-specific CSS for <folk-transcription>: host card, draggable header,
// record button (with a pulsing ring while recording), transcript segments,
// action buttons and the error box. The class's static initializer reads
// `styles.cssRules` and appends these rules after the ones from FolkShape.
const styles = css`
:host {
background: white;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
min-width: 350px;
min-height: 400px;
}
.header {
display: flex;
align-items: center;
justify-content: space-between;
padding: 8px 12px;
background: linear-gradient(135deg, #14b8a6, #06b6d4);
color: white;
border-radius: 8px 8px 0 0;
font-size: 12px;
font-weight: 600;
cursor: move;
}
.header-title {
display: flex;
align-items: center;
gap: 6px;
}
.header-actions {
display: flex;
gap: 4px;
}
.header-actions button {
background: transparent;
border: none;
color: white;
cursor: pointer;
padding: 2px 6px;
border-radius: 4px;
font-size: 14px;
}
.header-actions button:hover {
background: rgba(255, 255, 255, 0.2);
}
.content {
display: flex;
flex-direction: column;
height: calc(100% - 36px);
overflow: hidden;
}
.controls {
display: flex;
align-items: center;
justify-content: center;
gap: 16px;
padding: 16px;
border-bottom: 1px solid #e2e8f0;
}
.record-btn {
width: 64px;
height: 64px;
border-radius: 50%;
border: 4px solid #e2e8f0;
background: white;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
transition: all 0.2s;
}
.record-btn:hover {
border-color: #14b8a6;
}
.record-btn.recording {
border-color: #ef4444;
animation: pulse-ring 1.5s infinite;
}
.record-icon {
width: 24px;
height: 24px;
border-radius: 50%;
background: #ef4444;
transition: all 0.2s;
}
.record-btn.recording .record-icon {
border-radius: 4px;
width: 20px;
height: 20px;
}
@keyframes pulse-ring {
0% { box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4); }
70% { box-shadow: 0 0 0 10px rgba(239, 68, 68, 0); }
100% { box-shadow: 0 0 0 0 rgba(239, 68, 68, 0); }
}
.status {
font-size: 12px;
color: #64748b;
}
.status.recording {
color: #ef4444;
font-weight: 600;
}
.duration {
font-family: "Monaco", "Consolas", monospace;
font-size: 14px;
color: #1e293b;
}
.transcript-area {
flex: 1;
overflow-y: auto;
padding: 12px;
}
.placeholder {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
height: 100%;
color: #94a3b8;
text-align: center;
gap: 8px;
}
.placeholder-icon {
font-size: 48px;
opacity: 0.5;
}
.transcript {
font-size: 14px;
line-height: 1.6;
color: #1e293b;
}
.transcript-segment {
margin-bottom: 12px;
padding: 8px 12px;
background: #f8fafc;
border-radius: 8px;
border-left: 3px solid #14b8a6;
}
.segment-time {
font-size: 11px;
color: #64748b;
margin-bottom: 4px;
font-family: "Monaco", "Consolas", monospace;
}
.segment-text {
color: #1e293b;
}
.segment-text.interim {
color: #94a3b8;
font-style: italic;
}
.actions {
display: flex;
gap: 8px;
padding: 12px;
border-top: 1px solid #e2e8f0;
}
.action-btn {
flex: 1;
padding: 8px 12px;
border: 2px solid #e2e8f0;
border-radius: 6px;
background: white;
cursor: pointer;
font-size: 12px;
font-weight: 500;
color: #64748b;
transition: all 0.2s;
}
.action-btn:hover {
border-color: #14b8a6;
color: #14b8a6;
}
.error {
color: #ef4444;
padding: 12px;
background: #fef2f2;
border-radius: 6px;
font-size: 13px;
margin: 12px;
}
`;
/** One entry of the transcript list shown by `FolkTranscription`. */
export interface TranscriptSegment {
/** Unique identifier; the component generates it with `crypto.randomUUID()`. */
id: string;
/** Recognized text of the segment. */
text: string;
/** Value of the component's seconds counter at the moment the segment was created. */
timestamp: number;
/** `false` while the segment is still an interim (revisable) result. */
isFinal: boolean;
}
// Map the tag name to the element class so that DOM lookups/creation by the
// "folk-transcription" tag are typed as FolkTranscription.
declare global {
interface HTMLElementTagNameMap {
"folk-transcription": FolkTranscription;
}
}
/**
 * Shape that records speech through the browser's speech recognition and
 * displays the running transcript as a list of timestamped segments.
 *
 * Events dispatched on the element:
 * - `recording-start` once recognition has been started,
 * - `recording-stop` (detail: `{ transcript }`) when recording ends, whether
 *   by user action, by the close button, or because recognition failed,
 * - `copied` after the transcript was written to the clipboard,
 * - `close` when the header close button is clicked.
 */
export class FolkTranscription extends FolkShape {
  static override tagName = "folk-transcription";

  static {
    // Combine the rules inherited from FolkShape with this component's own
    // rules into a single constructable stylesheet.
    const sheet = new CSSStyleSheet();
    const parentRules = Array.from(FolkShape.styles.cssRules)
      .map((r) => r.cssText)
      .join("\n");
    const childRules = Array.from(styles.cssRules)
      .map((r) => r.cssText)
      .join("\n");
    sheet.replaceSync(`${parentRules}\n${childRules}`);
    this.styles = sheet;
  }

  /** Segments in arrival order; at most one of them is interim at any time. */
  #segments: TranscriptSegment[] = [];
  #isRecording = false;
  /** Whole seconds spent recording; only reset by clearing the transcript. */
  #duration = 0;
  #durationInterval: ReturnType<typeof setInterval> | null = null;
  /** Recognizer instance, or `null` when the browser offers none. */
  #recognition: SpeechRecognition | null = null;
  #error: string | null = null;
  #recordBtn: HTMLElement | null = null;
  #statusEl: HTMLElement | null = null;
  #durationEl: HTMLElement | null = null;
  #transcriptArea: HTMLElement | null = null;

  /** The live segment list (final segments plus a possible interim one). */
  get segments() {
    return this.#segments;
  }

  /** Text of all final segments joined with single spaces. */
  get transcript() {
    return this.#segments
      .filter((s) => s.isFinal)
      .map((s) => s.text)
      .join(" ");
  }

  /**
   * Builds the component UI inside the render root created by the parent
   * class, wires up the buttons and initializes speech recognition.
   *
   * @returns The render root obtained from `super.createRenderRoot()`.
   */
  override createRenderRoot() {
    const root = super.createRenderRoot();
    const wrapper = document.createElement("div");
    wrapper.innerHTML = html`
<div class="header">
<span class="header-title">
<span>\u{1F3A4}</span>
<span>Transcription</span>
</span>
<div class="header-actions">
<button class="close-btn" title="Close">\u00D7</button>
</div>
</div>
<div class="content">
<div class="controls">
<button class="record-btn" title="Start/Stop Recording">
<span class="record-icon"></span>
</button>
<div>
<div class="status">Ready to record</div>
<div class="duration">00:00</div>
</div>
</div>
<div class="transcript-area">
<div class="placeholder">
<span class="placeholder-icon">\u{1F3A4}</span>
<span>Click the record button to start</span>
<span style="font-size: 11px;">Uses your browser's speech recognition</span>
</div>
</div>
<div class="actions">
<button class="action-btn copy-btn">\u{1F4CB} Copy</button>
<button class="action-btn clear-btn">\u{1F5D1} Clear</button>
</div>
</div>
`;

    // Replace the container div (slot's parent) with our wrapper
    const containerDiv = root.querySelector("slot")?.parentElement;
    containerDiv?.replaceWith(wrapper);

    this.#recordBtn = wrapper.querySelector(".record-btn");
    this.#statusEl = wrapper.querySelector(".status");
    this.#durationEl = wrapper.querySelector(".duration");
    this.#transcriptArea = wrapper.querySelector(".transcript-area");

    const copyBtn = wrapper.querySelector<HTMLButtonElement>(".copy-btn");
    const clearBtn = wrapper.querySelector<HTMLButtonElement>(".clear-btn");
    const closeBtn = wrapper.querySelector<HTMLButtonElement>(".close-btn");

    // Every handler stops propagation so the click is handled here only and
    // does not bubble further up.
    this.#recordBtn?.addEventListener("click", (e) => {
      e.stopPropagation();
      this.#toggleRecording();
    });
    copyBtn?.addEventListener("click", (e) => {
      e.stopPropagation();
      void this.#copyTranscript();
    });
    clearBtn?.addEventListener("click", (e) => {
      e.stopPropagation();
      this.#clearTranscript();
    });
    closeBtn?.addEventListener("click", (e) => {
      e.stopPropagation();
      this.#stopRecording();
      this.dispatchEvent(new CustomEvent("close"));
    });

    this.#initSpeechRecognition();
    return root;
  }

  /**
   * Creates and configures the recognizer (continuous, interim results,
   * "en-US"). Shows an error instead when the browser provides neither the
   * standard nor the webkit-prefixed constructor.
   */
  #initSpeechRecognition() {
    const SpeechRecognitionImpl = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognitionImpl) {
      this.#error = "Speech recognition not supported in this browser";
      this.#renderError();
      return;
    }

    const recognition = new SpeechRecognitionImpl();
    recognition.continuous = true;
    recognition.interimResults = true;
    recognition.lang = "en-US";
    recognition.onresult = (event) => this.#handleResult(event);
    recognition.onerror = (event) => this.#handleError(event);
    recognition.onend = () => this.#handleEnd();
    this.#recognition = recognition;
  }

  /**
   * Folds the results of one recognition event into the segment list.
   * Final results are committed one by one; all non-final results of the
   * event are concatenated into the single interim segment, so that an event
   * carrying several interim chunks does not show only the last chunk.
   */
  #handleResult(event: SpeechRecognitionEvent) {
    let interimText: string | null = null;
    for (let i = event.resultIndex; i < event.results.length; i++) {
      const result = event.results[i];
      const best = result?.[0];
      if (!result || !best) continue; // defensive: skip empty results
      if (result.isFinal) {
        this.#commitFinal(best.transcript);
      } else {
        interimText = (interimText ?? "") + best.transcript;
      }
    }
    if (interimText !== null) {
      this.#upsertInterim(interimText);
    }
    this.#renderTranscript();
  }

  /** Finalizes the current interim segment with `text`, or appends a new final segment. */
  #commitFinal(text: string) {
    const interimIdx = this.#segments.findIndex((s) => !s.isFinal);
    const interim = this.#segments[interimIdx];
    if (interim) {
      // Keep the id and timestamp of the interim segment being finalized.
      this.#segments[interimIdx] = { ...interim, text, isFinal: true };
    } else {
      this.#segments.push({
        id: crypto.randomUUID(),
        text,
        timestamp: this.#duration,
        isFinal: true,
      });
    }
  }

  /** Updates the text of the interim segment, creating it if none exists. */
  #upsertInterim(text: string) {
    const interim = this.#segments.find((s) => !s.isFinal);
    if (interim) {
      interim.text = text;
    } else {
      this.#segments.push({
        id: crypto.randomUUID(),
        text,
        timestamp: this.#duration,
        isFinal: false,
      });
    }
  }

  /**
   * Handles a recognition error. "no-speech" is only logged and recording
   * continues. Any other error stops recording first: otherwise the `onend`
   * handler would keep restarting a recognizer that fails again immediately
   * (e.g. when microphone permission is denied).
   */
  #handleError(event: SpeechRecognitionErrorEvent) {
    console.error("Speech recognition error:", event.error);
    if (event.error === "no-speech") return;
    this.#stopRecording();
    // Set after stopping, because stopping re-renders the transcript area.
    this.#error = `Recognition error: ${event.error}`;
    this.#renderError();
  }

  /** Restarts recognition when it ended while the user is still recording. */
  #handleEnd() {
    if (!this.#isRecording || !this.#recognition) return;
    try {
      this.#recognition.start();
    } catch (error) {
      console.error("Failed to restart speech recognition:", error);
      this.#stopRecording();
      this.#error = "Failed to restart recording";
      this.#renderError();
    }
  }

  #toggleRecording() {
    if (this.#isRecording) {
      this.#stopRecording();
    } else {
      this.#startRecording();
    }
  }

  /**
   * Starts recognition and the one-second duration timer, updates the UI and
   * dispatches `recording-start`. Shows an error when no recognizer exists
   * or when starting it throws.
   */
  #startRecording() {
    if (!this.#recognition) {
      this.#error = "Speech recognition not available";
      this.#renderError();
      return;
    }

    try {
      this.#recognition.start();
    } catch (error) {
      console.error("Failed to start speech recognition:", error);
      this.#error = "Failed to start recording";
      this.#renderError();
      return;
    }

    this.#isRecording = true;
    if (this.#error !== null) {
      // Remove the stale error box left over from a previous failure.
      this.#error = null;
      this.#renderTranscript();
    }

    this.#recordBtn?.classList.add("recording");
    if (this.#statusEl) {
      this.#statusEl.textContent = "Recording...";
      this.#statusEl.classList.add("recording");
    }

    // Start duration timer (clearing a leftover one so ticks never double up)
    if (this.#durationInterval) {
      clearInterval(this.#durationInterval);
    }
    this.#durationInterval = setInterval(() => {
      this.#duration++;
      this.#updateDuration();
    }, 1000);

    this.dispatchEvent(new CustomEvent("recording-start"));
  }

  /**
   * Stops recognition and the timer, drops any interim segment, re-renders
   * and dispatches `recording-stop` with the final transcript. No-op when
   * not recording.
   */
  #stopRecording() {
    if (!this.#isRecording) return;

    // Clear the flag before stopping so the `onend` handler does not restart.
    this.#isRecording = false;
    this.#recognition?.stop();

    this.#recordBtn?.classList.remove("recording");
    if (this.#statusEl) {
      this.#statusEl.textContent = "Stopped";
      this.#statusEl.classList.remove("recording");
    }

    if (this.#durationInterval) {
      clearInterval(this.#durationInterval);
      this.#durationInterval = null;
    }

    // Remove any interim segments
    this.#segments = this.#segments.filter((s) => s.isFinal);
    this.#renderTranscript();

    this.dispatchEvent(new CustomEvent("recording-stop", { detail: { transcript: this.transcript } }));
  }

  /** Writes the current duration as `MM:SS` into the duration element. */
  #updateDuration() {
    if (!this.#durationEl) return;
    this.#durationEl.textContent = this.#formatTime(this.#duration);
  }

  /** Renders the placeholder or the list of segments and scrolls to the newest one. */
  #renderTranscript() {
    if (!this.#transcriptArea) return;

    if (this.#segments.length === 0) {
      this.#transcriptArea.innerHTML = `
<div class="placeholder">
<span class="placeholder-icon">\u{1F3A4}</span>
<span>Click the record button to start</span>
<span style="font-size: 11px;">Uses your browser's speech recognition</span>
</div>
`;
      return;
    }

    // Segment text originates from the recognizer, so it is escaped before
    // being inserted as HTML.
    this.#transcriptArea.innerHTML = this.#segments
      .map(
        (segment) => `
<div class="transcript-segment">
<div class="segment-time">${this.#formatTime(segment.timestamp)}</div>
<div class="segment-text ${segment.isFinal ? "" : "interim"}">${this.#escapeHtml(segment.text)}</div>
</div>
`
      )
      .join("");

    // Scroll to bottom
    this.#transcriptArea.scrollTop = this.#transcriptArea.scrollHeight;
  }

  /** Replaces the transcript area content with the current error message, if any. */
  #renderError() {
    if (!this.#transcriptArea || !this.#error) return;
    this.#transcriptArea.innerHTML = `<div class="error">${this.#escapeHtml(this.#error)}</div>`;
  }

  /**
   * Formats a number of seconds as zero-padded `MM:SS`.
   *
   * @param seconds Non-negative whole seconds.
   */
  #formatTime(seconds: number): string {
    const mins = Math.floor(seconds / 60)
      .toString()
      .padStart(2, "0");
    const secs = (seconds % 60).toString().padStart(2, "0");
    return `${mins}:${secs}`;
  }

  /** Copies the final transcript to the clipboard and dispatches `copied` on success. */
  async #copyTranscript(): Promise<void> {
    try {
      await navigator.clipboard.writeText(this.transcript);
      this.dispatchEvent(new CustomEvent("copied"));
    } catch (error) {
      console.error("Failed to copy transcript", error);
    }
  }

  /** Removes all segments and resets the duration counter and display. */
  #clearTranscript() {
    this.#segments = [];
    this.#duration = 0;
    this.#updateDuration();
    this.#renderTranscript();
  }

  /** Escapes `text` for safe insertion as HTML by round-tripping it through `textContent`. */
  #escapeHtml(text: string): string {
    const div = document.createElement("div");
    div.textContent = text;
    return div.innerHTML;
  }

  /**
   * Serializes the shape: the parent's JSON plus the type tag, the final
   * transcript text and a shallow copy of every segment.
   */
  override toJSON() {
    return {
      ...super.toJSON(),
      type: "folk-transcription",
      transcript: this.transcript,
      segments: this.segments.map((s) => ({
        ...s,
      })),
    };
  }
}