feat: add rVoice popup recorder with 3-tier transcription to browser extension
Adds a standalone voice recording popup (voice.html) accessible via the extension popup button or Ctrl+Shift+V hotkey. Records audio, uploads to rNotes, and transcribes with a 3-tier cascade: server Whisper API, live Web Speech API (real-time text while recording), and offline Parakeet.js (NVIDIA 0.6B, ~634MB cached in IndexedDB). Saves as AUDIO notes with editable transcript and notebook selection. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
3d77eae16b
commit
5ff6c9d832
|
|
@ -35,6 +35,9 @@
|
||||||
"page": "options.html",
|
"page": "options.html",
|
||||||
"open_in_tab": false
|
"open_in_tab": false
|
||||||
},
|
},
|
||||||
|
"content_security_policy": {
|
||||||
|
"extension_pages": "script-src 'self' https://esm.sh; object-src 'self'"
|
||||||
|
},
|
||||||
"commands": {
|
"commands": {
|
||||||
"open-voice-recorder": {
|
"open-voice-recorder": {
|
||||||
"suggested_key": {
|
"suggested_key": {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,147 @@
|
||||||
|
/**
|
||||||
|
* Offline transcription using parakeet.js (NVIDIA Parakeet TDT 0.6B v2).
|
||||||
|
* Loaded at runtime from CDN. Model ~634 MB (int8) on first download,
|
||||||
|
* cached in IndexedDB after. Works fully offline after first download.
|
||||||
|
*
|
||||||
|
* Port of src/lib/parakeetOffline.ts for the browser extension.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// localStorage flag recorded once the model files have been fetched at least once.
const CACHE_KEY = 'parakeet-offline-cached';

// Module-level singletons: the loaded model and the in-flight load promise,
// so repeated calls never re-download or double-initialize the model.
let cachedModel = null;
let loadingPromise = null;

/**
 * Check if the Parakeet model has been downloaded before.
 * @returns {boolean} true when a previous download was recorded
 */
function isModelCached() {
  let flag = null;
  try {
    flag = localStorage.getItem(CACHE_KEY);
  } catch {
    // Storage unavailable (privacy mode, sandboxed context) — report not cached.
  }
  return flag === 'true';
}
|
||||||
|
|
||||||
|
/**
 * Detect WebGPU availability.
 *
 * @returns {Promise<boolean>} true when a GPU adapter can be acquired
 */
async function detectWebGPU() {
  // typeof guard: safe even where `navigator` is not defined at all
  // (the unguarded `navigator.gpu` access would throw a ReferenceError).
  if (typeof navigator === 'undefined' || !navigator.gpu) return false;
  try {
    const adapter = await navigator.gpu.requestAdapter();
    return !!adapter;
  } catch {
    // requestAdapter can reject on some platforms — treat as "no WebGPU".
    return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Get or create the Parakeet model singleton.
 *
 * Loads parakeet.js from the CDN on first call, downloads the model files
 * (~634 MB on the very first run; cached afterwards), and memoizes the
 * resulting model object. Concurrent callers share the same in-flight load
 * via `loadingPromise`.
 *
 * @param {function} [onProgress] - callback({ status, progress, file, message })
 * @returns {Promise<object>} the loaded Parakeet model
 */
async function getModel(onProgress) {
  if (cachedModel) return cachedModel;
  if (loadingPromise) return loadingPromise;

  loadingPromise = (async () => {
    onProgress?.({ status: 'loading', message: 'Loading Parakeet model...' });

    // Dynamic import from CDN at runtime
    const { fromHub } = await import('https://esm.sh/parakeet.js@1.1.2');

    // Prefer WebGPU when available; fall back to WASM.
    const backend = (await detectWebGPU()) ? 'webgpu' : 'wasm';
    // Per-file download state, keyed by file name, so we can report one
    // aggregate percentage across all model files.
    const fileProgress = {};

    const model = await fromHub('parakeet-tdt-0.6b-v2', {
      backend,
      progress: ({ file, loaded, total }) => {
        fileProgress[file] = { loaded, total };

        let totalBytes = 0;
        let loadedBytes = 0;
        for (const fp of Object.values(fileProgress)) {
          totalBytes += fp.total || 0;
          loadedBytes += fp.loaded || 0;
        }

        if (totalBytes > 0) {
          const pct = Math.round((loadedBytes / totalBytes) * 100);
          onProgress?.({
            status: 'downloading',
            progress: pct,
            file,
            message: `Downloading model... ${pct}%`,
          });
        }
      },
    });

    // Best-effort cache marker; never let a storage failure (blocked
    // localStorage, quota) fail an otherwise-successful model load.
    try {
      localStorage.setItem(CACHE_KEY, 'true');
    } catch {}
    onProgress?.({ status: 'loading', message: 'Model loaded' });

    cachedModel = model;
    return model;
  })();

  // BUGFIX: clear the in-flight marker on failure as well as success. The
  // original only cleared it on success, so a failed load (offline CDN,
  // download error) left a permanently-rejected promise that poisoned every
  // subsequent call with the same stale rejection.
  try {
    return await loadingPromise;
  } finally {
    loadingPromise = null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Decode an audio Blob to Float32Array at 16 kHz mono.
 *
 * Decodes via the Web Audio API, then — when the decoded buffer is not
 * already 16 kHz mono — renders it through a mono 16 kHz
 * OfflineAudioContext to resample/downmix.
 *
 * @param {Blob} blob - recorded audio in any browser-decodable container
 * @returns {Promise<Float32Array>} mono PCM samples at 16 kHz
 */
async function decodeAudioBlob(blob) {
  const bytes = await blob.arrayBuffer();
  const ctx = new AudioContext({ sampleRate: 16000 });
  try {
    const decoded = await ctx.decodeAudioData(bytes);

    const alreadyTarget =
      decoded.sampleRate === 16000 && decoded.numberOfChannels === 1;
    if (alreadyTarget) {
      return decoded.getChannelData(0);
    }

    // Resample/downmix by playing the buffer into an offline mono context.
    const frameCount = Math.ceil(decoded.duration * 16000);
    const offline = new OfflineAudioContext(1, frameCount, 16000);
    const src = offline.createBufferSource();
    src.buffer = decoded;
    src.connect(offline.destination);
    src.start();
    const rendered = await offline.startRendering();
    return rendered.getChannelData(0);
  } finally {
    // Always release the realtime AudioContext, even if decoding fails.
    await ctx.close();
  }
}
|
||||||
|
|
||||||
|
/**
 * Transcribe an audio Blob offline using Parakeet in the browser.
 * The first call downloads the model (~634 MB); later calls reuse the
 * cached singleton.
 *
 * @param {Blob} audioBlob - recorded audio
 * @param {function} [onProgress] - callback({ status, progress, file, message })
 * @returns {Promise<string>} transcribed text ('' when nothing was recognized)
 */
async function transcribeOffline(audioBlob, onProgress) {
  const model = await getModel(onProgress);

  onProgress?.({ status: 'transcribing', message: 'Transcribing audio...' });

  const pcm = await decodeAudioBlob(audioBlob);

  const transcribeOptions = {
    returnTimestamps: false,
    enableProfiling: false,
  };
  const result = await model.transcribe(pcm, 16000, transcribeOptions);

  const text = result.utterance_text?.trim() || '';
  onProgress?.({ status: 'done', message: 'Transcription complete' });
  return text;
}
|
||||||
|
|
||||||
|
// Published on window so voice.js (loaded as a classic, non-module script)
// can reach these functions even though this file is an ES module.
const ParakeetOffline = {
  isModelCached,
  transcribeOffline,
};
window.ParakeetOffline = ParakeetOffline;
|
||||||
|
|
@ -175,6 +175,13 @@
|
||||||
color: #525252;
|
color: #525252;
|
||||||
font-style: italic;
|
font-style: italic;
|
||||||
}
|
}
|
||||||
|
.transcript-text .final-text {
|
||||||
|
color: #d4d4d4;
|
||||||
|
}
|
||||||
|
.transcript-text .interim-text {
|
||||||
|
color: #737373;
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
/* Controls row */
|
/* Controls row */
|
||||||
.controls {
|
.controls {
|
||||||
|
|
@ -255,6 +262,61 @@
|
||||||
.status-bar.error { color: #fca5a5; background: #450a0a; border-top-color: #991b1b; }
|
.status-bar.error { color: #fca5a5; background: #450a0a; border-top-color: #991b1b; }
|
||||||
.status-bar.loading { color: #93c5fd; background: #172554; border-top-color: #1e40af; }
|
.status-bar.loading { color: #93c5fd; background: #172554; border-top-color: #1e40af; }
|
||||||
|
|
||||||
|
/* Live indicator */
|
||||||
|
.live-indicator {
|
||||||
|
display: none;
|
||||||
|
align-items: center;
|
||||||
|
gap: 5px;
|
||||||
|
font-size: 10px;
|
||||||
|
font-weight: 700;
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 1.5px;
|
||||||
|
color: #4ade80;
|
||||||
|
}
|
||||||
|
.live-indicator.visible {
|
||||||
|
display: flex;
|
||||||
|
}
|
||||||
|
.live-indicator .dot {
|
||||||
|
width: 6px;
|
||||||
|
height: 6px;
|
||||||
|
border-radius: 50%;
|
||||||
|
background: #4ade80;
|
||||||
|
animation: pulse-dot 1s infinite;
|
||||||
|
}
|
||||||
|
@keyframes pulse-dot {
|
||||||
|
0%, 100% { opacity: 1; }
|
||||||
|
50% { opacity: 0.3; }
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Progress bar (for model download) */
|
||||||
|
.progress-area {
|
||||||
|
width: 100%;
|
||||||
|
padding: 0 14px 8px;
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
.progress-area.visible {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
.progress-label {
|
||||||
|
font-size: 11px;
|
||||||
|
color: #a3a3a3;
|
||||||
|
margin-bottom: 4px;
|
||||||
|
}
|
||||||
|
.progress-bar {
|
||||||
|
width: 100%;
|
||||||
|
height: 6px;
|
||||||
|
background: #262626;
|
||||||
|
border-radius: 3px;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
.progress-bar .fill {
|
||||||
|
height: 100%;
|
||||||
|
background: #f59e0b;
|
||||||
|
border-radius: 3px;
|
||||||
|
transition: width 0.3s;
|
||||||
|
width: 0%;
|
||||||
|
}
|
||||||
|
|
||||||
/* Audio preview */
|
/* Audio preview */
|
||||||
.audio-preview {
|
.audio-preview {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
|
|
@ -305,6 +367,15 @@
|
||||||
<div class="inner"></div>
|
<div class="inner"></div>
|
||||||
</button>
|
</button>
|
||||||
<div class="timer" id="timer">00:00</div>
|
<div class="timer" id="timer">00:00</div>
|
||||||
|
<div class="live-indicator" id="liveIndicator">
|
||||||
|
<span class="dot"></span>
|
||||||
|
Live transcribe
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="progress-area" id="progressArea">
|
||||||
|
<div class="progress-label" id="progressLabel">Loading model...</div>
|
||||||
|
<div class="progress-bar"><div class="fill" id="progressFill"></div></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="audio-preview" id="audioPreview">
|
<div class="audio-preview" id="audioPreview">
|
||||||
|
|
@ -334,9 +405,10 @@
|
||||||
<div class="status-bar" id="statusBar"></div>
|
<div class="status-bar" id="statusBar"></div>
|
||||||
|
|
||||||
<div class="kbd-hint">
|
<div class="kbd-hint">
|
||||||
<kbd>Space</kbd> to record · <kbd>Esc</kbd> to close
|
<kbd>Space</kbd> to record · <kbd>Esc</kbd> to close · Offline ready
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<script src="parakeet-offline.js" type="module"></script>
|
||||||
<script src="voice.js"></script>
|
<script src="voice.js"></script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
||||||
|
|
@ -9,17 +9,23 @@ let startTime = 0;
|
||||||
let audioBlob = null;
|
let audioBlob = null;
|
||||||
let audioUrl = null;
|
let audioUrl = null;
|
||||||
let transcript = '';
|
let transcript = '';
|
||||||
|
let liveTranscript = ''; // accumulated from Web Speech API
|
||||||
let uploadedFileUrl = '';
|
let uploadedFileUrl = '';
|
||||||
let uploadedMimeType = '';
|
let uploadedMimeType = '';
|
||||||
let uploadedFileSize = 0;
|
let uploadedFileSize = 0;
|
||||||
let duration = 0;
|
let duration = 0;
|
||||||
|
|
||||||
|
// Web Speech API
|
||||||
|
let recognition = null;
|
||||||
|
let speechSupported = !!(window.SpeechRecognition || window.webkitSpeechRecognition);
|
||||||
|
|
||||||
// --- DOM refs ---
|
// --- DOM refs ---
|
||||||
const recBtn = document.getElementById('recBtn');
|
const recBtn = document.getElementById('recBtn');
|
||||||
const timerEl = document.getElementById('timer');
|
const timerEl = document.getElementById('timer');
|
||||||
const statusLabel = document.getElementById('statusLabel');
|
const statusLabel = document.getElementById('statusLabel');
|
||||||
const transcriptArea = document.getElementById('transcriptArea');
|
const transcriptArea = document.getElementById('transcriptArea');
|
||||||
const transcriptText = document.getElementById('transcriptText');
|
const transcriptText = document.getElementById('transcriptText');
|
||||||
|
const liveIndicator = document.getElementById('liveIndicator');
|
||||||
const audioPreview = document.getElementById('audioPreview');
|
const audioPreview = document.getElementById('audioPreview');
|
||||||
const audioPlayer = document.getElementById('audioPlayer');
|
const audioPlayer = document.getElementById('audioPlayer');
|
||||||
const notebookSelect = document.getElementById('notebook');
|
const notebookSelect = document.getElementById('notebook');
|
||||||
|
|
@ -70,6 +76,36 @@ function showStatusBar(message, type) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Parakeet progress UI ---

const progressArea = document.getElementById('progressArea');
const progressLabel = document.getElementById('progressLabel');
const progressFill = document.getElementById('progressFill');

/**
 * Render a Parakeet progress event in the model-download progress bar.
 * @param {{status: string, progress?: number, file?: string, message?: string}} p
 */
function showParakeetProgress(p) {
  if (!progressArea) return;
  progressArea.classList.add('visible');

  if (p.message) {
    progressLabel.textContent = p.message;
  }

  switch (p.status) {
    case 'downloading':
      // Only move the bar when the event carries a percentage.
      if (p.progress !== undefined) {
        progressFill.style.width = `${p.progress}%`;
      }
      break;
    case 'transcribing':
      progressFill.style.width = '100%';
      break;
    case 'loading':
      progressFill.style.width = '0%';
      break;
  }
}
|
||||||
|
|
||||||
|
/** Hide the model-download progress bar and reset it to empty. */
function hideParakeetProgress() {
  if (!progressArea) return;
  progressArea.classList.remove('visible');
  progressFill.style.width = '0%';
}
|
||||||
|
|
||||||
// --- Notebook loader ---
|
// --- Notebook loader ---
|
||||||
|
|
||||||
async function loadNotebooks() {
|
async function loadNotebooks() {
|
||||||
|
|
@ -103,6 +139,97 @@ notebookSelect.addEventListener('change', (e) => {
|
||||||
chrome.storage.local.set({ lastNotebookId: e.target.value });
|
chrome.storage.local.set({ lastNotebookId: e.target.value });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// --- Live transcription (Web Speech API) ---
|
||||||
|
|
||||||
|
/**
 * Begin live (streaming) transcription via the Web Speech API, running in
 * parallel with the MediaRecorder capture. Finalized text accumulates into
 * the module-level `liveTranscript`; updateLiveDisplay() paints both final
 * and interim text. No-op when the browser lacks SpeechRecognition.
 */
function startLiveTranscription() {
  if (!speechSupported) return;

  // Chromium exposes the prefixed constructor; fall back to it.
  const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
  recognition = new SpeechRecognition();
  recognition.continuous = true;      // keep listening across pauses
  recognition.interimResults = true;  // stream partial (non-final) results too
  recognition.lang = 'en-US';

  let finalizedText = '';

  recognition.onresult = (event) => {
    let interimText = '';
    // Rebuild finalized text from all final results — event.results carries
    // the full session history each time, so start from scratch.
    finalizedText = '';
    for (let i = 0; i < event.results.length; i++) {
      const result = event.results[i];
      if (result.isFinal) {
        finalizedText += result[0].transcript.trim() + ' ';
      } else {
        interimText += result[0].transcript;
      }
    }

    liveTranscript = finalizedText.trim();

    // Update the live transcript display
    updateLiveDisplay(finalizedText.trim(), interimText.trim());
  };

  recognition.onerror = (event) => {
    // Ignored: 'aborted' (raised by our own stop()) and 'no-speech' (silence).
    if (event.error !== 'aborted' && event.error !== 'no-speech') {
      console.warn('Speech recognition error:', event.error);
    }
  };

  // Auto-restart on end (Chrome stops after ~60s of silence). The
  // `recognition` null-check means a deliberate stopLiveTranscription()
  // (which nulls the ref before calling stop()) will not re-trigger a start.
  recognition.onend = () => {
    if (state === 'recording' && recognition) {
      try { recognition.start(); } catch {}
    }
  };

  try {
    recognition.start();
    if (liveIndicator) liveIndicator.classList.add('visible');
  } catch (err) {
    // Disable the live tier for this session so later recordings don't
    // repeatedly retry an API that cannot start.
    console.warn('Could not start speech recognition:', err);
    speechSupported = false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Stop live transcription and hide the "live" indicator.
 * Nulls the module-level `recognition` ref BEFORE calling stop() so the
 * onend auto-restart handler sees no active session and stays quiet.
 */
function stopLiveTranscription() {
  const active = recognition;
  recognition = null;
  if (active) {
    try { active.stop(); } catch {}
  }
  liveIndicator?.classList.remove('visible');
}
|
||||||
|
|
||||||
|
/**
 * Paint the live transcript panel with finalized and interim speech text.
 * Only active while recording; shows a "Listening..." placeholder when
 * neither kind of text is available yet.
 *
 * @param {string} finalText - finalized (stable) transcript text
 * @param {string} interimText - in-flight (may still change) transcript text
 */
function updateLiveDisplay(finalText, interimText) {
  if (state !== 'recording') return;

  // Keep the transcript panel visible while recording.
  transcriptArea.classList.add('visible');

  const parts = [];
  if (finalText) {
    parts.push(`<span class="final-text">${escapeHtml(finalText)}</span>`);
  }
  if (interimText) {
    parts.push(`<span class="interim-text">${escapeHtml(interimText)}</span>`);
  }

  transcriptText.innerHTML = parts.length
    ? parts.join('')
    : '<span class="placeholder">Listening...</span>';

  // Pin the scroll position to the newest text.
  transcriptText.scrollTop = transcriptText.scrollHeight;
}
|
||||||
|
|
||||||
|
/**
 * Escape text for safe insertion into innerHTML.
 *
 * Pure-string implementation: avoids creating a throwaway DOM element per
 * call, and also escapes quotes so the result is safe in attribute contexts
 * as well as element text. Rendering of element content is unchanged
 * relative to the DOM-based original.
 *
 * @param {string} text - untrusted text
 * @returns {string} HTML-escaped text
 */
function escapeHtml(text) {
  return String(text)
    .replaceAll('&', '&amp;')   // must run first so entities aren't double-escaped
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;')
    .replaceAll("'", '&#39;');
}
|
||||||
|
|
||||||
// --- Recording ---
|
// --- Recording ---
|
||||||
|
|
||||||
async function startRecording() {
|
async function startRecording() {
|
||||||
|
|
@ -115,6 +242,7 @@ async function startRecording() {
|
||||||
|
|
||||||
mediaRecorder = new MediaRecorder(stream, { mimeType });
|
mediaRecorder = new MediaRecorder(stream, { mimeType });
|
||||||
audioChunks = [];
|
audioChunks = [];
|
||||||
|
liveTranscript = '';
|
||||||
|
|
||||||
mediaRecorder.ondataavailable = (e) => {
|
mediaRecorder.ondataavailable = (e) => {
|
||||||
if (e.data.size > 0) audioChunks.push(e.data);
|
if (e.data.size > 0) audioChunks.push(e.data);
|
||||||
|
|
@ -130,14 +258,24 @@ async function startRecording() {
|
||||||
setStatusLabel('Recording', 'recording');
|
setStatusLabel('Recording', 'recording');
|
||||||
postActions.style.display = 'none';
|
postActions.style.display = 'none';
|
||||||
audioPreview.classList.remove('visible');
|
audioPreview.classList.remove('visible');
|
||||||
transcriptArea.classList.remove('visible');
|
|
||||||
statusBar.className = 'status-bar';
|
statusBar.className = 'status-bar';
|
||||||
|
|
||||||
|
// Show transcript area with listening placeholder
|
||||||
|
if (speechSupported) {
|
||||||
|
transcriptArea.classList.add('visible');
|
||||||
|
transcriptText.innerHTML = '<span class="placeholder">Listening...</span>';
|
||||||
|
} else {
|
||||||
|
transcriptArea.classList.remove('visible');
|
||||||
|
}
|
||||||
|
|
||||||
timerInterval = setInterval(() => {
|
timerInterval = setInterval(() => {
|
||||||
const elapsed = Math.floor((Date.now() - startTime) / 1000);
|
const elapsed = Math.floor((Date.now() - startTime) / 1000);
|
||||||
timerEl.textContent = formatTime(elapsed);
|
timerEl.textContent = formatTime(elapsed);
|
||||||
}, 1000);
|
}, 1000);
|
||||||
|
|
||||||
|
// Start live transcription alongside recording
|
||||||
|
startLiveTranscription();
|
||||||
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
showStatusBar(err.message || 'Microphone access denied', 'error');
|
showStatusBar(err.message || 'Microphone access denied', 'error');
|
||||||
}
|
}
|
||||||
|
|
@ -150,6 +288,12 @@ async function stopRecording() {
|
||||||
timerInterval = null;
|
timerInterval = null;
|
||||||
duration = Math.floor((Date.now() - startTime) / 1000);
|
duration = Math.floor((Date.now() - startTime) / 1000);
|
||||||
|
|
||||||
|
// Capture live transcript before stopping recognition
|
||||||
|
const capturedLiveTranscript = liveTranscript;
|
||||||
|
|
||||||
|
// Stop live transcription
|
||||||
|
stopLiveTranscription();
|
||||||
|
|
||||||
state = 'processing';
|
state = 'processing';
|
||||||
recBtn.classList.remove('recording');
|
recBtn.classList.remove('recording');
|
||||||
timerEl.classList.remove('recording');
|
timerEl.classList.remove('recording');
|
||||||
|
|
@ -170,17 +314,21 @@ async function stopRecording() {
|
||||||
audioPlayer.src = audioUrl;
|
audioPlayer.src = audioUrl;
|
||||||
audioPreview.classList.add('visible');
|
audioPreview.classList.add('visible');
|
||||||
|
|
||||||
// Show transcript area with placeholder
|
// Show live transcript while we process (if we have one)
|
||||||
transcriptArea.classList.add('visible');
|
transcriptArea.classList.add('visible');
|
||||||
transcriptText.innerHTML = '<span class="placeholder">Transcribing...</span>';
|
if (capturedLiveTranscript) {
|
||||||
|
transcriptText.textContent = capturedLiveTranscript;
|
||||||
|
showStatusBar('Improving transcript...', 'loading');
|
||||||
|
} else {
|
||||||
|
transcriptText.innerHTML = '<span class="placeholder">Transcribing...</span>';
|
||||||
|
showStatusBar('Uploading & transcribing...', 'loading');
|
||||||
|
}
|
||||||
|
|
||||||
// Upload audio file
|
// Upload audio file
|
||||||
const token = await getToken();
|
const token = await getToken();
|
||||||
const settings = await getSettings();
|
const settings = await getSettings();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
showStatusBar('Uploading recording...', 'loading');
|
|
||||||
|
|
||||||
const uploadForm = new FormData();
|
const uploadForm = new FormData();
|
||||||
uploadForm.append('file', audioBlob, 'voice-note.webm');
|
uploadForm.append('file', audioBlob, 'voice-note.webm');
|
||||||
|
|
||||||
|
|
@ -197,26 +345,50 @@ async function stopRecording() {
|
||||||
uploadedMimeType = uploadResult.mimeType;
|
uploadedMimeType = uploadResult.mimeType;
|
||||||
uploadedFileSize = uploadResult.size;
|
uploadedFileSize = uploadResult.size;
|
||||||
|
|
||||||
// Transcribe via batch API
|
// --- Three-tier transcription cascade ---
|
||||||
showStatusBar('Transcribing...', 'loading');
|
|
||||||
|
|
||||||
const transcribeForm = new FormData();
|
// Tier 1: Batch API (Whisper on server — highest quality)
|
||||||
transcribeForm.append('audio', audioBlob, 'voice-note.webm');
|
let bestTranscript = '';
|
||||||
|
try {
|
||||||
|
showStatusBar('Transcribing via server...', 'loading');
|
||||||
|
const transcribeForm = new FormData();
|
||||||
|
transcribeForm.append('audio', audioBlob, 'voice-note.webm');
|
||||||
|
|
||||||
const transcribeRes = await fetch(`${settings.host}/api/voice/transcribe`, {
|
const transcribeRes = await fetch(`${settings.host}/api/voice/transcribe`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Authorization': `Bearer ${token}` },
|
headers: { 'Authorization': `Bearer ${token}` },
|
||||||
body: transcribeForm,
|
body: transcribeForm,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (transcribeRes.ok) {
|
if (transcribeRes.ok) {
|
||||||
const transcribeResult = await transcribeRes.json();
|
const transcribeResult = await transcribeRes.json();
|
||||||
transcript = transcribeResult.text || '';
|
bestTranscript = transcribeResult.text || '';
|
||||||
} else {
|
}
|
||||||
transcript = '';
|
} catch {
|
||||||
console.warn('Transcription failed, saving without transcript');
|
console.warn('Tier 1 (batch API) unavailable');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tier 2: Live transcript from Web Speech API (already captured)
|
||||||
|
if (!bestTranscript && capturedLiveTranscript) {
|
||||||
|
bestTranscript = capturedLiveTranscript;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tier 3: Offline Parakeet.js (NVIDIA, runs in browser)
|
||||||
|
if (!bestTranscript && window.ParakeetOffline) {
|
||||||
|
try {
|
||||||
|
showStatusBar('Transcribing offline (Parakeet)...', 'loading');
|
||||||
|
bestTranscript = await window.ParakeetOffline.transcribeOffline(audioBlob, (p) => {
|
||||||
|
showParakeetProgress(p);
|
||||||
|
});
|
||||||
|
hideParakeetProgress();
|
||||||
|
} catch (offlineErr) {
|
||||||
|
console.warn('Tier 3 (Parakeet offline) failed:', offlineErr);
|
||||||
|
hideParakeetProgress();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
transcript = bestTranscript;
|
||||||
|
|
||||||
// Show transcript (editable)
|
// Show transcript (editable)
|
||||||
if (transcript) {
|
if (transcript) {
|
||||||
transcriptText.textContent = transcript;
|
transcriptText.textContent = transcript;
|
||||||
|
|
@ -230,6 +402,26 @@ async function stopRecording() {
|
||||||
statusBar.className = 'status-bar';
|
statusBar.className = 'status-bar';
|
||||||
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
// On upload error, try offline transcription directly
|
||||||
|
let fallbackTranscript = capturedLiveTranscript || '';
|
||||||
|
|
||||||
|
if (!fallbackTranscript && window.ParakeetOffline) {
|
||||||
|
try {
|
||||||
|
showStatusBar('Upload failed, transcribing offline...', 'loading');
|
||||||
|
fallbackTranscript = await window.ParakeetOffline.transcribeOffline(audioBlob, (p) => {
|
||||||
|
showParakeetProgress(p);
|
||||||
|
});
|
||||||
|
hideParakeetProgress();
|
||||||
|
} catch {
|
||||||
|
hideParakeetProgress();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
transcript = fallbackTranscript;
|
||||||
|
if (transcript) {
|
||||||
|
transcriptText.textContent = transcript;
|
||||||
|
}
|
||||||
|
|
||||||
showStatusBar(`Error: ${err.message}`, 'error');
|
showStatusBar(`Error: ${err.message}`, 'error');
|
||||||
state = 'done';
|
state = 'done';
|
||||||
setStatusLabel('Error', 'idle');
|
setStatusLabel('Error', 'idle');
|
||||||
|
|
@ -341,11 +533,14 @@ function resetState() {
|
||||||
audioChunks = [];
|
audioChunks = [];
|
||||||
audioBlob = null;
|
audioBlob = null;
|
||||||
transcript = '';
|
transcript = '';
|
||||||
|
liveTranscript = '';
|
||||||
uploadedFileUrl = '';
|
uploadedFileUrl = '';
|
||||||
uploadedMimeType = '';
|
uploadedMimeType = '';
|
||||||
uploadedFileSize = 0;
|
uploadedFileSize = 0;
|
||||||
duration = 0;
|
duration = 0;
|
||||||
|
|
||||||
|
stopLiveTranscription();
|
||||||
|
|
||||||
if (audioUrl) {
|
if (audioUrl) {
|
||||||
URL.revokeObjectURL(audioUrl);
|
URL.revokeObjectURL(audioUrl);
|
||||||
audioUrl = null;
|
audioUrl = null;
|
||||||
|
|
@ -358,6 +553,7 @@ function resetState() {
|
||||||
postActions.style.display = 'none';
|
postActions.style.display = 'none';
|
||||||
audioPreview.classList.remove('visible');
|
audioPreview.classList.remove('visible');
|
||||||
transcriptArea.classList.remove('visible');
|
transcriptArea.classList.remove('visible');
|
||||||
|
hideParakeetProgress();
|
||||||
statusBar.className = 'status-bar';
|
statusBar.className = 'status-bar';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue