fix(meeting-intelligence): fix transcription pipeline bugs

- Fix JSONB serialization in API database.py (json.dumps + ::jsonb casts)
- Fix integer vs UUID job ID handling in transcriber
- Fix UUID-to-string conversion for meeting_id in processor
- Add whisper.cpp shared libraries to Dockerfile (libwhisper, libggml)
- Fix Jibri finalize script log directory path
- Add graceful error handling for speaker diarization
- Support video_path parameter for automatic audio extraction

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Jeff Emmett 2026-02-06 12:25:59 +00:00
parent 0e9c78c86a
commit dd12348da8
6 changed files with 138 additions and 145 deletions
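Background on the JSONB fixes: asyncpg does not encode Python dicts for json/jsonb parameters by default, it expects them as text and raises a DataError when handed a dict, so each call site below now serializes with json.dumps and casts the placeholder with ::jsonb. A minimal sketch of the pattern, modeled on the webhook_events insert in the first file (pool setup elided):

    import json
    import asyncpg

    async def save_webhook_event(pool: asyncpg.Pool, event_type: str, payload: dict) -> int:
        """Insert a webhook event and return its auto-generated integer id."""
        async with pool.acquire() as conn:
            # Passing `payload` (a dict) directly would fail: without a custom
            # codec, asyncpg expects json/jsonb parameters as text.
            return await conn.fetchval(
                """
                INSERT INTO webhook_events (event_type, payload)
                VALUES ($1, $2::jsonb)
                RETURNING id
                """,
                event_type,
                json.dumps(payload),
            )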

File: database.py (Meeting Intelligence API)

@@ -2,6 +2,7 @@
 Database operations for the Meeting Intelligence API.
 """
+import json
 import uuid
 from datetime import datetime
 from typing import Optional, List, Dict, Any
@@ -109,9 +110,9 @@ class Database:
                     id, conference_id, conference_name, title,
                     recording_path, started_at, status, metadata
                 )
-                VALUES ($1, $2, $3, $4, $5, $6, 'recording', $7)
+                VALUES ($1, $2, $3, $4, $5, $6, 'recording', $7::jsonb)
             """, meeting_id, conference_id, conference_name, title,
-                 recording_path, started_at or datetime.utcnow(), metadata or {})
+                 recording_path, started_at or datetime.utcnow(), json.dumps(metadata or {}))
         return meeting_id
@@ -326,12 +327,13 @@ class Database:
         payload: dict
     ) -> int:
         """Save a webhook event for processing."""
+        import json
         async with self.pool.acquire() as conn:
             row = await conn.fetchrow("""
                 INSERT INTO webhook_events (event_type, payload)
-                VALUES ($1, $2)
+                VALUES ($1, $2::jsonb)
                 RETURNING id
-            """, event_type, payload)
+            """, event_type, json.dumps(payload))
         return row["id"]
@@ -348,8 +350,8 @@ class Database:
         async with self.pool.acquire() as conn:
             row = await conn.fetchrow("""
                 INSERT INTO processing_jobs (meeting_id, job_type, priority, result)
-                VALUES ($1::uuid, $2, $3, $4)
+                VALUES ($1::uuid, $2, $3, $4::jsonb)
                 RETURNING id
-            """, meeting_id, job_type, priority, result or {})
+            """, meeting_id, job_type, priority, json.dumps(result or {}))
         return row["id"]

File: Jibri recording finalize script

@@ -1,19 +1,12 @@
 #!/bin/bash
 # Jibri Recording Finalize Script
-# Called when Jibri finishes recording a meeting
-#
-# Arguments:
-#   $1 - Recording directory path (e.g., /recordings/<conference_id>/<timestamp>)
-#
-# This script:
-#   1. Finds the recording file
-#   2. Notifies the Meeting Intelligence API to start processing
 set -e
 
 RECORDING_DIR="$1"
 API_URL="${MEETING_INTELLIGENCE_API:-http://api:8000}"
-LOG_FILE="/var/log/jibri/finalize.log"
+LOG_FILE="/config/logs/finalize.log"
+
+mkdir -p /config/logs
 
 log() {
     echo "[$(date -Iseconds)] $1" >> "$LOG_FILE"
@@ -23,13 +16,11 @@ log() {
 log "=== Finalize script started ==="
 log "Recording directory: $RECORDING_DIR"
 
-# Validate recording directory
 if [ -z "$RECORDING_DIR" ] || [ ! -d "$RECORDING_DIR" ]; then
     log "ERROR: Invalid recording directory: $RECORDING_DIR"
     exit 1
 fi
 
-# Find the recording file (MP4 or WebM)
 RECORDING_FILE=$(find "$RECORDING_DIR" -type f \( -name "*.mp4" -o -name "*.webm" \) | head -1)
 
 if [ -z "$RECORDING_FILE" ]; then
@@ -39,66 +30,26 @@ fi
 log "Found recording file: $RECORDING_FILE"
 
-# Get file info
 FILE_SIZE=$(stat -c%s "$RECORDING_FILE" 2>/dev/null || echo "0")
 log "Recording file size: $FILE_SIZE bytes"
 
-# Extract conference info from path
-# Expected format: /recordings/<conference_id>/<timestamp>/recording.mp4
-CONFERENCE_ID=$(echo "$RECORDING_DIR" | awk -F'/' '{print $(NF-1)}')
-if [ -z "$CONFERENCE_ID" ]; then
-    CONFERENCE_ID=$(basename "$(dirname "$RECORDING_DIR")")
-fi
+CONFERENCE_ID=$(basename "$RECORDING_DIR")
 
-# Look for metadata file (Jibri sometimes creates this)
 METADATA_FILE="$RECORDING_DIR/metadata.json"
 if [ -f "$METADATA_FILE" ]; then
-    log "Found metadata file: $METADATA_FILE"
     METADATA=$(cat "$METADATA_FILE")
 else
     METADATA="{}"
 fi
 
-# Prepare webhook payload
-PAYLOAD=$(cat <<EOF
-{
-  "event_type": "recording_completed",
-  "conference_id": "$CONFERENCE_ID",
-  "recording_path": "$RECORDING_FILE",
-  "recording_dir": "$RECORDING_DIR",
-  "file_size_bytes": $FILE_SIZE,
-  "completed_at": "$(date -Iseconds)",
-  "metadata": $METADATA
-}
-EOF
-)
+PAYLOAD="{\"event_type\":\"recording_completed\",\"conference_id\":\"$CONFERENCE_ID\",\"recording_path\":\"$RECORDING_FILE\",\"file_size_bytes\":$FILE_SIZE,\"metadata\":$METADATA}"
 
 log "Sending webhook to $API_URL/webhooks/recording-complete"
-log "Payload: $PAYLOAD"
 
-# Send webhook to Meeting Intelligence API
-RESPONSE=$(curl -s -w "\n%{http_code}" \
-    -X POST \
-    -H "Content-Type: application/json" \
-    -d "$PAYLOAD" \
-    "$API_URL/webhooks/recording-complete" 2>&1)
+RESPONSE=$(curl -s -w "\n%{http_code}" -X POST -H "Content-Type: application/json" -d "$PAYLOAD" "$API_URL/webhooks/recording-complete" 2>&1)
 
 HTTP_CODE=$(echo "$RESPONSE" | tail -1)
 BODY=$(echo "$RESPONSE" | head -n -1)
 
-if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "202" ]; then
-    log "SUCCESS: Webhook accepted (HTTP $HTTP_CODE)"
-    log "Response: $BODY"
-else
-    log "WARNING: Webhook returned HTTP $HTTP_CODE"
-    log "Response: $BODY"
-    # Don't fail the script - the recording is still saved
-    # The API can be retried later
-fi
+log "Response: HTTP $HTTP_CODE - $BODY"
 
-# Optional: Clean up old recordings (keep last 30 days)
-# find /recordings -type f -mtime +30 -delete
 
 log "=== Finalize script completed ==="
 exit 0
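To exercise this webhook by hand, the same payload can be posted from Python; a sketch using only the standard library (host, room name, and sizes are illustrative):

    import json
    import urllib.request

    # Fields mirror the PAYLOAD the finalize script builds (values illustrative)
    payload = {
        "event_type": "recording_completed",
        "conference_id": "myroom",
        "recording_path": "/recordings/myroom/recording.mp4",
        "file_size_bytes": 104857600,
        "metadata": {},
    }
    req = urllib.request.Request(
        "http://api:8000/webhooks/recording-complete",
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=10) as resp:
        print(resp.status, resp.read().decode())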

File: Dockerfile (whisper.cpp transcription service)

@@ -5,44 +5,30 @@
 FROM python:3.11-slim AS builder
 
 # Install build dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential \
-    cmake \
-    git \
-    ffmpeg \
-    wget \
-    && rm -rf /var/lib/apt/lists/*
+RUN apt-get update && apt-get install -y --no-install-recommends build-essential cmake git ffmpeg wget && rm -rf /var/lib/apt/lists/*
 
 # Build whisper.cpp
 WORKDIR /build
-RUN git clone https://github.com/ggerganov/whisper.cpp.git && \
-    cd whisper.cpp && \
-    cmake -B build -DWHISPER_BUILD_EXAMPLES=ON && \
-    cmake --build build --config Release -j$(nproc) && \
-    cp build/bin/whisper-cli /usr/local/bin/whisper && \
-    cp build/bin/whisper-server /usr/local/bin/whisper-server 2>/dev/null || true
+RUN git clone https://github.com/ggerganov/whisper.cpp.git && cd whisper.cpp && cmake -B build -DWHISPER_BUILD_EXAMPLES=ON -DBUILD_SHARED_LIBS=ON && cmake --build build --config Release -j$(nproc) && cp build/bin/whisper-cli /usr/local/bin/whisper && cp build/bin/whisper-server /usr/local/bin/whisper-server 2>/dev/null || true && mkdir -p /usr/local/lib/whisper && find build -name '*.so*' -exec cp {} /usr/local/lib/whisper/ \; && ls -la /usr/local/lib/whisper/
 
 # Download whisper models
 WORKDIR /models
-RUN cd /build/whisper.cpp && \
-    bash models/download-ggml-model.sh small && \
-    mv models/ggml-small.bin /models/
+RUN cd /build/whisper.cpp && bash models/download-ggml-model.sh small && mv models/ggml-small.bin /models/
 
 # Production image
 FROM python:3.11-slim
 
 # Install runtime dependencies and build tools (for compiling Python packages)
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    ffmpeg \
-    libsndfile1 \
-    curl \
-    build-essential \
-    && rm -rf /var/lib/apt/lists/*
+RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg libsndfile1 curl build-essential && rm -rf /var/lib/apt/lists/*
 
-# Copy whisper binary and models
+# Copy whisper binary, libraries, and models
 COPY --from=builder /usr/local/bin/whisper /usr/local/bin/whisper
+COPY --from=builder /usr/local/lib/whisper/ /usr/local/lib/
 COPY --from=builder /models /models
 
+# Update shared library cache
+RUN ldconfig && /usr/local/bin/whisper --help || echo "Whisper help check failed"
 
 # Set up Python environment
 WORKDIR /app
@@ -65,8 +51,7 @@ ENV WHISPER_MODEL=/models/ggml-small.bin
 ENV WHISPER_THREADS=8
 
 # Health check
-HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
-    CMD curl -f http://localhost:8001/health || exit 1
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 CMD curl -f http://localhost:8001/health || exit 1
 
 # Run the service
 EXPOSE 8001
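The added COPY of /usr/local/lib/whisper/ and the ldconfig step exist because building with -DBUILD_SHARED_LIBS=ON leaves whisper-cli dynamically linked against libwhisper and libggml; without those libraries in the runtime image the binary dies at startup with a shared-object loader error. A quick in-container sanity check, sketched in Python:

    import subprocess

    # Run inside the built image: if a shared library is missing, the loader
    # fails before whisper's own code runs and names the missing .so on stderr.
    proc = subprocess.run(["whisper", "--help"], capture_output=True, text=True)
    if proc.returncode != 0 and "shared object" in proc.stderr:
        print("Missing shared libraries:", proc.stderr.strip())
    else:
        print("whisper resolved its shared libraries")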

File: database.py (Transcription Service)

@@ -2,6 +2,7 @@
 Database operations for the Transcription Service.
 """
+import json
 import uuid
 from typing import Optional, List, Dict, Any
@@ -47,28 +48,28 @@ class Database:
         enable_diarization: bool = True,
         language: Optional[str] = None,
         priority: int = 5
-    ) -> str:
-        """Create a new transcription job."""
-        job_id = str(uuid.uuid4())
+    ) -> int:
+        """Create a new transcription job. Returns the auto-generated job ID."""
+        result_data = {
+            "audio_path": audio_path,
+            "video_path": video_path,
+            "enable_diarization": enable_diarization,
+            "language": language
+        }
         async with self.pool.acquire() as conn:
-            await conn.execute("""
+            job_id = await conn.fetchval("""
                 INSERT INTO processing_jobs (
-                    id, meeting_id, job_type, status, priority,
-                    result
+                    meeting_id, job_type, status, priority, result
                 )
-                VALUES ($1, $2::uuid, 'transcribe', 'pending', $3, $4)
-                """, job_id, meeting_id, priority, {
-                    "audio_path": audio_path,
-                    "video_path": video_path,
-                    "enable_diarization": enable_diarization,
-                    "language": language
-                })
+                VALUES ($1::uuid, 'transcribe', 'pending', $2, $3::jsonb)
+                RETURNING id
+            """, meeting_id, priority, json.dumps(result_data))
 
         log.info("Created transcription job", job_id=job_id, meeting_id=meeting_id)
         return job_id
 
-    async def get_job(self, job_id: str) -> Optional[Dict[str, Any]]:
+    async def get_job(self, job_id: int) -> Optional[Dict[str, Any]]:
         """Get a job by ID."""
         async with self.pool.acquire() as conn:
             row = await conn.fetchrow("""
@@ -107,19 +108,30 @@ class Database:
             result = dict(row)
             # Merge result JSON into the dict
             if result.get("result"):
-                result.update(result["result"])
+                if isinstance(result["result"], dict):
+                    result.update(result["result"])
+                elif isinstance(result["result"], str):
+                    result.update(json.loads(result["result"]))
             return result
         return None
 
     async def update_job_status(
         self,
-        job_id: str,
+        job_id: int,
         status: str,
         error_message: Optional[str] = None,
         result: Optional[dict] = None,
        progress: Optional[float] = None
     ):
         """Update job status."""
+        result_json = None
+        if result is not None:
+            if progress is not None:
+                result["progress"] = progress
+            result_json = json.dumps(result)
+        elif progress is not None:
+            result_json = json.dumps({"progress": progress})
+
         async with self.pool.acquire() as conn:
             if status == "completed":
                 await conn.execute("""
@@ -129,29 +141,24 @@ class Database:
                     error_message = $2,
                     result = COALESCE($3::jsonb, result)
                 WHERE id = $4
-                """, status, error_message, result, job_id)
+                """, status, error_message, result_json, job_id)
             else:
-                update_result = result
-                if progress is not None:
-                    update_result = result or {}
-                    update_result["progress"] = progress
                 await conn.execute("""
                     UPDATE processing_jobs
                     SET status = $1,
                         error_message = $2,
                         result = COALESCE($3::jsonb, result)
                     WHERE id = $4
-                """, status, error_message, update_result, job_id)
+                """, status, error_message, result_json, job_id)
 
-    async def update_job_audio_path(self, job_id: str, audio_path: str):
+    async def update_job_audio_path(self, job_id: int, audio_path: str):
         """Update the audio path for a job."""
         async with self.pool.acquire() as conn:
             await conn.execute("""
                 UPDATE processing_jobs
                 SET result = result || $1::jsonb
                 WHERE id = $2
-            """, {"audio_path": audio_path}, job_id)
+            """, json.dumps({"audio_path": audio_path}), job_id)
 
     async def update_meeting_status(self, meeting_id: str, status: str):
         """Update meeting processing status."""
@@ -232,9 +239,9 @@ class Database:
                 id, conference_id, conference_name, title,
                 recording_path, status, metadata
             )
-            VALUES ($1, $2, $3, $4, $5, 'recording', $6)
+            VALUES ($1::uuid, $2, $3, $4, $5, 'recording', $6::jsonb)
         """, meeting_id, conference_id, conference_name, title,
-            recording_path, metadata or {})
+            recording_path, json.dumps(metadata or {}))
 
         log.info("Created meeting", meeting_id=meeting_id, conference_id=conference_id)
         return meeting_id
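Net effect of this file's changes: job IDs are now the table's auto-increment integers instead of client-generated UUID strings, and every dict bound to a jsonb column passes through json.dumps. A usage sketch (the Database instance, UUID, and paths are illustrative):

    async def queue_example(db) -> None:
        # db: an initialized Database from this module, pool already created
        job_id = await db.create_transcription_job(
            meeting_id="3f2b8c1e-0d7a-4c11-9d1c-5a6e7f8a9b0c",  # hypothetical UUID
            audio_path="/recordings/myroom/audio.wav",  # illustrative path
            video_path=None,
            enable_diarization=True,
            priority=5,
        )
        assert isinstance(job_id, int)  # was str(uuid.uuid4()) before this commit
        await db.update_job_status(job_id, "processing", progress=0.1)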

File: Transcription Service API (FastAPI app)

@@ -11,7 +11,7 @@ FastAPI service that handles:
 import asyncio
 import os
 from contextlib import asynccontextmanager
-from typing import Optional
+from typing import Optional, Union
 
 from fastapi import FastAPI, BackgroundTasks, HTTPException
 from fastapi.responses import JSONResponse
@@ -33,20 +33,21 @@ log = structlog.get_logger()
 
 # Pydantic models
 class TranscribeRequest(BaseModel):
     meeting_id: str
-    audio_path: str
+    audio_path: Optional[str] = None
+    video_path: Optional[str] = None  # If provided, will extract audio first
     priority: int = 5
     enable_diarization: bool = True
     language: Optional[str] = None
 
 class TranscribeResponse(BaseModel):
-    job_id: str
+    job_id: int  # Integer from database auto-increment
     status: str
     message: str
 
 class JobStatus(BaseModel):
-    job_id: str
+    job_id: int
     status: str
     progress: Optional[float] = None
     result: Optional[dict] = None
@@ -172,24 +173,64 @@ async def service_status():
 @app.post("/transcribe", response_model=TranscribeResponse)
 async def queue_transcription(request: TranscribeRequest, background_tasks: BackgroundTasks):
     """Queue a transcription job."""
+    audio_path = request.audio_path
+
+    # If video_path provided, extract audio first
+    if request.video_path and not audio_path:
+        log.info(
+            "Extracting audio from video",
+            meeting_id=request.meeting_id,
+            video_path=request.video_path
+        )
+        if not os.path.exists(request.video_path):
+            raise HTTPException(
+                status_code=404,
+                detail=f"Video file not found: {request.video_path}"
+            )
+
+        # Extract audio using ffmpeg
+        import subprocess
+        audio_dir = os.environ.get("AUDIO_OUTPUT_DIR", "/audio")
+        os.makedirs(audio_dir, exist_ok=True)
+        audio_path = os.path.join(audio_dir, f"{request.meeting_id}.wav")
+
+        try:
+            result = subprocess.run([
+                "ffmpeg", "-y", "-i", request.video_path,
+                "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
+                audio_path
+            ], capture_output=True, text=True, timeout=300)
+            if result.returncode != 0:
+                log.error("FFmpeg error", stderr=result.stderr)
+                raise HTTPException(status_code=500, detail=f"Audio extraction failed: {result.stderr}")
+            log.info("Audio extracted", audio_path=audio_path)
+        except subprocess.TimeoutExpired:
+            raise HTTPException(status_code=500, detail="Audio extraction timed out")
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"Audio extraction failed: {str(e)}")
+
     log.info(
         "Received transcription request",
         meeting_id=request.meeting_id,
-        audio_path=request.audio_path
+        audio_path=audio_path
     )
 
     # Validate audio file exists
-    if not os.path.exists(request.audio_path):
+    if not audio_path or not os.path.exists(audio_path):
         raise HTTPException(
             status_code=404,
-            detail=f"Audio file not found: {request.audio_path}"
+            detail=f"Audio file not found: {audio_path}"
         )
 
-    # Create job record in database
+    # Create job record in database - use the extracted audio_path
     try:
         job_id = await state.db.create_transcription_job(
             meeting_id=request.meeting_id,
-            audio_path=request.audio_path,
+            audio_path=audio_path,  # Use extracted audio_path, not request.audio_path
+            video_path=request.video_path,
             enable_diarization=request.enable_diarization,
             language=request.language,
             priority=request.priority
@@ -216,7 +257,7 @@ async def queue_transcription(request: TranscribeRequest, background_tasks: BackgroundTasks):
 
 @app.get("/transcribe/{job_id}", response_model=JobStatus)
-async def get_job_status(job_id: str):
+async def get_job_status(job_id: int):
     """Get the status of a transcription job."""
 
     job = await state.db.get_job(job_id)
@@ -233,7 +274,7 @@ async def get_job_status(job_id: str):
 
 @app.delete("/transcribe/{job_id}")
-async def cancel_job(job_id: str):
+async def cancel_job(job_id: int):
     """Cancel a pending transcription job."""
 
     job = await state.db.get_job(job_id)
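With video_path accepted, a client can hand the service a raw recording and let the endpoint run ffmpeg itself; the job is then created against the extracted WAV. A request sketch (service URL and UUID illustrative, port 8001 as exposed by the Dockerfile above):

    import json
    import urllib.request

    body = {
        "meeting_id": "3f2b8c1e-0d7a-4c11-9d1c-5a6e7f8a9b0c",  # hypothetical UUID
        "video_path": "/recordings/myroom/recording.mp4",  # audio_path deliberately omitted
        "enable_diarization": True,
    }
    req = urllib.request.Request(
        "http://localhost:8001/transcribe",
        data=json.dumps(body).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        print(json.loads(resp.read()))  # {"job_id": <int>, "status": ..., "message": ...}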

File: job processor (Transcription Service)

@@ -4,7 +4,7 @@ Job Processor for the Transcription Service.
 Handles the processing pipeline:
 1. Audio extraction from video
 2. Transcription
-3. Speaker diarization
+3. Speaker diarization (optional, fails gracefully)
 4. Database storage
 """
@@ -76,7 +76,7 @@ class JobProcessor:
                 continue
 
             job_id = job["id"]
-            meeting_id = job["meeting_id"]
+            meeting_id = str(job["meeting_id"])  # Convert UUID to string
 
             log.info(
                 f"Worker {worker_id} processing job",
@@ -113,7 +113,7 @@
     async def _process_job(self, job: dict):
         """Process a single transcription job."""
         job_id = job["id"]
-        meeting_id = job["meeting_id"]
+        meeting_id = str(job["meeting_id"])  # Ensure string
         audio_path = job.get("audio_path")
         video_path = job.get("video_path")
         enable_diarization = job.get("enable_diarization", True)
@@ -149,32 +149,39 @@
                 duration=transcription.duration
             )
 
-            # Step 3: Speaker diarization
+            # Step 3: Speaker diarization (optional, fails gracefully)
             speaker_segments = []
             if enable_diarization and len(transcription.segments) > 0:
                 log.info("Starting speaker diarization")
                 await self.db.update_job_status(job_id, "processing", progress=0.6)
                 await self.db.update_meeting_status(meeting_id, "diarizing")
 
-                # Convert transcript segments to dicts for diarizer
-                transcript_dicts = [
-                    {"start": s.start, "end": s.end, "text": s.text}
-                    for s in transcription.segments
-                ]
+                try:
+                    # Convert transcript segments to dicts for diarizer
+                    transcript_dicts = [
+                        {"start": s.start, "end": s.end, "text": s.text}
+                        for s in transcription.segments
+                    ]
 
-                speaker_segments = await asyncio.get_event_loop().run_in_executor(
-                    None,
-                    lambda: self.diarizer.diarize(
-                        audio_path,
-                        transcript_segments=transcript_dicts
-                    )
-                )
+                    speaker_segments = await asyncio.get_event_loop().run_in_executor(
+                        None,
+                        lambda: self.diarizer.diarize(
+                            audio_path,
+                            transcript_segments=transcript_dicts
+                        )
+                    )
 
-                log.info(
-                    "Diarization complete",
-                    num_segments=len(speaker_segments),
-                    num_speakers=len(set(s.speaker_id for s in speaker_segments))
-                )
+                    log.info(
+                        "Diarization complete",
+                        num_segments=len(speaker_segments),
+                        num_speakers=len(set(s.speaker_id for s in speaker_segments))
+                    )
+                except Exception as e:
+                    log.warning(
+                        "Diarization failed, continuing without speaker labels",
+                        error=str(e)
+                    )
+                    speaker_segments = []
 
             # Step 4: Store results
             log.info("Storing transcript in database")