# jeffsi-meet/deploy/meeting-intelligence/transcriber/app/database.py
# (246 lines, 8.5 KiB, Python)

"""
Database operations for the Transcription Service.
"""
import json
import uuid
from typing import Optional, List, Dict, Any

import asyncpg
import structlog
log = structlog.get_logger()
class Database:
    """Database operations for transcription service.

    Wraps an asyncpg connection pool and exposes the queries used against
    the ``processing_jobs``, ``meetings`` and ``transcripts`` tables.
    ``connect()`` must be awaited before any query method is used.
    """

    def __init__(self, connection_string: str):
        """Store the connection string; no connection is opened here."""
        self.connection_string = connection_string
        # Populated by connect(); None until then.
        self.pool: Optional[asyncpg.Pool] = None

    @staticmethod
    async def _init_connection(conn) -> None:
        """Register JSON codecs on a newly opened pool connection.

        asyncpg exchanges ``json``/``jsonb`` values as plain strings unless
        a codec is installed. The query methods below pass dicts as
        parameters and treat fetched ``result`` values as dicts, so both
        types are mapped through ``json.dumps`` / ``json.loads``.
        """
        for type_name in ("json", "jsonb"):
            await conn.set_type_codec(
                type_name,
                encoder=json.dumps,
                decoder=json.loads,
                schema="pg_catalog",
            )

    def _require_pool(self):
        """Return the pool, or raise RuntimeError if connect() was not called."""
        if self.pool is None:
            raise RuntimeError(
                "Database pool is not connected; call connect() first"
            )
        return self.pool

    async def connect(self):
        """Establish database connection pool."""
        log.info("Connecting to database...")
        self.pool = await asyncpg.create_pool(
            self.connection_string,
            min_size=2,
            max_size=10,
            # Runs once per new connection so every pooled connection
            # encodes/decodes JSON columns the same way.
            init=self._init_connection,
        )
        log.info("Database connected")

    async def disconnect(self):
        """Close database connection pool (no-op if never connected)."""
        if self.pool:
            await self.pool.close()
            log.info("Database disconnected")

    async def health_check(self):
        """Check database connectivity by running ``SELECT 1``.

        Raises:
            RuntimeError: if the pool has not been created yet.
        """
        async with self._require_pool().acquire() as conn:
            await conn.fetchval("SELECT 1")

    async def create_transcription_job(
        self,
        meeting_id: str,
        audio_path: Optional[str] = None,
        video_path: Optional[str] = None,
        enable_diarization: bool = True,
        language: Optional[str] = None,
        priority: int = 5
    ) -> str:
        """Create a new transcription job.

        Inserts a ``'transcribe'`` row with status ``'pending'`` into
        ``processing_jobs``. The job parameters (paths, diarization flag,
        language) are stored as a JSON object in the ``result`` column.

        Returns:
            The generated job id (a UUID4 string).
        """
        job_id = str(uuid.uuid4())
        job_params = {
            "audio_path": audio_path,
            "video_path": video_path,
            "enable_diarization": enable_diarization,
            "language": language,
        }
        async with self._require_pool().acquire() as conn:
            await conn.execute("""
                INSERT INTO processing_jobs (
                    id, meeting_id, job_type, status, priority,
                    result
                )
                VALUES ($1, $2::uuid, 'transcribe', 'pending', $3, $4)
            """, job_id, meeting_id, priority, job_params)
        log.info("Created transcription job", job_id=job_id, meeting_id=meeting_id)
        return job_id

    async def get_job(self, job_id: str) -> Optional[Dict[str, Any]]:
        """Get a job by ID.

        Returns:
            The job row as a dict, or None when no row matches.
        """
        async with self._require_pool().acquire() as conn:
            row = await conn.fetchrow("""
                SELECT id, meeting_id, job_type, status, priority,
                       attempts, started_at, completed_at,
                       error_message, result, created_at
                FROM processing_jobs
                WHERE id = $1
            """, job_id)
        if row:
            return dict(row)
        return None

    async def get_next_pending_job(self) -> Optional[Dict[str, Any]]:
        """Get the next pending job and mark it as processing.

        Picks the pending ``'transcribe'`` job with the lowest ``priority``
        value (oldest first on ties), sets its status to ``'processing'``,
        stamps ``started_at`` and increments ``attempts``.

        Returns:
            A dict with ``id``, ``meeting_id``, ``job_type`` and ``result``,
            plus the keys of the ``result`` JSON object merged in at the top
            level; or None when no pending job exists.
        """
        async with self._require_pool().acquire() as conn:
            # Use FOR UPDATE SKIP LOCKED to prevent race conditions
            row = await conn.fetchrow("""
                UPDATE processing_jobs
                SET status = 'processing',
                    started_at = NOW(),
                    attempts = attempts + 1
                WHERE id = (
                    SELECT id FROM processing_jobs
                    WHERE status = 'pending'
                      AND job_type = 'transcribe'
                    ORDER BY priority ASC, created_at ASC
                    FOR UPDATE SKIP LOCKED
                    LIMIT 1
                )
                RETURNING id, meeting_id, job_type, result
            """)
        if not row:
            return None
        job = dict(row)
        # Merge result JSON into the dict; skip anything that is not a JSON
        # object (NULL, array, scalar) since it cannot be merged as keys.
        params = job.get("result")
        if params and isinstance(params, dict):
            job.update(params)
        return job

    async def update_job_status(
        self,
        job_id: str,
        status: str,
        error_message: Optional[str] = None,
        result: Optional[dict] = None,
        progress: Optional[float] = None
    ):
        """Update job status.

        Args:
            job_id: Id of the ``processing_jobs`` row to update.
            status: New status value. ``"completed"`` also stamps
                ``completed_at``.
            error_message: Written to ``error_message`` (None clears it).
            result: When given, replaces the stored ``result`` JSON; when
                None the stored value is kept.
            progress: Ignored for ``"completed"``. Otherwise stored under the
                ``"progress"`` key: added to a copy of ``result`` when
                ``result`` is given, or merged into the stored JSON when it
                is not, so existing keys such as ``audio_path`` survive.
        """
        async with self._require_pool().acquire() as conn:
            if status == "completed":
                await conn.execute("""
                    UPDATE processing_jobs
                    SET status = $1,
                        completed_at = NOW(),
                        error_message = $2,
                        result = COALESCE($3::jsonb, result)
                    WHERE id = $4
                """, status, error_message, result, job_id)
                return

            if result is None and progress is not None:
                # Progress-only update: merge rather than replace, otherwise
                # the job parameters kept in `result` would be wiped.
                await conn.execute("""
                    UPDATE processing_jobs
                    SET status = $1,
                        error_message = $2,
                        result = COALESCE(result, '{}'::jsonb) || $3::jsonb
                    WHERE id = $4
                """, status, error_message, {"progress": progress}, job_id)
                return

            payload = result
            if progress is not None:
                # Build a new dict so the caller's `result` is not mutated.
                payload = {**result, "progress": progress}
            await conn.execute("""
                UPDATE processing_jobs
                SET status = $1,
                    error_message = $2,
                    result = COALESCE($3::jsonb, result)
                WHERE id = $4
            """, status, error_message, payload, job_id)

    async def update_job_audio_path(self, job_id: str, audio_path: str):
        """Update the audio path for a job.

        Merges ``{"audio_path": audio_path}`` into the job's ``result`` JSON.
        """
        async with self._require_pool().acquire() as conn:
            # COALESCE guards against a NULL result: NULL || jsonb is NULL,
            # which would silently discard the new path.
            await conn.execute("""
                UPDATE processing_jobs
                SET result = COALESCE(result, '{}'::jsonb) || $1::jsonb
                WHERE id = $2
            """, {"audio_path": audio_path}, job_id)

    async def update_meeting_status(self, meeting_id: str, status: str):
        """Update meeting processing status and bump ``updated_at``."""
        async with self._require_pool().acquire() as conn:
            await conn.execute("""
                UPDATE meetings
                SET status = $1,
                    updated_at = NOW()
                WHERE id = $2::uuid
            """, status, meeting_id)

    async def insert_transcript_segment(
        self,
        meeting_id: str,
        segment_index: int,
        start_time: float,
        end_time: float,
        text: str,
        speaker_id: Optional[str] = None,
        speaker_label: Optional[str] = None,
        confidence: Optional[float] = None,
        language: str = "en"
    ):
        """Insert a transcript segment into ``transcripts``."""
        async with self._require_pool().acquire() as conn:
            await conn.execute("""
                INSERT INTO transcripts (
                    meeting_id, segment_index, start_time, end_time,
                    text, speaker_id, speaker_label, confidence, language
                )
                VALUES ($1::uuid, $2, $3, $4, $5, $6, $7, $8, $9)
            """, meeting_id, segment_index, start_time, end_time,
                text, speaker_id, speaker_label, confidence, language)

    async def get_transcript(self, meeting_id: str) -> List[Dict[str, Any]]:
        """Get all transcript segments for a meeting.

        Returns:
            Segment rows as dicts ordered by ``segment_index``; an empty
            list when the meeting has no segments.
        """
        async with self._require_pool().acquire() as conn:
            rows = await conn.fetch("""
                SELECT id, segment_index, start_time, end_time,
                       speaker_id, speaker_label, text, confidence, language
                FROM transcripts
                WHERE meeting_id = $1::uuid
                ORDER BY segment_index ASC
            """, meeting_id)
        return [dict(row) for row in rows]

    async def get_meeting(self, meeting_id: str) -> Optional[Dict[str, Any]]:
        """Get meeting details.

        Returns:
            The meeting row as a dict, or None when no row matches.
        """
        async with self._require_pool().acquire() as conn:
            row = await conn.fetchrow("""
                SELECT id, conference_id, conference_name, title,
                       started_at, ended_at, duration_seconds,
                       recording_path, audio_path, status,
                       metadata, created_at
                FROM meetings
                WHERE id = $1::uuid
            """, meeting_id)
        if row:
            return dict(row)
        return None

    async def create_meeting(
        self,
        conference_id: str,
        conference_name: Optional[str] = None,
        title: Optional[str] = None,
        recording_path: Optional[str] = None,
        metadata: Optional[dict] = None
    ) -> str:
        """Create a new meeting record with status ``'recording'``.

        ``metadata`` defaults to an empty JSON object when omitted.

        Returns:
            The generated meeting id (a UUID4 string).
        """
        meeting_id = str(uuid.uuid4())
        async with self._require_pool().acquire() as conn:
            await conn.execute("""
                INSERT INTO meetings (
                    id, conference_id, conference_name, title,
                    recording_path, status, metadata
                )
                VALUES ($1, $2, $3, $4, $5, 'recording', $6)
            """, meeting_id, conference_id, conference_name, title,
                recording_path, metadata or {})
        log.info("Created meeting", meeting_id=meeting_id, conference_id=conference_id)
        return meeting_id
class DatabaseError(Exception):
    """Database operation error."""