# jeffsi-meet/deploy/meeting-intelligence/api/app/database.py

"""
Database operations for the Meeting Intelligence API.
"""
import json
import uuid
from datetime import datetime
from typing import Optional, List, Dict, Any
import asyncpg
import structlog
# Module-level structured logger shared by the Database class below.
log = structlog.get_logger()
class Database:
    """Async PostgreSQL data-access layer for the Meeting Intelligence API.

    Wraps an ``asyncpg`` connection pool and exposes helpers for meetings,
    transcripts, AI summaries, full-text / semantic search, webhook events
    and processing jobs.  Call :meth:`connect` once at startup and
    :meth:`disconnect` at shutdown; every other method assumes the pool
    has been created.
    """

    # Whitelist of columns that update_meeting() may modify.  Column names
    # are interpolated into the SQL string, so restricting them to this
    # fixed set is what keeps that f-string safe from SQL injection.
    _UPDATABLE_MEETING_FIELDS = frozenset((
        'status', 'title', 'ended_at', 'duration_seconds',
        'recording_path', 'audio_path', 'error_message',
    ))

    def __init__(self, connection_string: str):
        """Remember the DSN; no connection is opened until connect()."""
        self.connection_string = connection_string
        self.pool: Optional[asyncpg.Pool] = None

    async def connect(self):
        """Create the asyncpg connection pool (2–20 connections)."""
        log.info("Connecting to database...")
        self.pool = await asyncpg.create_pool(
            self.connection_string,
            min_size=2,
            max_size=20
        )
        log.info("Database connected")

    async def disconnect(self):
        """Close the connection pool if it was ever opened."""
        if self.pool:
            await self.pool.close()
            log.info("Database disconnected")

    async def health_check(self):
        """Run a trivial query; raises if the database is unreachable."""
        async with self.pool.acquire() as conn:
            await conn.fetchval("SELECT 1")

    # ==================== Meetings ====================

    async def list_meetings(
        self,
        limit: int = 50,
        offset: int = 0,
        status: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """List meetings, newest first, with pagination.

        Args:
            limit: Maximum number of rows to return.
            offset: Number of rows to skip.
            status: If truthy, only meetings in this status are returned.

        Returns:
            One plain dict per meeting row.
        """
        # Build the optional WHERE clause once instead of duplicating the
        # whole query; placeholder numbers track the params list length.
        params: List[Any] = []
        where = ""
        if status:
            params.append(status)
            where = "WHERE status = $1"
        sql = f"""
            SELECT id, conference_id, conference_name, title,
                   started_at, ended_at, duration_seconds,
                   status, created_at
            FROM meetings
            {where}
            ORDER BY created_at DESC
            LIMIT ${len(params) + 1} OFFSET ${len(params) + 2}
        """
        params.extend((limit, offset))
        async with self.pool.acquire() as conn:
            rows = await conn.fetch(sql, *params)
        return [dict(row) for row in rows]

    async def get_meeting(self, meeting_id: str) -> Optional[Dict[str, Any]]:
        """Fetch one meeting with derived counts, or None if not found.

        The sub-selects add ``segment_count``, ``participant_count`` and
        (an arbitrary) ``summary_id`` to the returned dict.
        """
        async with self.pool.acquire() as conn:
            row = await conn.fetchrow("""
                SELECT m.id, m.conference_id, m.conference_name, m.title,
                       m.started_at, m.ended_at, m.duration_seconds,
                       m.recording_path, m.audio_path, m.status,
                       m.metadata, m.created_at,
                       (SELECT COUNT(*) FROM transcripts WHERE meeting_id = m.id) as segment_count,
                       (SELECT COUNT(*) FROM meeting_participants WHERE meeting_id = m.id) as participant_count,
                       (SELECT id FROM summaries WHERE meeting_id = m.id LIMIT 1) as summary_id
                FROM meetings m
                WHERE m.id = $1::uuid
            """, meeting_id)
        return dict(row) if row else None

    async def create_meeting(
        self,
        conference_id: str,
        conference_name: Optional[str] = None,
        title: Optional[str] = None,
        recording_path: Optional[str] = None,
        started_at: Optional[datetime] = None,
        metadata: Optional[dict] = None
    ) -> str:
        """Insert a meeting in 'recording' status and return its new UUID."""
        meeting_id = str(uuid.uuid4())
        async with self.pool.acquire() as conn:
            await conn.execute("""
                INSERT INTO meetings (
                    id, conference_id, conference_name, title,
                    recording_path, started_at, status, metadata
                )
                VALUES ($1, $2, $3, $4, $5, $6, 'recording', $7::jsonb)
            """, meeting_id, conference_id, conference_name, title,
                recording_path,
                # NOTE(review): utcnow() is naive (and deprecated in 3.12);
                # presumably the column stores naive UTC — confirm before
                # switching to timezone-aware datetimes.
                started_at or datetime.utcnow(),
                json.dumps(metadata or {}))
        return meeting_id

    async def update_meeting(
        self,
        meeting_id: str,
        **kwargs
    ):
        """Update whitelisted meeting columns; unknown kwargs are ignored.

        Always bumps ``updated_at``.  A no-op (nothing to set) returns
        without touching the database.
        """
        if not kwargs:
            return
        set_clauses: List[str] = []
        values: List[Any] = []
        for key, value in kwargs.items():
            if key in self._UPDATABLE_MEETING_FIELDS:
                values.append(value)
                set_clauses.append(f"{key} = ${len(values)}")
        if not set_clauses:
            return
        values.append(meeting_id)
        async with self.pool.acquire() as conn:
            # Column names come only from the frozen whitelist above, so
            # this f-string cannot inject attacker-controlled SQL.
            await conn.execute(f"""
                UPDATE meetings
                SET {', '.join(set_clauses)}, updated_at = NOW()
                WHERE id = ${len(values)}::uuid
            """, *values)

    # ==================== Transcripts ====================

    async def get_transcript(
        self,
        meeting_id: str,
        speaker_filter: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """Get a meeting's transcript segments in segment order.

        Args:
            meeting_id: Meeting UUID.
            speaker_filter: If truthy, only segments by this speaker_id.
        """
        sql = """
            SELECT id, segment_index, start_time, end_time,
                   speaker_id, speaker_name, speaker_label,
                   text, confidence, language
            FROM transcripts
            WHERE meeting_id = $1::uuid
        """
        params: List[Any] = [meeting_id]
        if speaker_filter:
            params.append(speaker_filter)
            sql += " AND speaker_id = $2"
        sql += " ORDER BY segment_index ASC"
        async with self.pool.acquire() as conn:
            rows = await conn.fetch(sql, *params)
        return [dict(row) for row in rows]

    async def get_speakers(self, meeting_id: str) -> List[Dict[str, Any]]:
        """Per-speaker statistics (segments, speaking time, characters),
        longest speaker first.  Segments with NULL speaker_id are excluded.
        """
        async with self.pool.acquire() as conn:
            rows = await conn.fetch("""
                SELECT speaker_id, speaker_label,
                       COUNT(*) as segment_count,
                       SUM(end_time - start_time) as speaking_time,
                       SUM(LENGTH(text)) as character_count
                FROM transcripts
                WHERE meeting_id = $1::uuid AND speaker_id IS NOT NULL
                GROUP BY speaker_id, speaker_label
                ORDER BY speaking_time DESC
            """, meeting_id)
        return [dict(row) for row in rows]

    # ==================== Summaries ====================

    async def get_summary(self, meeting_id: str) -> Optional[Dict[str, Any]]:
        """Return the most recently generated summary, or None."""
        async with self.pool.acquire() as conn:
            row = await conn.fetchrow("""
                SELECT id, meeting_id, summary_text, key_points,
                       action_items, decisions, topics, sentiment,
                       model_used, generated_at
                FROM summaries
                WHERE meeting_id = $1::uuid
                ORDER BY generated_at DESC
                LIMIT 1
            """, meeting_id)
        return dict(row) if row else None

    async def save_summary(
        self,
        meeting_id: str,
        summary_text: str,
        key_points: List[str],
        action_items: List[dict],
        decisions: List[str],
        topics: List[dict],
        sentiment: str,
        model_used: str,
        prompt_tokens: int = 0,
        completion_tokens: int = 0
    ) -> int:
        """Persist an AI-generated summary and return its row id.

        The structured fields are serialized explicitly: the pool registers
        no JSON codec, so asyncpg cannot encode Python lists/dicts into
        jsonb columns by itself.  This matches the json.dumps(...)::jsonb
        convention used by create_meeting / save_webhook_event / create_job.
        """
        async with self.pool.acquire() as conn:
            row = await conn.fetchrow("""
                INSERT INTO summaries (
                    meeting_id, summary_text, key_points, action_items,
                    decisions, topics, sentiment, model_used,
                    prompt_tokens, completion_tokens
                )
                VALUES ($1::uuid, $2, $3::jsonb, $4::jsonb, $5::jsonb,
                        $6::jsonb, $7, $8, $9, $10)
                RETURNING id
            """, meeting_id, summary_text,
                json.dumps(key_points), json.dumps(action_items),
                json.dumps(decisions), json.dumps(topics),
                sentiment, model_used, prompt_tokens, completion_tokens)
        return row["id"]

    # ==================== Search ====================

    async def fulltext_search(
        self,
        query: str,
        meeting_id: Optional[str] = None,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """Full-text search across transcripts, ranked by ts_rank.

        Args:
            query: Plain-language search string (plainto_tsquery input).
            meeting_id: If truthy, restrict the search to one meeting.
            limit: Maximum number of hits.
        """
        sql = """
            SELECT t.id, t.meeting_id, t.start_time, t.end_time,
                   t.speaker_label, t.text, m.title as meeting_title,
                   ts_rank(to_tsvector('english', t.text),
                           plainto_tsquery('english', $1)) as rank
            FROM transcripts t
            JOIN meetings m ON t.meeting_id = m.id
            WHERE to_tsvector('english', t.text) @@ plainto_tsquery('english', $1)
        """
        params: List[Any] = [query]
        if meeting_id:
            params.append(meeting_id)
            sql += f" AND t.meeting_id = ${len(params)}::uuid"
        params.append(limit)
        sql += f" ORDER BY rank DESC LIMIT ${len(params)}"
        async with self.pool.acquire() as conn:
            rows = await conn.fetch(sql, *params)
        return [dict(row) for row in rows]

    async def semantic_search(
        self,
        embedding: List[float],
        meeting_id: Optional[str] = None,
        threshold: float = 0.7,
        limit: int = 20
    ) -> List[Dict[str, Any]]:
        """Semantic search over transcript embeddings (pgvector).

        Uses cosine distance (``<=>``); similarity is 1 - distance and only
        hits above *threshold* are returned, nearest first.
        """
        # pgvector accepts its textual literal form, e.g. "[0.1,0.2,...]".
        embedding_str = f"[{','.join(map(str, embedding))}]"
        params: List[Any] = [embedding_str]
        meeting_filter = ""
        if meeting_id:
            params.append(meeting_id)
            meeting_filter = f"te.meeting_id = ${len(params)}::uuid AND "
        params.append(threshold)
        sql = f"""
            SELECT te.transcript_id, te.meeting_id, te.chunk_text,
                   t.start_time, t.speaker_label, m.title as meeting_title,
                   1 - (te.embedding <=> $1::vector) as similarity
            FROM transcript_embeddings te
            JOIN transcripts t ON te.transcript_id = t.id
            JOIN meetings m ON te.meeting_id = m.id
            WHERE {meeting_filter}1 - (te.embedding <=> $1::vector) > ${len(params)}
            ORDER BY te.embedding <=> $1::vector
            LIMIT ${len(params) + 1}
        """
        params.append(limit)
        async with self.pool.acquire() as conn:
            rows = await conn.fetch(sql, *params)
        return [dict(row) for row in rows]

    # ==================== Webhooks ====================

    async def save_webhook_event(
        self,
        event_type: str,
        payload: dict
    ) -> int:
        """Store a raw webhook event for later processing; returns row id."""
        # (The previous function-local `import json` was redundant: json is
        # already imported at module level.)
        async with self.pool.acquire() as conn:
            row = await conn.fetchrow("""
                INSERT INTO webhook_events (event_type, payload)
                VALUES ($1, $2::jsonb)
                RETURNING id
            """, event_type, json.dumps(payload))
        return row["id"]

    # ==================== Jobs ====================

    async def create_job(
        self,
        meeting_id: str,
        job_type: str,
        priority: int = 5,
        result: Optional[dict] = None
    ) -> int:
        """Enqueue a processing job for a meeting and return its row id.

        Args:
            meeting_id: Meeting UUID the job belongs to.
            job_type: Job kind understood by the worker.
            priority: Scheduling priority (default 5).
            result: Optional initial result payload; stored as jsonb.
        """
        async with self.pool.acquire() as conn:
            row = await conn.fetchrow("""
                INSERT INTO processing_jobs (meeting_id, job_type, priority, result)
                VALUES ($1::uuid, $2, $3, $4::jsonb)
                RETURNING id
            """, meeting_id, job_type, priority, json.dumps(result or {}))
        return row["id"]