"""Pipeline orchestration tasks for ARQ worker.""" import json import logging import os import uuid import redis.asyncio as aioredis from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker from app.config import settings from app.models import Job, Clip from app.services import download, transcription, ai_analysis, clip_extraction logger = logging.getLogger(__name__) async def _get_session() -> AsyncSession: engine = create_async_engine(settings.database_url, echo=False) session_factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) return session_factory() async def _publish_progress( redis: aioredis.Redis, job_id: str, status: str, progress: float, stage_message: str, ): """Publish progress update via Redis pub/sub.""" data = { "status": status, "progress": round(progress, 2), "stage_message": stage_message, } await redis.publish(f"job:{job_id}:progress", json.dumps(data)) async def _update_job( db: AsyncSession, job: Job, status: str, progress: float, stage_message: str, **kwargs, ): """Update job in database.""" job.status = status job.progress = progress job.stage_message = stage_message for k, v in kwargs.items(): setattr(job, k, v) await db.commit() async def process_job(ctx: dict, job_id: str): """Main pipeline: download → transcribe → AI analysis → extract clips.""" r = ctx.get("redis") or aioredis.from_url(settings.redis_url) db = await _get_session() try: job = await db.get(Job, uuid.UUID(job_id)) if not job: logger.error(f"Job {job_id} not found") return logger.info(f"Processing job {job_id}: {job.source_type}") # === STAGE 1: DOWNLOAD === await _update_job(db, job, "downloading", 0.05, "Downloading video...") await _publish_progress(r, job_id, "downloading", 0.05, "Downloading video...") job_media_dir = os.path.join(settings.media_dir, job_id) os.makedirs(job_media_dir, exist_ok=True) if job.source_type == "youtube": try: video_info = await download.download_video(job.source_url, job_media_dir) except Exception as dl_err: if "Sign in to confirm" in str(dl_err) or "bot" in str(dl_err): raise ValueError( "YouTube blocked this download (bot detection). " "Try uploading the video file directly instead." ) raise job.title = video_info.title job.duration = video_info.duration job.media_path = video_info.video_path elif job.media_path: # Uploaded file - get duration duration = await clip_extraction.get_video_duration(job.media_path) job.duration = duration if not job.title: job.title = job.source_filename or "Uploaded Video" else: raise ValueError("No video source available") await db.commit() await _publish_progress( r, job_id, "downloading", 0.20, f"Downloaded: {job.title} ({job.duration:.0f}s)" ) # === STAGE 2: TRANSCRIBE === await _update_job( db, job, "transcribing", 0.25, "Extracting audio and transcribing..." ) await _publish_progress( r, job_id, "transcribing", 0.25, "Extracting audio and transcribing..." ) # Extract audio for transcription audio_path = os.path.join(job_media_dir, "audio.mp3") await download.extract_audio(job.media_path, audio_path) await _publish_progress( r, job_id, "transcribing", 0.30, "Transcribing with Whisper..." ) transcript = await transcription.transcribe(audio_path) job.transcript = transcript await db.commit() word_count = len(transcript.get("words", [])) await _publish_progress( r, job_id, "transcribing", 0.50, f"Transcription complete: {word_count} words" ) # === STAGE 3: AI ANALYSIS === await _update_job( db, job, "analyzing", 0.55, "AI analyzing transcript for viral clips..." ) await _publish_progress( r, job_id, "analyzing", 0.55, "AI analyzing transcript for viral clips..." ) clips_data = await ai_analysis.analyze_transcript( transcript=transcript, video_title=job.title or "", video_duration=job.duration or 0, ) if not clips_data: raise ValueError("AI analysis returned no clips") await _publish_progress( r, job_id, "analyzing", 0.70, f"Found {len(clips_data)} potential clips" ) # === STAGE 4: EXTRACT CLIPS === await _update_job( db, job, "extracting", 0.75, f"Extracting {len(clips_data)} clips..." ) await _publish_progress( r, job_id, "extracting", 0.75, f"Extracting {len(clips_data)} clips..." ) clips_dir = os.path.join(settings.clips_dir, job_id) os.makedirs(clips_dir, exist_ok=True) for i, cd in enumerate(clips_data): clip_filename = f"clip_{i:02d}.mp4" clip_path = os.path.join(clips_dir, clip_filename) thumb_path = os.path.join(clips_dir, f"thumb_{i:02d}.jpg") # Extract the clip await clip_extraction.extract_clip( video_path=job.media_path, start_time=cd["start_time"], end_time=cd["end_time"], output_path=clip_path, ) # Extract thumbnail at 25% into the clip thumb_time = cd["start_time"] + (cd["end_time"] - cd["start_time"]) * 0.25 try: await clip_extraction.extract_thumbnail( video_path=job.media_path, timestamp=thumb_time, output_path=thumb_path, ) except Exception: thumb_path = None # Get transcript segment for this clip segment_text = transcription.get_transcript_segment( transcript.get("words", []), cd["start_time"], cd["end_time"], ) # Save clip to database clip = Clip( job_id=job.id, title=cd["title"], start_time=cd["start_time"], end_time=cd["end_time"], virality_score=cd["virality_score"], category=cd["category"], reasoning=cd["reasoning"], transcript_segment=segment_text, thumbnail_path=thumb_path, raw_clip_path=clip_path, ) db.add(clip) progress = 0.75 + (0.20 * (i + 1) / len(clips_data)) await _publish_progress( r, job_id, "extracting", progress, f"Extracted clip {i + 1}/{len(clips_data)}: {cd['title']}" ) await db.commit() # === COMPLETE === await _update_job( db, job, "complete", 1.0, f"Done! {len(clips_data)} clips extracted" ) await _publish_progress( r, job_id, "complete", 1.0, f"Done! {len(clips_data)} clips extracted" ) # Clean up audio file if os.path.exists(audio_path): os.remove(audio_path) logger.info(f"Job {job_id} complete: {len(clips_data)} clips") except Exception as e: logger.exception(f"Job {job_id} failed: {e}") try: await _update_job( db, job, "failed", job.progress, str(e), error_message=str(e), ) await _publish_progress( r, job_id, "failed", job.progress, f"Error: {e}" ) except Exception: pass finally: await db.close() async def render_clip(ctx: dict, render_id: str): """Render a clip with subtitles and aspect ratio conversion.""" from app.models import RenderRequest, Job from app.services.subtitle_render import generate_ass, render_with_subtitles db = await _get_session() try: render = await db.get(RenderRequest, uuid.UUID(render_id)) if not render: return render.status = "rendering" render.progress = 0.2 await db.commit() clip = await db.get(Clip, render.clip_id) if not clip or not clip.raw_clip_path: render.status = "failed" render.error_message = "Clip not found or not extracted" await db.commit() return renders_dir = os.path.join(settings.renders_dir, str(render.clip_id)) os.makedirs(renders_dir, exist_ok=True) output = os.path.join( renders_dir, f"render_{render.subtitle_style}_{render.aspect_ratio.replace(':', 'x')}.mp4", ) # Generate subtitles from word-level transcript ass_content = "" if render.subtitle_style != "none": job = await db.get(Job, clip.job_id) words = (job.transcript or {}).get("words", []) if job else [] if words: ass_content = generate_ass( words, clip.start_time, clip.end_time, render.subtitle_style ) logger.info(f"Generated ASS subtitles ({len(ass_content)} chars) for render {render_id}") render.progress = 0.4 await db.commit() await render_with_subtitles( clip.raw_clip_path, output, ass_content, render.aspect_ratio ) render.output_path = output render.status = "complete" render.progress = 1.0 await db.commit() logger.info(f"Render {render_id} complete: {output}") except Exception as e: logger.exception(f"Render {render_id} failed: {e}") render.status = "failed" render.error_message = str(e) await db.commit() finally: await db.close()