feat: ClipForge Phase 1 - core pipeline MVP
Self-hosted AI video clipper (Opus Clip alternative). Pipeline: YouTube URL -> yt-dlp download -> Whisper transcription -> Ollama AI clip selection -> FFmpeg extraction. - FastAPI backend with PostgreSQL + Redis + ARQ worker - 7-stage processing pipeline with SSE progress tracking - Services: download (yt-dlp), transcription (whisper.jeffemmett.com), AI analysis (Ollama), clip extraction (FFmpeg stream copy) - API: create jobs, track progress, list clips, render, download - Docker Compose with Traefik labels for clip.jeffemmett.com Cost: $0/video using existing infrastructure. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
6aa8a676ec
|
|
@ -0,0 +1,33 @@
|
||||||
|
# ClipForge Environment Configuration
|
||||||
|
|
||||||
|
# Database
|
||||||
|
POSTGRES_USER=clipforge
|
||||||
|
POSTGRES_PASSWORD=changeme_clipforge_2025
|
||||||
|
POSTGRES_DB=clipforge
|
||||||
|
DATABASE_URL=postgresql+asyncpg://clipforge:changeme_clipforge_2025@postgres:5432/clipforge
|
||||||
|
|
||||||
|
# Redis
|
||||||
|
REDIS_URL=redis://redis:6379/0
|
||||||
|
|
||||||
|
# Whisper (self-hosted)
|
||||||
|
WHISPER_API_URL=https://whisper.jeffemmett.com
|
||||||
|
WHISPER_MODEL=deepdml/faster-whisper-large-v3-turbo-ct2
|
||||||
|
|
||||||
|
# Ollama (local)
|
||||||
|
OLLAMA_URL=http://host.docker.internal:11434
|
||||||
|
OLLAMA_MODEL=llama3.1:8b
|
||||||
|
|
||||||
|
# Storage paths (inside container)
|
||||||
|
MEDIA_DIR=/data/media
|
||||||
|
CLIPS_DIR=/data/clips
|
||||||
|
RENDERS_DIR=/data/renders
|
||||||
|
|
||||||
|
# yt-dlp
|
||||||
|
YTDLP_COOKIES_FILE=
|
||||||
|
MAX_VIDEO_DURATION=7200
|
||||||
|
|
||||||
|
# Processing
|
||||||
|
MAX_CONCURRENT_JOBS=2
|
||||||
|
CLIP_MIN_DURATION=15
|
||||||
|
CLIP_MAX_DURATION=90
|
||||||
|
TARGET_CLIPS=5
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
.env
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
.venv/
|
||||||
|
node_modules/
|
||||||
|
dist/
|
||||||
|
data/
|
||||||
|
*.egg-info/
|
||||||
|
.DS_Store
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
# Runtime image for the ClipForge API (and, with an overridden CMD, the worker).
FROM python:3.12-slim

# Install system dependencies
# ffmpeg is required by the clip-extraction service (stream copy + thumbnails).
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies
# Copy requirements first so this layer stays cached when only app code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application
COPY . .

EXPOSE 8000

# Default: run API server
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
|
|
@ -0,0 +1,34 @@
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.database import get_db
|
||||||
|
from app.models import Clip
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/clips/{clip_id}")
async def get_clip(clip_id: UUID, db: AsyncSession = Depends(get_db)):
    """Return a single clip by id, with its duration computed on the fly."""
    found = await db.get(Clip, clip_id)
    if found is None:
        raise HTTPException(404, "Clip not found")
    # duration is not a stored column; attach it so the response can carry it.
    found.duration = found.end_time - found.start_time
    return found
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/clips/{clip_id}/preview")
async def preview_clip(clip_id: UUID, db: AsyncSession = Depends(get_db)):
    """Stream the raw (unrendered) clip file for in-browser preview."""
    clip = await db.get(Clip, clip_id)
    if clip is None:
        raise HTTPException(404, "Clip not found")
    if not clip.raw_clip_path:
        raise HTTPException(404, "Clip not yet extracted")

    return FileResponse(
        path=clip.raw_clip_path,
        media_type="video/mp4",
        filename=f"{clip.title}.mp4",
    )
|
||||||
|
|
@ -0,0 +1,167 @@
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from arq import create_pool
|
||||||
|
from arq.connections import RedisSettings
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
from sse_starlette.sse import EventSourceResponse
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
from app.database import get_db
|
||||||
|
from app.models import Job, Clip
|
||||||
|
from app.schemas import JobCreate, JobResponse, ClipResponse
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
def _redis_settings(url: str | None = None) -> RedisSettings:
    """Build arq ``RedisSettings`` from a ``redis://`` URL.

    Args:
        url: URL to parse; defaults to the configured ``settings.redis_url``.

    Returns:
        RedisSettings with host, port, database and (if present) password.
    """
    from urllib.parse import urlparse

    parsed = urlparse(url or settings.redis_url)
    return RedisSettings(
        host=parsed.hostname or "redis",
        port=parsed.port or 6379,
        database=int(parsed.path.lstrip("/") or "0"),
        # Bug fix: a password embedded in the URL (redis://:secret@host) was
        # previously dropped, breaking auth-protected Redis deployments.
        password=parsed.password,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/jobs", response_model=JobResponse, status_code=201)
async def create_job(job_in: JobCreate, db: AsyncSession = Depends(get_db)):
    """Create a processing job for a YouTube URL and enqueue it on the worker.

    Raises:
        HTTPException: 400 when source_type is "youtube" without a source_url.
    """
    if job_in.source_type == "youtube" and not job_in.source_url:
        raise HTTPException(400, "source_url required for youtube source")

    job = Job(
        source_type=job_in.source_type,
        source_url=job_in.source_url,
        status="pending",
    )
    db.add(job)
    await db.commit()
    await db.refresh(job)

    # Enqueue processing. Always close the pool, even if enqueueing fails,
    # so a transient Redis error does not leak connections.
    pool = await create_pool(_redis_settings())
    try:
        await pool.enqueue_job("process_job", str(job.id))
    finally:
        await pool.close()

    return job
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/jobs/upload", response_model=JobResponse, status_code=201)
async def create_job_upload(
    file: UploadFile = File(...),
    db: AsyncSession = Depends(get_db),
):
    """Accept a direct video upload, persist it under MEDIA_DIR, enqueue a job."""
    import os
    import aiofiles

    os.makedirs(settings.media_dir, exist_ok=True)
    # Bug fix: file.filename may be None (multipart clients can omit it),
    # which crashed .replace(). basename + substitution also guards against
    # path traversal such as "../../etc/passwd".
    original_name = file.filename or "upload.bin"
    safe_name = os.path.basename(original_name).replace("/", "_").replace("..", "_")
    if not safe_name:
        safe_name = "upload.bin"
    dest = os.path.join(settings.media_dir, f"upload_{safe_name}")

    # Stream to disk in 1 MiB chunks to keep memory bounded for large videos.
    async with aiofiles.open(dest, "wb") as f:
        while chunk := await file.read(1024 * 1024):
            await f.write(chunk)

    job = Job(
        source_type="upload",
        source_filename=safe_name,
        media_path=dest,
        status="pending",
    )
    db.add(job)
    await db.commit()
    await db.refresh(job)

    # Always release the redis pool, even when enqueueing fails.
    pool = await create_pool(_redis_settings())
    try:
        await pool.enqueue_job("process_job", str(job.id))
    finally:
        await pool.close()

    return job
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/jobs", response_model=list[JobResponse])
async def list_jobs(
    limit: int = 20,
    offset: int = 0,
    db: AsyncSession = Depends(get_db),
):
    """List jobs, newest first, with simple offset pagination."""
    stmt = (
        select(Job)
        .order_by(Job.created_at.desc())
        .offset(offset)
        .limit(limit)
    )
    rows = await db.execute(stmt)
    return rows.scalars().all()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/jobs/{job_id}", response_model=JobResponse)
async def get_job(job_id: UUID, db: AsyncSession = Depends(get_db)):
    """Fetch one job by id; 404 when it does not exist."""
    found = await db.get(Job, job_id)
    if found is None:
        raise HTTPException(404, "Job not found")
    return found
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/jobs/{job_id}/clips", response_model=list[ClipResponse])
async def get_job_clips(job_id: UUID, db: AsyncSession = Depends(get_db)):
    """Return a job's clips ordered by descending virality score."""
    if await db.get(Job, job_id) is None:
        raise HTTPException(404, "Job not found")

    stmt = (
        select(Clip)
        .where(Clip.job_id == job_id)
        .order_by(Clip.virality_score.desc())
    )
    clips = (await db.execute(stmt)).scalars().all()

    # duration is not a stored column, so fill it in before serialization.
    for item in clips:
        item.duration = item.end_time - item.start_time
    return clips
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/jobs/{job_id}/progress")
async def job_progress_sse(job_id: UUID, db: AsyncSession = Depends(get_db)):
    """Stream job progress to the client as Server-Sent Events.

    Subscribes to the Redis channel ``job:{job_id}:progress`` that the worker
    publishes to, emitting one "progress" event per update until the job
    reaches a terminal state ("complete" or "failed").
    """
    job = await db.get(Job, job_id)
    if not job:
        raise HTTPException(404, "Job not found")

    async def event_stream():
        # Imported lazily so the route module does not require redis at import time.
        import redis.asyncio as aioredis

        r = aioredis.from_url(settings.redis_url)
        pubsub = r.pubsub()
        # Subscribe BEFORE reading current state so no update is missed in between.
        await pubsub.subscribe(f"job:{job_id}:progress")

        # Send current state immediately so the client paints without waiting
        # for the next worker update.
        await db.refresh(job)
        yield {
            "event": "progress",
            "data": json.dumps({
                "status": job.status,
                "progress": job.progress,
                "stage_message": job.stage_message,
            }),
        }

        # Already terminal: nothing more will be published, close right away.
        if job.status in ("complete", "failed"):
            await pubsub.unsubscribe()
            await r.close()
            return

        try:
            # Poll pub/sub; timeout=1.0 keeps the loop responsive to client
            # disconnects (GeneratorExit lands in the finally below).
            while True:
                msg = await pubsub.get_message(
                    ignore_subscribe_messages=True, timeout=1.0
                )
                if msg and msg["type"] == "message":
                    data = json.loads(msg["data"])
                    yield {"event": "progress", "data": json.dumps(data)}
                    # Worker publishes a terminal status as its last message.
                    if data.get("status") in ("complete", "failed"):
                        break
                # NOTE(review): this sleep adds up to 0.5s latency per event;
                # confirm it is intentional throttling before tuning.
                await asyncio.sleep(0.5)
        finally:
            await pubsub.unsubscribe()
            await r.close()

    return EventSourceResponse(event_stream())
|
||||||
|
|
@ -0,0 +1,111 @@
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from arq import create_pool
|
||||||
|
from arq.connections import RedisSettings
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
from app.database import get_db
|
||||||
|
from app.models import Clip, RenderRequest
|
||||||
|
from app.schemas import RenderCreate, RenderResponse, BulkRenderCreate
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
def _redis_settings(url: str | None = None) -> RedisSettings:
    """Build arq ``RedisSettings`` from a ``redis://`` URL.

    Args:
        url: URL to parse; defaults to the configured ``settings.redis_url``.

    Returns:
        RedisSettings with host, port, database and (if present) password.
    """
    from urllib.parse import urlparse

    parsed = urlparse(url or settings.redis_url)
    return RedisSettings(
        host=parsed.hostname or "redis",
        port=parsed.port or 6379,
        database=int(parsed.path.lstrip("/") or "0"),
        # Bug fix: a password embedded in the URL (redis://:secret@host) was
        # previously dropped, breaking auth-protected Redis deployments.
        password=parsed.password,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/clips/{clip_id}/render", response_model=RenderResponse, status_code=201)
async def render_clip(
    clip_id: UUID,
    render_in: RenderCreate,
    db: AsyncSession = Depends(get_db),
):
    """Queue a render (aspect ratio / subtitle styling) for an extracted clip.

    Raises:
        HTTPException: 404 if the clip is unknown, 400 if not yet extracted.
    """
    clip = await db.get(Clip, clip_id)
    if not clip:
        raise HTTPException(404, "Clip not found")
    if not clip.raw_clip_path:
        raise HTTPException(400, "Clip not yet extracted")

    render = RenderRequest(
        clip_id=clip_id,
        aspect_ratio=render_in.aspect_ratio,
        subtitle_style=render_in.subtitle_style,
        status="pending",
    )
    db.add(render)
    await db.commit()
    await db.refresh(render)

    # Always release the redis pool, even if enqueueing fails.
    pool = await create_pool(_redis_settings())
    try:
        await pool.enqueue_job("render_clip", str(render.id))
    finally:
        await pool.close()

    return render
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/jobs/{job_id}/render-all", response_model=list[RenderResponse], status_code=201)
async def render_all_clips(
    job_id: UUID,
    bulk_in: BulkRenderCreate,
    db: AsyncSession = Depends(get_db),
):
    """Queue renders for several clips at once.

    Clips that do not exist or are not yet extracted are silently skipped;
    the response contains only the renders that were actually queued.
    """
    renders = []
    pool = await create_pool(_redis_settings())
    try:
        for clip_id in bulk_in.clip_ids:
            clip = await db.get(Clip, clip_id)
            if not clip or not clip.raw_clip_path:
                continue

            render = RenderRequest(
                clip_id=clip_id,
                aspect_ratio=bulk_in.aspect_ratio,
                subtitle_style=bulk_in.subtitle_style,
                status="pending",
            )
            db.add(render)
            await db.commit()
            await db.refresh(render)

            await pool.enqueue_job("render_clip", str(render.id))
            renders.append(render)
    finally:
        # Bug fix: the pool was leaked when a DB or enqueue call failed mid-loop.
        await pool.close()
    return renders
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/renders/{render_id}", response_model=RenderResponse)
async def get_render(render_id: UUID, db: AsyncSession = Depends(get_db)):
    """Fetch one render request by id; 404 when it does not exist."""
    found = await db.get(RenderRequest, render_id)
    if found is None:
        raise HTTPException(404, "Render not found")
    return found
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/renders/{render_id}/download")
async def download_render(render_id: UUID, db: AsyncSession = Depends(get_db)):
    """Download a finished render as an mp4 attachment."""
    render = await db.get(RenderRequest, render_id)
    if render is None:
        raise HTTPException(404, "Render not found")
    if render.status != "complete" or not render.output_path:
        raise HTTPException(400, "Render not complete")

    # Build a human-friendly filename from the clip title when available.
    clip = await db.get(Clip, render.clip_id)
    if clip:
        filename = f"{clip.title}_{render.aspect_ratio.replace(':', 'x')}.mp4"
    else:
        filename = "clip.mp4"

    return FileResponse(
        render.output_path,
        media_type="video/mp4",
        filename=filename,
    )
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
from pydantic_settings import BaseSettings
|
||||||
|
|
||||||
|
|
||||||
|
class Settings(BaseSettings):
    """Application configuration loaded from environment variables / .env.

    Field names map to env vars case-insensitively via pydantic-settings,
    e.g. ``database_url`` <- DATABASE_URL.
    """

    # Database
    # NOTE(review): default embeds a placeholder password — must be overridden
    # via DATABASE_URL in any real deployment.
    database_url: str = "postgresql+asyncpg://clipforge:changeme_clipforge_2025@postgres:5432/clipforge"

    # Redis
    redis_url: str = "redis://redis:6379/0"

    # Whisper
    whisper_api_url: str = "https://whisper.jeffemmett.com"
    whisper_model: str = "deepdml/faster-whisper-large-v3-turbo-ct2"

    # Ollama
    ollama_url: str = "http://host.docker.internal:11434"
    ollama_model: str = "llama3.1:8b"

    # Storage
    media_dir: str = "/data/media"
    clips_dir: str = "/data/clips"
    renders_dir: str = "/data/renders"

    # yt-dlp
    ytdlp_cookies_file: str = ""
    max_video_duration: int = 7200  # presumably seconds (2h) — confirm in downloader

    # Processing
    max_concurrent_jobs: int = 2
    clip_min_duration: int = 15  # seconds (fed into the LLM prompt)
    clip_max_duration: int = 90  # seconds (fed into the LLM prompt)
    target_clips: int = 5

    # Ignore unknown env vars so the shared .env can carry extra keys.
    model_config = {"env_file": ".env", "extra": "ignore"}


# Module-level singleton, read once at import time.
settings = Settings()
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
"""Async SQLAlchemy engine and per-request session setup."""

from collections.abc import AsyncGenerator

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker

from app.config import settings

engine = create_async_engine(settings.database_url, echo=False)
# expire_on_commit=False keeps attributes readable after commit (routes return
# ORM instances right after db.commit()/db.refresh()).
async_session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)


async def get_db() -> AsyncGenerator[AsyncSession, None]:
    """FastAPI dependency yielding one AsyncSession per request.

    Fixed return annotation: this is an async generator, not a plain
    coroutine returning AsyncSession.
    """
    async with async_session() as session:
        yield session
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
"""ClipForge FastAPI application entry point."""

from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from app.api.routes import jobs, clips, renders


@asynccontextmanager
async def lifespan(app: FastAPI):
    # No startup/shutdown work yet; placeholder for future resource setup.
    yield


app = FastAPI(
    title="ClipForge",
    description="Self-hosted AI video clipper",
    version="0.1.0",
    lifespan=lifespan,
)

# NOTE(review): wildcard origins combined with allow_credentials=True is
# disallowed by the CORS spec (browsers reject "*" with credentials) —
# tighten allow_origins before exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(jobs.router, prefix="/api")
app.include_router(clips.router, prefix="/api")
app.include_router(renders.router, prefix="/api")


@app.get("/health")
async def health():
    """Simple liveness endpoint for container healthchecks."""
    return {"status": "ok", "service": "clipforge"}
|
||||||
|
|
@ -0,0 +1,110 @@
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from sqlalchemy import (
|
||||||
|
Column,
|
||||||
|
DateTime,
|
||||||
|
Enum,
|
||||||
|
Float,
|
||||||
|
ForeignKey,
|
||||||
|
Index,
|
||||||
|
String,
|
||||||
|
Text,
|
||||||
|
func,
|
||||||
|
)
|
||||||
|
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||||
|
from sqlalchemy.orm import DeclarativeBase, relationship
|
||||||
|
|
||||||
|
|
||||||
|
class Base(DeclarativeBase):
    """Common declarative base for all ClipForge ORM models."""
    pass
|
||||||
|
|
||||||
|
|
||||||
|
class Job(Base):
    """One end-to-end processing run: download -> transcribe -> analyze -> extract."""

    __tablename__ = "jobs"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # "youtube" jobs carry source_url; "upload" jobs carry source_filename.
    source_type = Column(Enum("youtube", "upload", name="source_type"), nullable=False)
    source_url = Column(Text)
    source_filename = Column(Text)
    title = Column(Text)
    duration = Column(Float)
    # Pipeline state; "complete" and "failed" are terminal (SSE stream stops there).
    status = Column(
        Enum(
            "pending",
            "downloading",
            "transcribing",
            "analyzing",
            "extracting",
            "complete",
            "failed",
            name="job_status",
        ),
        nullable=False,
        default="pending",
    )
    progress = Column(Float, nullable=False, default=0.0)
    stage_message = Column(Text)
    error_message = Column(Text)
    media_path = Column(Text)
    # Raw transcription payload ('text'/'segments'/'words') from the Whisper service.
    transcript = Column(JSONB)
    scene_boundaries = Column(JSONB)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    clips = relationship("Clip", back_populates="job", cascade="all, delete-orphan")
|
||||||
|
|
||||||
|
|
||||||
|
class Clip(Base):
    """An AI-selected highlight span within a job's source video."""

    __tablename__ = "clips"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    job_id = Column(
        UUID(as_uuid=True), ForeignKey("jobs.id", ondelete="CASCADE"), nullable=False
    )
    title = Column(Text, nullable=False)
    # Boundaries in seconds within the source video; duration is derived
    # (end_time - start_time) by the API layer, not stored here.
    start_time = Column(Float, nullable=False)
    end_time = Column(Float, nullable=False)
    # 0-100 score assigned during LLM analysis; routes sort descending by it.
    virality_score = Column(Float, nullable=False, default=0.0)
    category = Column(Text)
    reasoning = Column(Text)
    transcript_segment = Column(Text)
    thumbnail_path = Column(Text)
    # Set once FFmpeg extraction has produced the raw (unrendered) file.
    raw_clip_path = Column(Text)
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    job = relationship("Job", back_populates="clips")
    renders = relationship(
        "RenderRequest", back_populates="clip", cascade="all, delete-orphan"
    )
|
||||||
|
|
||||||
|
|
||||||
|
class RenderRequest(Base):
    """A request to render one clip into a deliverable (aspect ratio + subtitles)."""

    __tablename__ = "render_requests"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    clip_id = Column(
        UUID(as_uuid=True), ForeignKey("clips.id", ondelete="CASCADE"), nullable=False
    )
    aspect_ratio = Column(
        Enum("16:9", "9:16", "1:1", "4:5", name="aspect_ratio"),
        nullable=False,
        default="9:16",
    )
    subtitle_style = Column(String, nullable=False, default="tiktok")
    status = Column(
        Enum("pending", "rendering", "complete", "failed", name="render_status"),
        nullable=False,
        default="pending",
    )
    progress = Column(Float, nullable=False, default=0.0)
    # Path of the finished file; download requires status == "complete".
    output_path = Column(Text)
    error_message = Column(Text)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    clip = relationship("Clip", back_populates="renders")
|
||||||
|
|
@ -0,0 +1,83 @@
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
# --- Job Schemas ---
|
||||||
|
|
||||||
|
class JobCreate(BaseModel):
    """Request body for creating a processing job."""

    # "youtube" requires source_url; direct uploads use the /jobs/upload route.
    source_type: str = Field(..., pattern="^(youtube|upload)$")
    source_url: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class JobResponse(BaseModel):
    """Serialized Job row returned by the jobs API."""

    id: UUID
    source_type: str
    source_url: Optional[str]
    source_filename: Optional[str]
    title: Optional[str]
    duration: Optional[float]
    status: str
    progress: float
    stage_message: Optional[str]
    error_message: Optional[str]
    created_at: datetime
    updated_at: datetime

    # Allow construction directly from ORM instances.
    model_config = {"from_attributes": True}
|
||||||
|
|
||||||
|
|
||||||
|
class JobProgress(BaseModel):
    """Shape of a single progress update (as streamed over SSE)."""

    status: str
    progress: float
    stage_message: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
|
# --- Clip Schemas ---
|
||||||
|
|
||||||
|
class ClipResponse(BaseModel):
    """Serialized Clip row returned by the clips API."""

    id: UUID
    job_id: UUID
    title: str
    start_time: float
    end_time: float
    # Not stored in the DB; computed (end_time - start_time) by the routes.
    duration: Optional[float] = None
    virality_score: float
    category: Optional[str]
    reasoning: Optional[str]
    transcript_segment: Optional[str]
    thumbnail_path: Optional[str]
    raw_clip_path: Optional[str]
    created_at: datetime

    # Allow construction directly from ORM instances.
    model_config = {"from_attributes": True}
|
||||||
|
|
||||||
|
|
||||||
|
# --- Render Schemas ---
|
||||||
|
|
||||||
|
class RenderCreate(BaseModel):
    """Rendering options for a single clip."""

    aspect_ratio: str = Field(default="9:16", pattern="^(16:9|9:16|1:1|4:5)$")
    subtitle_style: str = Field(default="tiktok")
|
||||||
|
|
||||||
|
|
||||||
|
class RenderResponse(BaseModel):
    """Serialized RenderRequest row returned by the renders API."""

    id: UUID
    clip_id: UUID
    aspect_ratio: str
    subtitle_style: str
    status: str
    progress: float
    output_path: Optional[str]
    error_message: Optional[str]
    created_at: datetime
    updated_at: datetime

    # Allow construction directly from ORM instances.
    model_config = {"from_attributes": True}
|
||||||
|
|
||||||
|
|
||||||
|
class BulkRenderCreate(BaseModel):
    """Rendering options applied to a batch of clips at once."""

    clip_ids: list[UUID]
    aspect_ratio: str = Field(default="9:16", pattern="^(16:9|9:16|1:1|4:5)$")
    subtitle_style: str = Field(default="tiktok")
|
||||||
|
|
@ -0,0 +1,169 @@
|
||||||
|
"""AI clip analysis using Ollama (local LLM)."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
SYSTEM_PROMPT = """You are a viral video clip analyst. Given a video transcript with timestamps, identify the best short clips that would perform well on social media (TikTok, YouTube Shorts, Instagram Reels).
|
||||||
|
|
||||||
|
For each clip, provide:
|
||||||
|
- A catchy title (max 60 chars)
|
||||||
|
- Start and end timestamps (in seconds)
|
||||||
|
- Virality score (0-100)
|
||||||
|
- Category (one of: hook, story, insight, humor, emotional, controversial, educational)
|
||||||
|
- Brief reasoning for why this clip would go viral
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Clips should be {min_dur}-{max_dur} seconds long
|
||||||
|
- Identify {target} clips, ranked by virality potential
|
||||||
|
- Clips should start and end at natural sentence boundaries
|
||||||
|
- Prefer clips with strong hooks in the first 3 seconds
|
||||||
|
- Look for emotional peaks, surprising statements, quotable moments
|
||||||
|
- Avoid clips that start mid-sentence or end abruptly
|
||||||
|
|
||||||
|
Respond ONLY with valid JSON in this exact format:
|
||||||
|
{{
|
||||||
|
"clips": [
|
||||||
|
{{
|
||||||
|
"title": "Clip title here",
|
||||||
|
"start_time": 12.5,
|
||||||
|
"end_time": 45.2,
|
||||||
|
"virality_score": 85,
|
||||||
|
"category": "hook",
|
||||||
|
"reasoning": "Why this clip would perform well"
|
||||||
|
}}
|
||||||
|
]
|
||||||
|
}}"""
|
||||||
|
|
||||||
|
|
||||||
|
async def analyze_transcript(
    transcript: dict,
    video_title: str = "",
    video_duration: float = 0,
) -> list[dict]:
    """Use Ollama to identify the best clips from a transcript.

    Args:
        transcript: dict with 'text', 'words', 'segments' from transcription service
        video_title: original video title for context
        video_duration: total video duration in seconds

    Returns:
        List of clip dicts with title, start_time, end_time, virality_score, category, reasoning
    """
    # Build timestamped transcript for the LLM
    text = transcript.get("text", "")
    segments = transcript.get("segments", [])

    if segments:
        # One "[MM:SS - MM:SS] text" line per segment so the model can quote
        # real timestamps back in its JSON.
        timestamped = "\n".join(
            f"[{_fmt_time(s.get('start', 0))} - {_fmt_time(s.get('end', 0))}] "
            f"{s.get('text', '').strip()}"
            for s in segments
        )
    else:
        # Fall back to plain text with rough time estimates
        timestamped = text

    system = SYSTEM_PROMPT.format(
        min_dur=settings.clip_min_duration,
        max_dur=settings.clip_max_duration,
        target=settings.target_clips,
    )

    user_prompt = f"""Video Title: {video_title}
Video Duration: {_fmt_time(video_duration)}

Transcript:
{timestamped}

Identify the {settings.target_clips} best viral clips from this transcript."""

    logger.info(f"Sending transcript to Ollama ({settings.ollama_model})...")

    # Generous timeout: local LLM inference over a full transcript can take minutes.
    async with httpx.AsyncClient(timeout=300.0) as client:
        response = await client.post(
            f"{settings.ollama_url}/api/chat",
            json={
                "model": settings.ollama_model,
                "messages": [
                    {"role": "system", "content": system},
                    {"role": "user", "content": user_prompt},
                ],
                "stream": False,
                "options": {
                    # Low temperature for more deterministic, parseable JSON.
                    "temperature": 0.3,
                    "num_predict": 4096,
                },
            },
        )
        response.raise_for_status()
        result = response.json()

    content = result.get("message", {}).get("content", "")
    # _parse_clips tolerates prose around the JSON and clamps out-of-range times.
    clips = _parse_clips(content, video_duration)

    logger.info(f"AI identified {len(clips)} clips")
    return clips
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_clips(content: str, video_duration: float) -> list[dict]:
|
||||||
|
"""Parse LLM response into clip list, handling imperfect JSON."""
|
||||||
|
# Try to extract JSON from response
|
||||||
|
json_match = re.search(r"\{[\s\S]*\}", content)
|
||||||
|
if not json_match:
|
||||||
|
logger.error(f"No JSON found in LLM response: {content[:200]}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(json_match.group())
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# Try to fix common JSON issues
|
||||||
|
fixed = json_match.group()
|
||||||
|
fixed = re.sub(r",\s*}", "}", fixed)
|
||||||
|
fixed = re.sub(r",\s*]", "]", fixed)
|
||||||
|
try:
|
||||||
|
data = json.loads(fixed)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.error(f"Failed to parse LLM JSON: {content[:200]}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
raw_clips = data.get("clips", [])
|
||||||
|
clips = []
|
||||||
|
|
||||||
|
for c in raw_clips:
|
||||||
|
start = float(c.get("start_time", 0))
|
||||||
|
end = float(c.get("end_time", 0))
|
||||||
|
|
||||||
|
# Validate
|
||||||
|
if end <= start:
|
||||||
|
continue
|
||||||
|
if start < 0:
|
||||||
|
start = 0
|
||||||
|
if end > video_duration and video_duration > 0:
|
||||||
|
end = video_duration
|
||||||
|
|
||||||
|
clips.append({
|
||||||
|
"title": str(c.get("title", "Untitled"))[:100],
|
||||||
|
"start_time": round(start, 2),
|
||||||
|
"end_time": round(end, 2),
|
||||||
|
"virality_score": max(0, min(100, float(c.get("virality_score", 50)))),
|
||||||
|
"category": str(c.get("category", "general")),
|
||||||
|
"reasoning": str(c.get("reasoning", "")),
|
||||||
|
})
|
||||||
|
|
||||||
|
# Sort by virality score descending
|
||||||
|
clips.sort(key=lambda x: x["virality_score"], reverse=True)
|
||||||
|
return clips
|
||||||
|
|
||||||
|
|
||||||
|
def _fmt_time(seconds: float) -> str:
|
||||||
|
"""Format seconds as MM:SS."""
|
||||||
|
m, s = divmod(int(seconds), 60)
|
||||||
|
return f"{m:02d}:{s:02d}"
|
||||||
|
|
@ -0,0 +1,117 @@
|
||||||
|
"""Clip extraction service using FFmpeg."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_clip(
    video_path: str,
    start_time: float,
    end_time: float,
    output_path: str,
) -> str:
    """Extract a clip from video using FFmpeg stream copy (instant, no re-encode).

    Args:
        video_path: path to source video
        start_time: clip start in seconds
        end_time: clip end in seconds
        output_path: where to write the clip

    Returns:
        output_path

    Raises:
        RuntimeError: if the ffmpeg subprocess exits non-zero.
    """
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    duration = end_time - start_time

    # Stream copy (-c copy) skips re-encoding, so extraction is near-instant.
    # NOTE(review): with -c copy, the input-side -ss snaps to the nearest
    # keyframe, so the actual start can be off by up to a GOP length.
    cmd = [
        "ffmpeg",
        "-ss", str(start_time),
        "-i", video_path,
        "-t", str(duration),
        "-c", "copy",
        # Rebase timestamps so players don't show blank/frozen leading frames.
        "-avoid_negative_ts", "make_zero",
        "-y",
        output_path,
    ]

    logger.info(
        f"Extracting clip: {start_time:.1f}s - {end_time:.1f}s -> {output_path}"
    )

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    _, stderr = await proc.communicate()

    if proc.returncode != 0:
        raise RuntimeError(f"FFmpeg clip extraction failed: {stderr.decode()}")

    size_mb = os.path.getsize(output_path) / (1024 * 1024)
    logger.info(f"Extracted clip: {output_path} ({size_mb:.1f} MB)")
    return output_path
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_thumbnail(
    video_path: str,
    timestamp: float,
    output_path: str,
) -> str:
    """Extract a single frame as a JPEG thumbnail.

    Args:
        video_path: path to source video
        timestamp: position in seconds to grab the frame from
        output_path: where to write the image

    Returns:
        output_path

    Raises:
        RuntimeError: if FFmpeg exits non-zero.
    """
    # Guard against a bare filename: os.makedirs("") raises.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    cmd = [
        "ffmpeg",
        "-ss", str(timestamp),
        "-i", video_path,
        "-vframes", "1",   # grab exactly one frame
        "-q:v", "2",       # high JPEG quality (2 on ffmpeg's 2-31 scale)
        "-y",
        output_path,
    ]

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    _, stderr = await proc.communicate()

    if proc.returncode != 0:
        raise RuntimeError(f"FFmpeg thumbnail extraction failed: {stderr.decode()}")

    return output_path
|
||||||
|
|
||||||
|
|
||||||
|
async def get_video_duration(video_path: str) -> float:
    """Get video duration in seconds using ffprobe.

    Returns 0.0 when ffprobe fails OR when its output cannot be parsed —
    previously a malformed/empty stdout would raise from json.loads,
    breaking the "best-effort, 0.0 on failure" contract of this helper.
    """
    cmd = [
        "ffprobe",
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        video_path,
    ]

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, _ = await proc.communicate()

    if proc.returncode != 0:
        return 0.0

    import json

    try:
        data = json.loads(stdout.decode())
        return float(data.get("format", {}).get("duration", 0))
    except (json.JSONDecodeError, TypeError, ValueError):
        # Malformed or empty ffprobe output: treat as unknown duration.
        return 0.0
|
||||||
|
|
@ -0,0 +1,117 @@
|
||||||
|
"""Video download service using yt-dlp."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import yt_dlp
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
COOKIES_FILE = settings.ytdlp_cookies_file
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class VideoInfo:
    """Result of a completed video download."""

    # Video title as reported by the source site.
    title: str
    # Duration in seconds.
    duration: float
    # Filesystem path to the downloaded media file.
    video_path: str
    # Source video identifier (e.g. the 11-character YouTube ID).
    video_id: str
|
||||||
|
|
||||||
|
|
||||||
|
def extract_video_id(url: str) -> Optional[str]:
    """Pull the 11-character YouTube video ID out of a URL.

    Supports watch, youtu.be, embed, and shorts URL forms; returns
    None when the URL matches none of them.
    """
    id_patterns = (
        r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})",
        r"youtube\.com/shorts/([a-zA-Z0-9_-]{11})",
    )
    for candidate in id_patterns:
        found = re.search(candidate, url)
        if found is not None:
            return found.group(1)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _base_opts() -> dict:
    """Common yt-dlp options; attaches the cookies file when configured and present."""
    options: dict = {"quiet": True, "no_warnings": True}
    if COOKIES_FILE and os.path.exists(COOKIES_FILE):
        # Cookies let yt-dlp fetch age-gated / member-only videos.
        options["cookiefile"] = COOKIES_FILE
    return options
|
||||||
|
|
||||||
|
|
||||||
|
async def get_video_metadata(url: str) -> dict:
    """Get video metadata without downloading.

    yt-dlp is synchronous; running it directly in this coroutine would
    block the event loop for the duration of the network probe, so the
    extraction is pushed to a worker thread via asyncio.to_thread.

    Returns:
        dict with "title", "duration" (seconds), and "video_id".
    """
    import asyncio

    opts = _base_opts()
    opts["extract_flat"] = False

    def _probe() -> dict:
        with yt_dlp.YoutubeDL(opts) as ydl:
            return ydl.extract_info(url, download=False)

    info = await asyncio.to_thread(_probe)
    return {
        "title": info.get("title", "Unknown"),
        "duration": info.get("duration", 0),
        "video_id": info.get("id", ""),
    }
|
||||||
|
|
||||||
|
|
||||||
|
async def download_video(url: str, output_dir: str) -> VideoInfo:
    """Download a video from a YouTube URL (video+audio merged to mp4).

    The blocking yt-dlp download is run in a worker thread
    (asyncio.to_thread) so the event loop stays responsive while a
    potentially multi-minute download is in flight.

    Args:
        url: YouTube video URL.
        output_dir: directory to write the media file into (created if absent).

    Returns:
        VideoInfo describing the downloaded file.

    Raises:
        ValueError: if the downloaded video exceeds settings.max_video_duration.
    """
    import asyncio

    os.makedirs(output_dir, exist_ok=True)

    video_id = extract_video_id(url) or "video"
    output_template = os.path.join(output_dir, f"{video_id}.%(ext)s")

    opts = _base_opts()
    opts.update({
        # Download best video+audio merged to mp4
        "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
        "merge_output_format": "mp4",
        "outtmpl": output_template,
    })

    def _download() -> dict:
        with yt_dlp.YoutubeDL(opts) as ydl:
            return ydl.extract_info(url, download=True)

    info = await asyncio.to_thread(_download)

    video_path = os.path.join(output_dir, f"{video_id}.mp4")
    if not os.path.exists(video_path):
        # The merge may have produced a different container —
        # find whatever file was actually written (skip partials).
        for f in os.listdir(output_dir):
            if f.startswith(video_id) and not f.endswith(".part"):
                video_path = os.path.join(output_dir, f)
                break

    duration = info.get("duration", 0)
    if duration > settings.max_video_duration:
        raise ValueError(
            f"Video is {duration}s, max is {settings.max_video_duration}s"
        )

    logger.info(f"Downloaded: {info.get('title')} ({duration}s) -> {video_path}")
    return VideoInfo(
        title=info.get("title", "Unknown"),
        duration=duration,
        video_path=video_path,
        video_id=video_id,
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_audio(video_path: str, output_path: str) -> str:
    """Extract the audio track of a video to MP3 for transcription.

    Args:
        video_path: source video file.
        output_path: destination .mp3 path.

    Returns:
        output_path

    Raises:
        RuntimeError: if FFmpeg exits non-zero.
    """
    import asyncio

    ffmpeg_args = (
        "ffmpeg", "-i", video_path,
        "-vn", "-acodec", "libmp3lame", "-q:a", "4",
        "-y", output_path,
    )
    proc = await asyncio.create_subprocess_exec(
        *ffmpeg_args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    _, stderr = await proc.communicate()

    if proc.returncode != 0:
        raise RuntimeError(f"FFmpeg audio extraction failed: {stderr.decode()}")

    logger.info(f"Extracted audio: {output_path}")
    return output_path
|
||||||
|
|
@ -0,0 +1,82 @@
|
||||||
|
"""Transcription service using self-hosted faster-whisper-server."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def transcribe(audio_path: str) -> dict:
    """Transcribe an audio file via the self-hosted Whisper API.

    Returns a dict with:
        - text: full transcript text
        - words: list of {word, start, end} with word-level timestamps
        - segments: raw segments from the API response
        - language: detected language
        - duration: audio duration
    """
    endpoint = f"{settings.whisper_api_url}/v1/audio/transcriptions"

    async with httpx.AsyncClient(timeout=900.0) as client:
        with open(audio_path, "rb") as audio_file:
            upload = {"file": (os.path.basename(audio_path), audio_file, "audio/mpeg")}
            form = {
                "model": settings.whisper_model,
                "response_format": "verbose_json",
                "timestamp_granularities[]": "word",
            }

            logger.info(f"Transcribing {audio_path} via {settings.whisper_api_url}")
            response = await client.post(endpoint, files=upload, data=form)
            response.raise_for_status()
            result = response.json()

    text = result.get("text", "").strip()
    words = result.get("words", [])
    segments = result.get("segments", [])

    def _stamp(w: dict) -> dict:
        return {
            "word": w.get("word", ""),
            "start": w.get("start", 0.0),
            "end": w.get("end", 0.0),
        }

    # Prefer the top-level word list; otherwise fall back to the
    # per-segment word timings when the server nested them there.
    if words:
        word_timestamps = [_stamp(w) for w in words]
    elif segments:
        word_timestamps = [
            _stamp(w) for seg in segments for w in seg.get("words", [])
        ]
    else:
        word_timestamps = []

    logger.info(
        f"Transcription complete: {len(text)} chars, "
        f"{len(word_timestamps)} word timestamps"
    )

    return {
        "text": text,
        "words": word_timestamps,
        "segments": segments,
        "language": result.get("language", "en"),
        "duration": result.get("duration", 0.0),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def get_transcript_segment(words: list[dict], start: float, end: float) -> str:
    """Join the words whose timestamps fall inside [start, end].

    A 0.5s tolerance is applied on both edges so words straddling a
    clip boundary are still included.
    """
    picked = []
    for entry in words:
        inside = entry["start"] >= start - 0.5 and entry["end"] <= end + 0.5
        if inside:
            picked.append(entry["word"])
    return " ".join(picked).strip()
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
"""ARQ worker entry point."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from arq import cron
|
||||||
|
from arq.connections import RedisSettings
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
from app.workers.tasks import process_job, render_clip
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _redis_settings() -> RedisSettings:
    """Build ARQ RedisSettings from the REDIS_URL in config.

    Parses host, port, database index, and credentials, so URLs like
    redis://:secret@host:6379/1 work. Previously any username/password
    embedded in the URL was silently dropped, making the worker fail
    against an auth-enabled Redis.
    """
    parsed = urlparse(settings.redis_url)
    return RedisSettings(
        host=parsed.hostname or "redis",
        port=parsed.port or 6379,
        database=int(parsed.path.lstrip("/") or "0"),
        # None when the URL carries no credentials — same as before.
        username=parsed.username,
        password=parsed.password,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class WorkerSettings:
    """ARQ worker configuration (consumed by `arq app.worker.WorkerSettings`)."""

    # Task callables this worker is allowed to execute.
    functions = [process_job, render_clip]
    redis_settings = _redis_settings()
    # Cap on concurrently running jobs — each pipeline run is CPU/disk heavy.
    max_jobs = settings.max_concurrent_jobs
    job_timeout = 3600  # 1 hour max per job
    # How long (seconds) finished-job results stay in Redis.
    keep_result = 3600
    health_check_interval = 30
|
||||||
|
|
@ -0,0 +1,295 @@
|
||||||
|
"""Pipeline orchestration tasks for ARQ worker."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
import redis.asyncio as aioredis
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
from app.models import Job, Clip
|
||||||
|
from app.services import download, transcription, ai_analysis, clip_extraction
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Lazily-created, process-wide engine and session factory. Creating a new
# engine (with its own connection pool) on every call — as this helper
# previously did — leaks pooled connections, since nothing ever disposed them.
_ENGINE = None
_SESSION_FACTORY = None


async def _get_session() -> AsyncSession:
    """Return a new AsyncSession backed by a shared, lazily-created engine.

    The engine is built on first use and reused for the life of the
    process. Callers are responsible for closing the returned session.
    """
    global _ENGINE, _SESSION_FACTORY
    if _SESSION_FACTORY is None:
        _ENGINE = create_async_engine(settings.database_url, echo=False)
        _SESSION_FACTORY = async_sessionmaker(
            _ENGINE, class_=AsyncSession, expire_on_commit=False
        )
    return _SESSION_FACTORY()
|
||||||
|
|
||||||
|
|
||||||
|
async def _publish_progress(
    redis: aioredis.Redis,
    job_id: str,
    status: str,
    progress: float,
    stage_message: str,
):
    """Push a progress event onto the job's Redis pub/sub channel.

    The API layer subscribes to "job:<id>:progress" and relays these
    events to clients over SSE.
    """
    payload = json.dumps({
        "status": status,
        "progress": round(progress, 2),
        "stage_message": stage_message,
    })
    await redis.publish(f"job:{job_id}:progress", payload)
|
||||||
|
|
||||||
|
|
||||||
|
async def _update_job(
    db: AsyncSession,
    job: Job,
    status: str,
    progress: float,
    stage_message: str,
    **kwargs,
):
    """Persist a job's status/progress plus any extra field updates.

    Extra keyword arguments are set as attributes on the job row
    (e.g. error_message=...), then the session is committed.
    """
    job.status = status
    job.progress = progress
    job.stage_message = stage_message
    for field_name, value in kwargs.items():
        setattr(job, field_name, value)
    await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def process_job(ctx: dict, job_id: str):
    """Main pipeline: download → transcribe → AI analysis → extract clips.

    Progress is persisted on the Job row and mirrored to Redis pub/sub
    so the API can stream it to clients via SSE. On any failure the job
    is marked "failed" with the error message.
    """
    r = ctx.get("redis") or aioredis.from_url(settings.redis_url)
    db = await _get_session()
    # Initialized up front: if the lookup itself raises (bad UUID, DB down),
    # the except-handler below must not hit a NameError referencing `job`.
    job = None

    try:
        job = await db.get(Job, uuid.UUID(job_id))
        if not job:
            logger.error(f"Job {job_id} not found")
            return

        logger.info(f"Processing job {job_id}: {job.source_type}")

        # === STAGE 1: DOWNLOAD ===
        await _update_job(db, job, "downloading", 0.05, "Downloading video...")
        await _publish_progress(r, job_id, "downloading", 0.05, "Downloading video...")

        job_media_dir = os.path.join(settings.media_dir, job_id)
        os.makedirs(job_media_dir, exist_ok=True)

        if job.source_type == "youtube":
            video_info = await download.download_video(job.source_url, job_media_dir)
            job.title = video_info.title
            job.duration = video_info.duration
            job.media_path = video_info.video_path
        elif job.media_path:
            # Uploaded file - get duration
            duration = await clip_extraction.get_video_duration(job.media_path)
            job.duration = duration
            if not job.title:
                job.title = job.source_filename or "Uploaded Video"
        else:
            raise ValueError("No video source available")

        await db.commit()
        await _publish_progress(
            r, job_id, "downloading", 0.20,
            f"Downloaded: {job.title} ({job.duration:.0f}s)"
        )

        # === STAGE 2: TRANSCRIBE ===
        await _update_job(
            db, job, "transcribing", 0.25,
            "Extracting audio and transcribing..."
        )
        await _publish_progress(
            r, job_id, "transcribing", 0.25,
            "Extracting audio and transcribing..."
        )

        # Extract audio for transcription
        audio_path = os.path.join(job_media_dir, "audio.mp3")
        await download.extract_audio(job.media_path, audio_path)

        await _publish_progress(
            r, job_id, "transcribing", 0.30,
            "Transcribing with Whisper..."
        )

        transcript = await transcription.transcribe(audio_path)
        job.transcript = transcript
        await db.commit()

        word_count = len(transcript.get("words", []))
        await _publish_progress(
            r, job_id, "transcribing", 0.50,
            f"Transcription complete: {word_count} words"
        )

        # === STAGE 3: AI ANALYSIS ===
        await _update_job(
            db, job, "analyzing", 0.55,
            "AI analyzing transcript for viral clips..."
        )
        await _publish_progress(
            r, job_id, "analyzing", 0.55,
            "AI analyzing transcript for viral clips..."
        )

        clips_data = await ai_analysis.analyze_transcript(
            transcript=transcript,
            video_title=job.title or "",
            video_duration=job.duration or 0,
        )

        if not clips_data:
            raise ValueError("AI analysis returned no clips")

        await _publish_progress(
            r, job_id, "analyzing", 0.70,
            f"Found {len(clips_data)} potential clips"
        )

        # === STAGE 4: EXTRACT CLIPS ===
        await _update_job(
            db, job, "extracting", 0.75,
            f"Extracting {len(clips_data)} clips..."
        )
        await _publish_progress(
            r, job_id, "extracting", 0.75,
            f"Extracting {len(clips_data)} clips..."
        )

        clips_dir = os.path.join(settings.clips_dir, job_id)
        os.makedirs(clips_dir, exist_ok=True)

        for i, cd in enumerate(clips_data):
            clip_filename = f"clip_{i:02d}.mp4"
            clip_path = os.path.join(clips_dir, clip_filename)
            thumb_path = os.path.join(clips_dir, f"thumb_{i:02d}.jpg")

            # Extract the clip
            await clip_extraction.extract_clip(
                video_path=job.media_path,
                start_time=cd["start_time"],
                end_time=cd["end_time"],
                output_path=clip_path,
            )

            # Extract thumbnail at 25% into the clip; a thumbnail is
            # nice-to-have, so its failure must not fail the job.
            thumb_time = cd["start_time"] + (cd["end_time"] - cd["start_time"]) * 0.25
            try:
                await clip_extraction.extract_thumbnail(
                    video_path=job.media_path,
                    timestamp=thumb_time,
                    output_path=thumb_path,
                )
            except Exception:
                thumb_path = None

            # Get transcript segment for this clip
            segment_text = transcription.get_transcript_segment(
                transcript.get("words", []),
                cd["start_time"],
                cd["end_time"],
            )

            # Save clip to database
            clip = Clip(
                job_id=job.id,
                title=cd["title"],
                start_time=cd["start_time"],
                end_time=cd["end_time"],
                virality_score=cd["virality_score"],
                category=cd["category"],
                reasoning=cd["reasoning"],
                transcript_segment=segment_text,
                thumbnail_path=thumb_path,
                raw_clip_path=clip_path,
            )
            db.add(clip)

            # Extraction spans the 0.75 -> 0.95 slice of overall progress.
            progress = 0.75 + (0.20 * (i + 1) / len(clips_data))
            await _publish_progress(
                r, job_id, "extracting", progress,
                f"Extracted clip {i + 1}/{len(clips_data)}: {cd['title']}"
            )

        await db.commit()

        # === COMPLETE ===
        await _update_job(
            db, job, "complete", 1.0,
            f"Done! {len(clips_data)} clips extracted"
        )
        await _publish_progress(
            r, job_id, "complete", 1.0,
            f"Done! {len(clips_data)} clips extracted"
        )

        # Clean up audio file
        if os.path.exists(audio_path):
            os.remove(audio_path)

        logger.info(f"Job {job_id} complete: {len(clips_data)} clips")

    except Exception as e:
        logger.exception(f"Job {job_id} failed: {e}")
        try:
            if job is not None:
                await _update_job(
                    db, job, "failed", job.progress,
                    str(e), error_message=str(e),
                )
                await _publish_progress(
                    r, job_id, "failed", job.progress, f"Error: {e}"
                )
            else:
                # Job row never loaded (bad UUID, DB error) —
                # still notify any SSE listeners.
                await _publish_progress(r, job_id, "failed", 0.0, f"Error: {e}")
        except Exception:
            # Best-effort failure reporting; never mask the original error.
            pass
    finally:
        await db.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def render_clip(ctx: dict, render_id: str):
    """Render a clip with subtitles and aspect ratio conversion.

    Phase 1 stub: copies the raw extracted clip unchanged. Phase 3 will
    add subtitle burn-in and aspect-ratio conversion.
    """
    from app.models import RenderRequest

    db = await _get_session()
    # Initialized up front: if the lookup itself raises, the except-handler
    # below must not hit a NameError referencing `render`.
    render = None
    try:
        render = await db.get(RenderRequest, uuid.UUID(render_id))
        if not render:
            return

        render.status = "rendering"
        render.progress = 0.5
        await db.commit()

        clip = await db.get(Clip, render.clip_id)
        if not clip or not clip.raw_clip_path:
            render.status = "failed"
            render.error_message = "Clip not found or not extracted"
            await db.commit()
            return

        # Phase 1: just copy the raw clip as-is
        # Phase 3 will add subtitle rendering + aspect ratio conversion
        import shutil
        renders_dir = os.path.join(settings.renders_dir, str(render.clip_id))
        os.makedirs(renders_dir, exist_ok=True)
        output = os.path.join(
            renders_dir,
            f"render_{render.aspect_ratio.replace(':', 'x')}.mp4"
        )
        shutil.copy2(clip.raw_clip_path, output)

        render.output_path = output
        render.status = "complete"
        render.progress = 1.0
        await db.commit()

        logger.info(f"Render {render_id} complete: {output}")
    except Exception as e:
        logger.exception(f"Render {render_id} failed: {e}")
        # `render` is None when the failure happened before/at the lookup.
        if render is not None:
            render.status = "failed"
            render.error_message = str(e)
            await db.commit()
    finally:
        await db.close()
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
fastapi==0.115.6
|
||||||
|
uvicorn[standard]==0.34.0
|
||||||
|
sqlalchemy[asyncio]==2.0.36
|
||||||
|
asyncpg==0.30.0
|
||||||
|
pydantic==2.10.3
|
||||||
|
pydantic-settings==2.7.0
|
||||||
|
arq==0.26.1
|
||||||
|
redis==5.2.1
|
||||||
|
httpx==0.28.1
|
||||||
|
yt-dlp==2024.12.23
|
||||||
|
sse-starlette==2.2.1
|
||||||
|
python-multipart==0.0.20
|
||||||
|
aiofiles==24.1.0
|
||||||
|
|
@ -0,0 +1,113 @@
|
||||||
|
-- ClipForge Database Schema
-- One processing job fans out into many clips; each clip can have many
-- render requests (one per aspect-ratio/style combination).

CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- Job status enum
-- Mirrors the pipeline stages the worker publishes over SSE.
CREATE TYPE job_status AS ENUM (
    'pending',
    'downloading',
    'transcribing',
    'analyzing',
    'extracting',
    'complete',
    'failed'
);

-- Source type enum
CREATE TYPE source_type AS ENUM (
    'youtube',
    'upload'
);

-- Aspect ratio enum
CREATE TYPE aspect_ratio AS ENUM (
    '16:9',
    '9:16',
    '1:1',
    '4:5'
);

-- Render status enum
CREATE TYPE render_status AS ENUM (
    'pending',
    'rendering',
    'complete',
    'failed'
);

-- Jobs table
-- One row per submitted video; transcript is stored inline as JSONB.
CREATE TABLE jobs (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    source_type source_type NOT NULL,
    source_url TEXT,
    source_filename TEXT,
    title TEXT,
    duration FLOAT,
    status job_status NOT NULL DEFAULT 'pending',
    progress FLOAT NOT NULL DEFAULT 0.0,
    stage_message TEXT,
    error_message TEXT,
    media_path TEXT,
    transcript JSONB,
    scene_boundaries JSONB,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Clips table
-- duration is a generated column so it can never drift from start/end.
CREATE TABLE clips (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    job_id UUID NOT NULL REFERENCES jobs(id) ON DELETE CASCADE,
    title TEXT NOT NULL,
    start_time FLOAT NOT NULL,
    end_time FLOAT NOT NULL,
    duration FLOAT GENERATED ALWAYS AS (end_time - start_time) STORED,
    virality_score FLOAT NOT NULL DEFAULT 0.0,
    category TEXT,
    reasoning TEXT,
    transcript_segment TEXT,
    thumbnail_path TEXT,
    raw_clip_path TEXT,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Render requests table
CREATE TABLE render_requests (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    clip_id UUID NOT NULL REFERENCES clips(id) ON DELETE CASCADE,
    aspect_ratio aspect_ratio NOT NULL DEFAULT '9:16',
    subtitle_style TEXT NOT NULL DEFAULT 'tiktok',
    status render_status NOT NULL DEFAULT 'pending',
    progress FLOAT NOT NULL DEFAULT 0.0,
    output_path TEXT,
    error_message TEXT,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Indexes
CREATE INDEX idx_jobs_status ON jobs(status);
CREATE INDEX idx_jobs_created_at ON jobs(created_at DESC);
CREATE INDEX idx_clips_job_id ON clips(job_id);
CREATE INDEX idx_clips_virality ON clips(virality_score DESC);
CREATE INDEX idx_renders_clip_id ON render_requests(clip_id);
CREATE INDEX idx_renders_status ON render_requests(status);

-- Updated_at trigger
-- Keeps updated_at current on every row modification.
CREATE OR REPLACE FUNCTION update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER jobs_updated_at
    BEFORE UPDATE ON jobs
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER renders_updated_at
    BEFORE UPDATE ON render_requests
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();
|
||||||
|
|
@ -0,0 +1,91 @@
|
||||||
|
services:
  # PostgreSQL — job/clip/render metadata. Schema is applied once on
  # first boot via the init.sql entrypoint mount.
  postgres:
    image: postgres:15-alpine
    restart: unless-stopped
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-clipforge}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-changeme_clipforge_2025}
      POSTGRES_DB: ${POSTGRES_DB:-clipforge}
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./database/init.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-clipforge}"]
      interval: 5s
      timeout: 5s
      retries: 5

  # Redis — ARQ task queue + pub/sub channel for SSE progress events.
  redis:
    image: redis:7-alpine
    restart: unless-stopped
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 5s
      retries: 5

  # FastAPI backend — exposed through Traefik at clip.jeffemmett.com.
  backend:
    build:
      context: ./backend
      dockerfile: Dockerfile
    restart: unless-stopped
    env_file: .env
    environment:
      - DATABASE_URL=${DATABASE_URL:-postgresql+asyncpg://clipforge:changeme_clipforge_2025@postgres:5432/clipforge}
      - REDIS_URL=${REDIS_URL:-redis://redis:6379/0}
    volumes:
      # Shared with the worker: /data/media, /data/clips, /data/renders.
      - media_data:/data
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.clipforge.rule=Host(`clip.jeffemmett.com`)"
      - "traefik.http.services.clipforge.loadbalancer.server.port=8000"
      # NOTE(review): this service sits on two networks; Traefik usually
      # needs a `traefik.docker.network` label to pick the right one — confirm.
    networks:
      - default
      - traefik-public

  # ARQ worker — same image as the backend, different entrypoint.
  worker:
    build:
      context: ./backend
      dockerfile: Dockerfile
    restart: unless-stopped
    command: ["python", "-m", "app.worker"]
    env_file: .env
    environment:
      - DATABASE_URL=${DATABASE_URL:-postgresql+asyncpg://clipforge:changeme_clipforge_2025@postgres:5432/clipforge}
      - REDIS_URL=${REDIS_URL:-redis://redis:6379/0}
    volumes:
      - media_data:/data
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    extra_hosts:
      # Lets the containerized worker reach Ollama running on the host.
      - "host.docker.internal:host-gateway"

  # Static placeholder page (Phase 4 replaces this with the React build).
  frontend:
    build:
      context: ./frontend
      dockerfile: Dockerfile
    restart: unless-stopped
    labels:
      - "traefik.enable=false"
    networks:
      - default

volumes:
  postgres_data:
  redis_data:
  media_data:

networks:
  # Pre-existing Traefik network, created outside this compose file.
  traefik-public:
    external: true
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
# Minimal static frontend: stock nginx serving a single inline HTML page.
FROM nginx:alpine

# Placeholder frontend - Phase 4 will replace with React build
RUN echo '<!DOCTYPE html><html><head><title>ClipForge</title><style>body{font-family:system-ui;display:flex;justify-content:center;align-items:center;min-height:100vh;margin:0;background:#0a0a0a;color:#fff}main{text-align:center;max-width:600px;padding:2rem}.logo{font-size:3rem;margin-bottom:1rem}h1{margin:0 0 0.5rem}p{color:#888;margin:0 0 2rem}.status{background:#111;border:1px solid #333;border-radius:8px;padding:1.5rem;text-align:left}code{color:#0f0}</style></head><body><main><div class="logo">✍️</div><h1>ClipForge</h1><p>Self-hosted AI video clipper</p><div class="status"><p><strong>API:</strong> <code>POST /api/jobs</code></p><p><strong>Status:</strong> <code>GET /api/jobs/{id}</code></p><p><strong>Clips:</strong> <code>GET /api/jobs/{id}/clips</code></p><p style="margin-top:1rem;color:#888">Frontend coming in Phase 4</p></div></main></body></html>' > /usr/share/nginx/html/index.html

# nginx's default server listens on 80; Traefik/compose route to it.
EXPOSE 80
|
||||||
Loading…
Reference in New Issue