feat: ClipForge Phase 1 - core pipeline MVP

Self-hosted AI video clipper (Opus Clip alternative).
Pipeline: YouTube URL -> yt-dlp download -> Whisper transcription ->
Ollama AI clip selection -> FFmpeg extraction.

- FastAPI backend with PostgreSQL + Redis + ARQ worker
- 7-stage processing pipeline with SSE progress tracking
- Services: download (yt-dlp), transcription (whisper.jeffemmett.com),
  AI analysis (Ollama), clip extraction (FFmpeg stream copy)
- API: create jobs, track progress, list clips, render, download
- Docker Compose with Traefik labels for clip.jeffemmett.com

Cost: $0/video using existing infrastructure.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jeff Emmett 2026-02-08 12:27:43 +00:00
commit 6aa8a676ec
26 changed files with 1687 additions and 0 deletions

33
.env.example Normal file
View File

@ -0,0 +1,33 @@
# ClipForge Environment Configuration
# Copy to .env and adjust; .env is gitignored.
# Database
POSTGRES_USER=clipforge
# SECURITY: default placeholder — change before any real deployment.
POSTGRES_PASSWORD=changeme_clipforge_2025
POSTGRES_DB=clipforge
# Must embed the same credentials as the POSTGRES_* vars above.
DATABASE_URL=postgresql+asyncpg://clipforge:changeme_clipforge_2025@postgres:5432/clipforge
# Redis
REDIS_URL=redis://redis:6379/0
# Whisper (self-hosted)
WHISPER_API_URL=https://whisper.jeffemmett.com
WHISPER_MODEL=deepdml/faster-whisper-large-v3-turbo-ct2
# Ollama (local)
OLLAMA_URL=http://host.docker.internal:11434
OLLAMA_MODEL=llama3.1:8b
# Storage paths (inside container)
MEDIA_DIR=/data/media
CLIPS_DIR=/data/clips
RENDERS_DIR=/data/renders
# yt-dlp
# Optional Netscape-format cookies file for age/region-restricted videos.
YTDLP_COOKIES_FILE=
# Seconds; videos longer than this are rejected (7200 = 2 hours).
MAX_VIDEO_DURATION=7200
# Processing
MAX_CONCURRENT_JOBS=2
# Clip length bounds in seconds, and how many clips the AI should pick.
CLIP_MIN_DURATION=15
CLIP_MAX_DURATION=90
TARGET_CLIPS=5

9
.gitignore vendored Normal file
View File

@ -0,0 +1,9 @@
.env
__pycache__/
*.pyc
.venv/
node_modules/
dist/
data/
*.egg-info/
.DS_Store

20
backend/Dockerfile Normal file
View File

@ -0,0 +1,20 @@
FROM python:3.12-slim
# Install system dependencies
# ffmpeg is required by the clip-extraction and audio services.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Install Python dependencies
# Copy requirements first so this layer caches across code-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY . .
EXPOSE 8000
# Default: run API server
# (The ARQ worker container overrides this command.)
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

0
backend/app/__init__.py Normal file
View File

View File

View File

View File

@ -0,0 +1,34 @@
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy.ext.asyncio import AsyncSession
from app.database import get_db
from app.models import Clip
router = APIRouter()
@router.get("/clips/{clip_id}")
async def get_clip(clip_id: UUID, db: AsyncSession = Depends(get_db)):
clip = await db.get(Clip, clip_id)
if not clip:
raise HTTPException(404, "Clip not found")
clip.duration = clip.end_time - clip.start_time
return clip
@router.get("/clips/{clip_id}/preview")
async def preview_clip(clip_id: UUID, db: AsyncSession = Depends(get_db)):
clip = await db.get(Clip, clip_id)
if not clip:
raise HTTPException(404, "Clip not found")
if not clip.raw_clip_path:
raise HTTPException(404, "Clip not yet extracted")
return FileResponse(
clip.raw_clip_path,
media_type="video/mp4",
filename=f"{clip.title}.mp4",
)

View File

@ -0,0 +1,167 @@
import asyncio
import json
from uuid import UUID
from arq import create_pool
from arq.connections import RedisSettings
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from sse_starlette.sse import EventSourceResponse
from app.config import settings
from app.database import get_db
from app.models import Job, Clip
from app.schemas import JobCreate, JobResponse, ClipResponse
router = APIRouter()
def _redis_settings() -> RedisSettings:
    """Translate settings.redis_url into arq RedisSettings."""
    from urllib.parse import urlparse

    url = urlparse(settings.redis_url)
    # The URL path carries the database index, e.g. redis://host:6379/0.
    db_index = url.path.lstrip("/") or "0"
    return RedisSettings(
        host=url.hostname or "redis",
        port=url.port or 6379,
        database=int(db_index),
    )
@router.post("/jobs", response_model=JobResponse, status_code=201)
async def create_job(job_in: JobCreate, db: AsyncSession = Depends(get_db)):
if job_in.source_type == "youtube" and not job_in.source_url:
raise HTTPException(400, "source_url required for youtube source")
job = Job(
source_type=job_in.source_type,
source_url=job_in.source_url,
status="pending",
)
db.add(job)
await db.commit()
await db.refresh(job)
# Enqueue processing
pool = await create_pool(_redis_settings())
await pool.enqueue_job("process_job", str(job.id))
await pool.close()
return job
@router.post("/jobs/upload", response_model=JobResponse, status_code=201)
async def create_job_upload(
file: UploadFile = File(...),
db: AsyncSession = Depends(get_db),
):
import os
import aiofiles
os.makedirs(settings.media_dir, exist_ok=True)
safe_name = file.filename.replace("/", "_").replace("..", "_")
dest = os.path.join(settings.media_dir, f"upload_{safe_name}")
async with aiofiles.open(dest, "wb") as f:
while chunk := await file.read(1024 * 1024):
await f.write(chunk)
job = Job(
source_type="upload",
source_filename=safe_name,
media_path=dest,
status="pending",
)
db.add(job)
await db.commit()
await db.refresh(job)
pool = await create_pool(_redis_settings())
await pool.enqueue_job("process_job", str(job.id))
await pool.close()
return job
@router.get("/jobs", response_model=list[JobResponse])
async def list_jobs(
limit: int = 20,
offset: int = 0,
db: AsyncSession = Depends(get_db),
):
result = await db.execute(
select(Job).order_by(Job.created_at.desc()).offset(offset).limit(limit)
)
return result.scalars().all()
@router.get("/jobs/{job_id}", response_model=JobResponse)
async def get_job(job_id: UUID, db: AsyncSession = Depends(get_db)):
job = await db.get(Job, job_id)
if not job:
raise HTTPException(404, "Job not found")
return job
@router.get("/jobs/{job_id}/clips", response_model=list[ClipResponse])
async def get_job_clips(job_id: UUID, db: AsyncSession = Depends(get_db)):
job = await db.get(Job, job_id)
if not job:
raise HTTPException(404, "Job not found")
result = await db.execute(
select(Clip)
.where(Clip.job_id == job_id)
.order_by(Clip.virality_score.desc())
)
clips = result.scalars().all()
# Compute duration manually since it's a generated column
for clip in clips:
clip.duration = clip.end_time - clip.start_time
return clips
@router.get("/jobs/{job_id}/progress")
async def job_progress_sse(job_id: UUID, db: AsyncSession = Depends(get_db)):
job = await db.get(Job, job_id)
if not job:
raise HTTPException(404, "Job not found")
async def event_stream():
import redis.asyncio as aioredis
r = aioredis.from_url(settings.redis_url)
pubsub = r.pubsub()
await pubsub.subscribe(f"job:{job_id}:progress")
# Send current state immediately
await db.refresh(job)
yield {
"event": "progress",
"data": json.dumps({
"status": job.status,
"progress": job.progress,
"stage_message": job.stage_message,
}),
}
if job.status in ("complete", "failed"):
await pubsub.unsubscribe()
await r.close()
return
try:
while True:
msg = await pubsub.get_message(
ignore_subscribe_messages=True, timeout=1.0
)
if msg and msg["type"] == "message":
data = json.loads(msg["data"])
yield {"event": "progress", "data": json.dumps(data)}
if data.get("status") in ("complete", "failed"):
break
await asyncio.sleep(0.5)
finally:
await pubsub.unsubscribe()
await r.close()
return EventSourceResponse(event_stream())

View File

@ -0,0 +1,111 @@
from uuid import UUID
from arq import create_pool
from arq.connections import RedisSettings
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import FileResponse
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.database import get_db
from app.models import Clip, RenderRequest
from app.schemas import RenderCreate, RenderResponse, BulkRenderCreate
router = APIRouter()
def _redis_settings() -> RedisSettings:
    """Build arq RedisSettings from the configured redis:// URL."""
    from urllib.parse import urlparse

    parts = urlparse(settings.redis_url)
    # Database index lives in the URL path ("/0" -> 0).
    return RedisSettings(
        host=parts.hostname or "redis",
        port=parts.port or 6379,
        database=int(parts.path.lstrip("/") or "0"),
    )
@router.post("/clips/{clip_id}/render", response_model=RenderResponse, status_code=201)
async def render_clip(
clip_id: UUID,
render_in: RenderCreate,
db: AsyncSession = Depends(get_db),
):
clip = await db.get(Clip, clip_id)
if not clip:
raise HTTPException(404, "Clip not found")
if not clip.raw_clip_path:
raise HTTPException(400, "Clip not yet extracted")
render = RenderRequest(
clip_id=clip_id,
aspect_ratio=render_in.aspect_ratio,
subtitle_style=render_in.subtitle_style,
status="pending",
)
db.add(render)
await db.commit()
await db.refresh(render)
pool = await create_pool(_redis_settings())
await pool.enqueue_job("render_clip", str(render.id))
await pool.close()
return render
@router.post("/jobs/{job_id}/render-all", response_model=list[RenderResponse], status_code=201)
async def render_all_clips(
job_id: UUID,
bulk_in: BulkRenderCreate,
db: AsyncSession = Depends(get_db),
):
renders = []
pool = await create_pool(_redis_settings())
for clip_id in bulk_in.clip_ids:
clip = await db.get(Clip, clip_id)
if not clip or not clip.raw_clip_path:
continue
render = RenderRequest(
clip_id=clip_id,
aspect_ratio=bulk_in.aspect_ratio,
subtitle_style=bulk_in.subtitle_style,
status="pending",
)
db.add(render)
await db.commit()
await db.refresh(render)
await pool.enqueue_job("render_clip", str(render.id))
renders.append(render)
await pool.close()
return renders
@router.get("/renders/{render_id}", response_model=RenderResponse)
async def get_render(render_id: UUID, db: AsyncSession = Depends(get_db)):
render = await db.get(RenderRequest, render_id)
if not render:
raise HTTPException(404, "Render not found")
return render
@router.get("/renders/{render_id}/download")
async def download_render(render_id: UUID, db: AsyncSession = Depends(get_db)):
render = await db.get(RenderRequest, render_id)
if not render:
raise HTTPException(404, "Render not found")
if render.status != "complete" or not render.output_path:
raise HTTPException(400, "Render not complete")
clip = await db.get(Clip, render.clip_id)
filename = f"{clip.title}_{render.aspect_ratio.replace(':', 'x')}.mp4" if clip else "clip.mp4"
return FileResponse(
render.output_path,
media_type="video/mp4",
filename=filename,
)

37
backend/app/config.py Normal file
View File

@ -0,0 +1,37 @@
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    """Application configuration, loaded from environment / .env.

    Field names map to upper-cased env vars (pydantic-settings
    convention); defaults below mirror .env.example.
    """
    # Database
    database_url: str = "postgresql+asyncpg://clipforge:changeme_clipforge_2025@postgres:5432/clipforge"
    # Redis
    redis_url: str = "redis://redis:6379/0"
    # Whisper
    whisper_api_url: str = "https://whisper.jeffemmett.com"
    whisper_model: str = "deepdml/faster-whisper-large-v3-turbo-ct2"
    # Ollama
    ollama_url: str = "http://host.docker.internal:11434"
    ollama_model: str = "llama3.1:8b"
    # Storage (paths inside the container)
    media_dir: str = "/data/media"
    clips_dir: str = "/data/clips"
    renders_dir: str = "/data/renders"
    # yt-dlp
    ytdlp_cookies_file: str = ""
    # Seconds; longer videos are rejected by the download service.
    max_video_duration: int = 7200
    # Processing
    max_concurrent_jobs: int = 2
    # Clip length bounds (seconds) and target clip count for AI analysis.
    clip_min_duration: int = 15
    clip_max_duration: int = 90
    target_clips: int = 5
    # Unknown env vars are ignored rather than rejected.
    model_config = {"env_file": ".env", "extra": "ignore"}


settings = Settings()

11
backend/app/database.py Normal file
View File

@ -0,0 +1,11 @@
from collections.abc import AsyncGenerator

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker

from app.config import settings

# Single engine / session factory for the whole process.
engine = create_async_engine(settings.database_url, echo=False)
async_session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)


async def get_db() -> AsyncGenerator[AsyncSession, None]:
    """FastAPI dependency: yield one AsyncSession per request.

    The session is closed automatically when the request scope ends.
    (Annotation fixed: this is an async generator, not a plain
    coroutine returning AsyncSession.)
    """
    async with async_session() as session:
        yield session

36
backend/app/main.py Normal file
View File

@ -0,0 +1,36 @@
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.api.routes import jobs, clips, renders


@asynccontextmanager
async def lifespan(app: FastAPI):
    # No startup/shutdown work yet; placeholder for future resources.
    yield


app = FastAPI(
    title="ClipForge",
    description="Self-hosted AI video clipper",
    version="0.1.0",
    lifespan=lifespan,
)
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm whether
# credentials are actually needed, or list explicit origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# All routes are mounted under /api.
app.include_router(jobs.router, prefix="/api")
app.include_router(clips.router, prefix="/api")
app.include_router(renders.router, prefix="/api")


@app.get("/health")
async def health():
    # Liveness probe for Docker/Traefik.
    return {"status": "ok", "service": "clipforge"}

110
backend/app/models.py Normal file
View File

@ -0,0 +1,110 @@
import uuid
from datetime import datetime
from sqlalchemy import (
    Column,
    DateTime,
    Enum,
    Float,
    ForeignKey,
    Index,
    String,
    Text,
    func,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import DeclarativeBase, relationship


class Base(DeclarativeBase):
    """Declarative base for all ClipForge ORM models."""
    pass


class Job(Base):
    """One source video run through the full processing pipeline."""
    __tablename__ = "jobs"
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # "youtube" jobs carry source_url; "upload" jobs carry source_filename.
    source_type = Column(Enum("youtube", "upload", name="source_type"), nullable=False)
    source_url = Column(Text)
    source_filename = Column(Text)
    title = Column(Text)
    # Source video length in seconds.
    duration = Column(Float)
    # Pipeline stage; "complete" and "failed" are terminal.
    status = Column(
        Enum(
            "pending",
            "downloading",
            "transcribing",
            "analyzing",
            "extracting",
            "complete",
            "failed",
            name="job_status",
        ),
        nullable=False,
        default="pending",
    )
    # Fractional progress 0.0-1.0, published alongside SSE updates.
    progress = Column(Float, nullable=False, default=0.0)
    stage_message = Column(Text)
    error_message = Column(Text)
    # Absolute path of the downloaded/uploaded media inside the container.
    media_path = Column(Text)
    # Whisper output (text / words / segments) stored as JSONB.
    transcript = Column(JSONB)
    scene_boundaries = Column(JSONB)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )
    clips = relationship("Clip", back_populates="job", cascade="all, delete-orphan")


class Clip(Base):
    """An AI-selected highlight window within a job's source video."""
    __tablename__ = "clips"
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    job_id = Column(
        UUID(as_uuid=True), ForeignKey("jobs.id", ondelete="CASCADE"), nullable=False
    )
    title = Column(Text, nullable=False)
    # Window boundaries in seconds within the source video.
    start_time = Column(Float, nullable=False)
    end_time = Column(Float, nullable=False)
    # 0-100 score assigned by the AI analysis service.
    virality_score = Column(Float, nullable=False, default=0.0)
    category = Column(Text)
    reasoning = Column(Text)
    transcript_segment = Column(Text)
    thumbnail_path = Column(Text)
    # Set once FFmpeg has extracted the raw clip file.
    raw_clip_path = Column(Text)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    job = relationship("Job", back_populates="clips")
    renders = relationship(
        "RenderRequest", back_populates="clip", cascade="all, delete-orphan"
    )


class RenderRequest(Base):
    """A request to render one clip at a given aspect ratio / style."""
    __tablename__ = "render_requests"
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    clip_id = Column(
        UUID(as_uuid=True), ForeignKey("clips.id", ondelete="CASCADE"), nullable=False
    )
    aspect_ratio = Column(
        Enum("16:9", "9:16", "1:1", "4:5", name="aspect_ratio"),
        nullable=False,
        default="9:16",
    )
    subtitle_style = Column(String, nullable=False, default="tiktok")
    status = Column(
        Enum("pending", "rendering", "complete", "failed", name="render_status"),
        nullable=False,
        default="pending",
    )
    progress = Column(Float, nullable=False, default=0.0)
    # Path of the finished render; only set when status == "complete".
    output_path = Column(Text)
    error_message = Column(Text)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )
    clip = relationship("Clip", back_populates="renders")

83
backend/app/schemas.py Normal file
View File

@ -0,0 +1,83 @@
from datetime import datetime
from typing import Optional
from uuid import UUID
from pydantic import BaseModel, Field


# --- Job Schemas ---
class JobCreate(BaseModel):
    """Request body for POST /jobs."""
    source_type: str = Field(..., pattern="^(youtube|upload)$")
    source_url: Optional[str] = None


class JobResponse(BaseModel):
    """Job as returned by the API (mirrors the Job ORM model)."""
    id: UUID
    source_type: str
    source_url: Optional[str]
    source_filename: Optional[str]
    title: Optional[str]
    duration: Optional[float]
    status: str
    progress: float
    stage_message: Optional[str]
    error_message: Optional[str]
    created_at: datetime
    updated_at: datetime
    model_config = {"from_attributes": True}


class JobProgress(BaseModel):
    """Payload of a single SSE progress event."""
    status: str
    progress: float
    stage_message: Optional[str]


# --- Clip Schemas ---
class ClipResponse(BaseModel):
    """Clip as returned by the API.

    duration is not a DB column; route handlers compute it as
    end_time - start_time before serialization.
    """
    id: UUID
    job_id: UUID
    title: str
    start_time: float
    end_time: float
    duration: Optional[float] = None
    virality_score: float
    category: Optional[str]
    reasoning: Optional[str]
    transcript_segment: Optional[str]
    thumbnail_path: Optional[str]
    raw_clip_path: Optional[str]
    created_at: datetime
    model_config = {"from_attributes": True}


# --- Render Schemas ---
class RenderCreate(BaseModel):
    """Request body for rendering one clip."""
    aspect_ratio: str = Field(default="9:16", pattern="^(16:9|9:16|1:1|4:5)$")
    subtitle_style: str = Field(default="tiktok")


class RenderResponse(BaseModel):
    """Render request as returned by the API."""
    id: UUID
    clip_id: UUID
    aspect_ratio: str
    subtitle_style: str
    status: str
    progress: float
    output_path: Optional[str]
    error_message: Optional[str]
    created_at: datetime
    updated_at: datetime
    model_config = {"from_attributes": True}


class BulkRenderCreate(BaseModel):
    """Request body for rendering several clips with shared settings."""
    clip_ids: list[UUID]
    aspect_ratio: str = Field(default="9:16", pattern="^(16:9|9:16|1:1|4:5)$")
    subtitle_style: str = Field(default="tiktok")

View File

View File

@ -0,0 +1,169 @@
"""AI clip analysis using Ollama (local LLM)."""
import json
import logging
import re
import httpx
from app.config import settings
logger = logging.getLogger(__name__)
SYSTEM_PROMPT = """You are a viral video clip analyst. Given a video transcript with timestamps, identify the best short clips that would perform well on social media (TikTok, YouTube Shorts, Instagram Reels).
For each clip, provide:
- A catchy title (max 60 chars)
- Start and end timestamps (in seconds)
- Virality score (0-100)
- Category (one of: hook, story, insight, humor, emotional, controversial, educational)
- Brief reasoning for why this clip would go viral
Rules:
- Clips should be {min_dur}-{max_dur} seconds long
- Identify {target} clips, ranked by virality potential
- Clips should start and end at natural sentence boundaries
- Prefer clips with strong hooks in the first 3 seconds
- Look for emotional peaks, surprising statements, quotable moments
- Avoid clips that start mid-sentence or end abruptly
Respond ONLY with valid JSON in this exact format:
{{
"clips": [
{{
"title": "Clip title here",
"start_time": 12.5,
"end_time": 45.2,
"virality_score": 85,
"category": "hook",
"reasoning": "Why this clip would perform well"
}}
]
}}"""
async def analyze_transcript(
    transcript: dict,
    video_title: str = "",
    video_duration: float = 0,
) -> list[dict]:
    """Use Ollama to identify the best clips from a transcript.

    Args:
        transcript: dict with 'text', 'words', 'segments' from transcription service
        video_title: original video title for context
        video_duration: total video duration in seconds

    Returns:
        List of clip dicts with title, start_time, end_time, virality_score,
        category, reasoning — sorted by virality_score (see _parse_clips).
    """
    # Build timestamped transcript for the LLM
    text = transcript.get("text", "")
    segments = transcript.get("segments", [])
    if segments:
        # One "[MM:SS - MM:SS] text" line per segment so the model can
        # quote real timestamps back.
        timestamped = "\n".join(
            f"[{_fmt_time(s.get('start', 0))} - {_fmt_time(s.get('end', 0))}] "
            f"{s.get('text', '').strip()}"
            for s in segments
        )
    else:
        # Fall back to plain text with rough time estimates
        # (the model then has no timestamps to anchor on).
        timestamped = text
    # Inject the configured duration bounds / clip count into the prompt.
    system = SYSTEM_PROMPT.format(
        min_dur=settings.clip_min_duration,
        max_dur=settings.clip_max_duration,
        target=settings.target_clips,
    )
    user_prompt = f"""Video Title: {video_title}
Video Duration: {_fmt_time(video_duration)}

Transcript:
{timestamped}

Identify the {settings.target_clips} best viral clips from this transcript."""
    logger.info(f"Sending transcript to Ollama ({settings.ollama_model})...")
    # Long timeout: local LLM inference over a full transcript can be slow.
    async with httpx.AsyncClient(timeout=300.0) as client:
        response = await client.post(
            f"{settings.ollama_url}/api/chat",
            json={
                "model": settings.ollama_model,
                "messages": [
                    {"role": "system", "content": system},
                    {"role": "user", "content": user_prompt},
                ],
                "stream": False,
                "options": {
                    # Low temperature for consistent, parseable JSON output.
                    "temperature": 0.3,
                    "num_predict": 4096,
                },
            },
        )
        response.raise_for_status()
        result = response.json()
    content = result.get("message", {}).get("content", "")
    clips = _parse_clips(content, video_duration)
    logger.info(f"AI identified {len(clips)} clips")
    return clips
def _parse_clips(content: str, video_duration: float) -> list[dict]:
"""Parse LLM response into clip list, handling imperfect JSON."""
# Try to extract JSON from response
json_match = re.search(r"\{[\s\S]*\}", content)
if not json_match:
logger.error(f"No JSON found in LLM response: {content[:200]}")
return []
try:
data = json.loads(json_match.group())
except json.JSONDecodeError:
# Try to fix common JSON issues
fixed = json_match.group()
fixed = re.sub(r",\s*}", "}", fixed)
fixed = re.sub(r",\s*]", "]", fixed)
try:
data = json.loads(fixed)
except json.JSONDecodeError:
logger.error(f"Failed to parse LLM JSON: {content[:200]}")
return []
raw_clips = data.get("clips", [])
clips = []
for c in raw_clips:
start = float(c.get("start_time", 0))
end = float(c.get("end_time", 0))
# Validate
if end <= start:
continue
if start < 0:
start = 0
if end > video_duration and video_duration > 0:
end = video_duration
clips.append({
"title": str(c.get("title", "Untitled"))[:100],
"start_time": round(start, 2),
"end_time": round(end, 2),
"virality_score": max(0, min(100, float(c.get("virality_score", 50)))),
"category": str(c.get("category", "general")),
"reasoning": str(c.get("reasoning", "")),
})
# Sort by virality score descending
clips.sort(key=lambda x: x["virality_score"], reverse=True)
return clips
def _fmt_time(seconds: float) -> str:
"""Format seconds as MM:SS."""
m, s = divmod(int(seconds), 60)
return f"{m:02d}:{s:02d}"

View File

@ -0,0 +1,117 @@
"""Clip extraction service using FFmpeg."""
import asyncio
import logging
import os
from app.config import settings
logger = logging.getLogger(__name__)
async def extract_clip(
    video_path: str,
    start_time: float,
    end_time: float,
    output_path: str,
) -> str:
    """Extract a clip from video using FFmpeg stream copy (instant, no re-encode).

    Args:
        video_path: path to source video
        start_time: clip start in seconds
        end_time: clip end in seconds
        output_path: where to write the clip

    Returns:
        output_path

    Raises:
        RuntimeError: if ffmpeg exits non-zero (stderr included).
    """
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    duration = end_time - start_time
    # Stream copy for speed. -ss before -i does a fast input seek, but
    # combined with -c copy the cut snaps to the nearest preceding
    # keyframe, so clips may start slightly earlier than requested.
    cmd = [
        "ffmpeg",
        "-ss", str(start_time),
        "-i", video_path,
        "-t", str(duration),
        "-c", "copy",
        # Rebase timestamps so players don't see negative PTS after the cut.
        "-avoid_negative_ts", "make_zero",
        "-y",
        output_path,
    ]
    logger.info(
        f"Extracting clip: {start_time:.1f}s - {end_time:.1f}s -> {output_path}"
    )
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    _, stderr = await proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError(f"FFmpeg clip extraction failed: {stderr.decode()}")
    size_mb = os.path.getsize(output_path) / (1024 * 1024)
    logger.info(f"Extracted clip: {output_path} ({size_mb:.1f} MB)")
    return output_path
async def extract_thumbnail(
    video_path: str,
    timestamp: float,
    output_path: str,
) -> str:
    """Grab one frame at `timestamp` (seconds) and save it as a high-quality image."""
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    args = [
        "ffmpeg",
        "-ss", str(timestamp),
        "-i", video_path,
        "-vframes", "1",
        "-q:v", "2",
        "-y",
        output_path,
    ]
    process = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    _, err = await process.communicate()
    if process.returncode != 0:
        raise RuntimeError(f"FFmpeg thumbnail extraction failed: {err.decode()}")
    return output_path
async def get_video_duration(video_path: str) -> float:
    """Get video duration in seconds using ffprobe.

    Returns 0.0 (meaning "unknown") when ffprobe is missing, exits
    non-zero, or emits output that cannot be parsed — the original
    crashed on a missing binary or empty/invalid JSON despite clearly
    intending best-effort behavior.
    """
    import json
    cmd = [
        "ffprobe",
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        video_path,
    ]
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError:
        logger.warning("ffprobe not found on PATH")
        return 0.0
    stdout, _ = await proc.communicate()
    if proc.returncode != 0:
        return 0.0
    try:
        data = json.loads(stdout.decode())
        return float(data.get("format", {}).get("duration", 0))
    except (json.JSONDecodeError, TypeError, ValueError):
        return 0.0

View File

@ -0,0 +1,117 @@
"""Video download service using yt-dlp."""
import os
import re
import logging
from dataclasses import dataclass
from typing import Optional
import yt_dlp
from app.config import settings
logger = logging.getLogger(__name__)
COOKIES_FILE = settings.ytdlp_cookies_file
@dataclass
class VideoInfo:
title: str
duration: float
video_path: str
video_id: str
def extract_video_id(url: str) -> Optional[str]:
    """Pull the 11-character YouTube video id from a watch/embed/short URL, or None."""
    id_patterns = (
        r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})",
        r"youtube\.com/shorts/([a-zA-Z0-9_-]{11})",
    )
    for rx in id_patterns:
        m = re.search(rx, url)
        if m:
            return m.group(1)
    return None
def _base_opts() -> dict:
    """Baseline yt-dlp options, adding a cookies file when one is configured and exists."""
    options: dict = {"quiet": True, "no_warnings": True}
    if COOKIES_FILE and os.path.exists(COOKIES_FILE):
        options["cookiefile"] = COOKIES_FILE
    return options
async def get_video_metadata(url: str) -> dict:
    """Get video metadata (title, duration, id) without downloading.

    yt-dlp's extract_info is fully blocking; running it directly inside
    this async def stalled the event loop, so the work now happens in a
    worker thread via asyncio.to_thread.
    """
    import asyncio

    def _probe() -> dict:
        opts = _base_opts()
        opts["extract_flat"] = False
        with yt_dlp.YoutubeDL(opts) as ydl:
            info = ydl.extract_info(url, download=False)
        return {
            "title": info.get("title", "Unknown"),
            "duration": info.get("duration", 0),
            "video_id": info.get("id", ""),
        }

    return await asyncio.to_thread(_probe)
async def download_video(url: str, output_dir: str) -> VideoInfo:
    """Download a YouTube video as mp4 (video+audio) for clip extraction.

    Two fixes over the original:
    - the max_video_duration limit is now checked via a metadata probe
      BEFORE downloading, so an over-long video fails fast instead of
      being fully downloaded and then rejected;
    - the blocking yt-dlp work runs in a thread so the event loop is
      not stalled.

    Raises:
        ValueError: when the video exceeds settings.max_video_duration.
    """
    import asyncio
    os.makedirs(output_dir, exist_ok=True)
    video_id = extract_video_id(url) or "video"
    output_template = os.path.join(output_dir, f"{video_id}.%(ext)s")

    def _run() -> tuple[dict, str]:
        opts = _base_opts()
        opts.update({
            # Download best video+audio merged to mp4
            "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
            "merge_output_format": "mp4",
            "outtmpl": output_template,
        })
        with yt_dlp.YoutubeDL(opts) as ydl:
            # Probe metadata first so over-long videos fail before download.
            probe = ydl.extract_info(url, download=False)
            probe_duration = probe.get("duration") or 0
            if probe_duration > settings.max_video_duration:
                raise ValueError(
                    f"Video is {probe_duration}s, max is {settings.max_video_duration}s"
                )
            info = ydl.extract_info(url, download=True)
        path = os.path.join(output_dir, f"{video_id}.mp4")
        if not os.path.exists(path):
            # Find whatever file was downloaded (merge may pick another ext).
            for f in os.listdir(output_dir):
                if f.startswith(video_id) and not f.endswith(".part"):
                    path = os.path.join(output_dir, f)
                    break
        return info, path

    info, video_path = await asyncio.to_thread(_run)
    duration = info.get("duration") or 0
    logger.info(f"Downloaded: {info.get('title')} ({duration}s) -> {video_path}")
    return VideoInfo(
        title=info.get("title", "Unknown"),
        duration=duration,
        video_path=video_path,
        video_id=video_id,
    )
async def extract_audio(video_path: str, output_path: str) -> str:
    """Extract the audio track from a video file (mp3, VBR q4) for transcription."""
    import asyncio
    args = (
        "ffmpeg", "-i", video_path,
        "-vn", "-acodec", "libmp3lame", "-q:a", "4",
        "-y", output_path,
    )
    process = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    _, err = await process.communicate()
    if process.returncode != 0:
        raise RuntimeError(f"FFmpeg audio extraction failed: {err.decode()}")
    logger.info(f"Extracted audio: {output_path}")
    return output_path

View File

@ -0,0 +1,82 @@
"""Transcription service using self-hosted faster-whisper-server."""
import logging
import os
import httpx
from app.config import settings
logger = logging.getLogger(__name__)
async def transcribe(audio_path: str) -> dict:
    """Transcribe audio file using local Whisper API.

    Posts the file to the OpenAI-compatible /v1/audio/transcriptions
    endpoint with verbose_json + word granularity.

    Returns dict with:
        - text: full transcript text
        - words: list of {word, start, end} with word-level timestamps
        - segments: raw segment list from the server
        - language: detected language (defaults to "en" if absent)
        - duration: audio duration

    Raises:
        httpx.HTTPStatusError: if the Whisper API returns an error status.
    """
    url = f"{settings.whisper_api_url}/v1/audio/transcriptions"
    # Long timeout: transcribing a full-length video can take minutes.
    async with httpx.AsyncClient(timeout=900.0) as client:
        with open(audio_path, "rb") as f:
            files = {"file": (os.path.basename(audio_path), f, "audio/mpeg")}
            data = {
                "model": settings.whisper_model,
                "response_format": "verbose_json",
                "timestamp_granularities[]": "word",
            }
            logger.info(f"Transcribing {audio_path} via {settings.whisper_api_url}")
            response = await client.post(url, files=files, data=data)
            response.raise_for_status()
            result = response.json()
    text = result.get("text", "").strip()
    words = result.get("words", [])
    segments = result.get("segments", [])
    # Build word-level timestamps
    word_timestamps = []
    if words:
        # Server returned top-level word timestamps directly.
        for w in words:
            word_timestamps.append({
                "word": w.get("word", ""),
                "start": w.get("start", 0.0),
                "end": w.get("end", 0.0),
            })
    elif segments:
        # Fall back to segment-level if word-level not available
        # (some servers nest words inside each segment instead).
        for seg in segments:
            for w in seg.get("words", []):
                word_timestamps.append({
                    "word": w.get("word", ""),
                    "start": w.get("start", 0.0),
                    "end": w.get("end", 0.0),
                })
    logger.info(
        f"Transcription complete: {len(text)} chars, "
        f"{len(word_timestamps)} word timestamps"
    )
    return {
        "text": text,
        "words": word_timestamps,
        "segments": segments,
        "language": result.get("language", "en"),
        "duration": result.get("duration", 0.0),
    }
def get_transcript_segment(words: list[dict], start: float, end: float) -> str:
    """Join the words whose timestamps fall inside [start, end], with 0.5s slack on each side."""
    lo = start - 0.5
    hi = end + 0.5
    picked = []
    for w in words:
        if lo <= w["start"] and w["end"] <= hi:
            picked.append(w["word"])
    return " ".join(picked).strip()

33
backend/app/worker.py Normal file
View File

@ -0,0 +1,33 @@
"""ARQ worker entry point."""
import logging
from urllib.parse import urlparse
from arq import cron
from arq.connections import RedisSettings
from app.config import settings
from app.workers.tasks import process_job, render_clip
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
def _redis_settings() -> RedisSettings:
    # Translate the redis:// URL from config into arq's RedisSettings.
    parsed = urlparse(settings.redis_url)
    return RedisSettings(
        host=parsed.hostname or "redis",
        port=parsed.port or 6379,
        # URL path carries the database index, e.g. redis://host:6379/0.
        database=int(parsed.path.lstrip("/") or "0"),
    )


class WorkerSettings:
    # ARQ worker configuration: registered task functions plus runtime limits.
    functions = [process_job, render_clip]
    redis_settings = _redis_settings()
    # Bounded parallelism — video processing is CPU/IO heavy.
    max_jobs = settings.max_concurrent_jobs
    job_timeout = 3600  # 1 hour max per job
    keep_result = 3600
    health_check_interval = 30

View File

View File

@ -0,0 +1,295 @@
"""Pipeline orchestration tasks for ARQ worker."""
import json
import logging
import os
import uuid
import redis.asyncio as aioredis
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
from app.config import settings
from app.models import Job, Clip
from app.services import download, transcription, ai_analysis, clip_extraction
logger = logging.getLogger(__name__)
# Lazily-created module-level engine and session factory. The original
# built a new create_async_engine (and thus a new connection pool) on
# EVERY call and never disposed it — a steady resource leak in a
# long-running worker. One shared factory fixes that.
_engine = None
_session_factory = None


async def _get_session() -> AsyncSession:
    """Return a fresh AsyncSession from a shared, lazily-built factory."""
    global _engine, _session_factory
    if _session_factory is None:
        _engine = create_async_engine(settings.database_url, echo=False)
        _session_factory = async_sessionmaker(
            _engine, class_=AsyncSession, expire_on_commit=False
        )
    return _session_factory()
async def _publish_progress(
    redis: aioredis.Redis,
    job_id: str,
    status: str,
    progress: float,
    stage_message: str,
):
    """Push one progress event onto the job's Redis pub/sub channel."""
    payload = json.dumps(
        {
            "status": status,
            "progress": round(progress, 2),
            "stage_message": stage_message,
        }
    )
    await redis.publish(f"job:{job_id}:progress", payload)
async def _update_job(
    db: AsyncSession,
    job: Job,
    status: str,
    progress: float,
    stage_message: str,
    **kwargs,
):
    """Persist a job's status/progress/message plus any extra column values."""
    updates = {
        "status": status,
        "progress": progress,
        "stage_message": stage_message,
        **kwargs,
    }
    for field, value in updates.items():
        setattr(job, field, value)
    await db.commit()
async def process_job(ctx: dict, job_id: str):
    """Main pipeline: download → transcribe → AI analysis → extract clips.

    Each stage persists status/progress on the ``jobs`` row and mirrors it to
    the Redis pub/sub channel ``job:{job_id}:progress`` so the API layer can
    stream live updates. On failure the job is marked ``failed`` with the
    error message (best-effort).

    Progress milestones: download 0.05–0.20, transcribe 0.25–0.50,
    analyze 0.55–0.70, extract 0.75–0.95, then 1.0 on completion.
    """
    # Prefer the worker's shared Redis connection (supplied by ARQ in ctx).
    # Fix: if we have to open our own, close it in `finally` — the previous
    # code leaked one Redis client per job when ctx had no "redis".
    r = ctx.get("redis")
    owns_redis = r is None
    if owns_redis:
        r = aioredis.from_url(settings.redis_url)
    db = await _get_session()
    # Fix: bind `job` before the try block; it was previously unbound in the
    # except handler when uuid.UUID(job_id) raised, turning failure
    # bookkeeping into a swallowed NameError.
    job = None
    try:
        job = await db.get(Job, uuid.UUID(job_id))
        if not job:
            logger.error(f"Job {job_id} not found")
            return

        logger.info(f"Processing job {job_id}: {job.source_type}")

        # === STAGE 1: DOWNLOAD ===
        await _update_job(db, job, "downloading", 0.05, "Downloading video...")
        await _publish_progress(r, job_id, "downloading", 0.05, "Downloading video...")

        job_media_dir = os.path.join(settings.media_dir, job_id)
        os.makedirs(job_media_dir, exist_ok=True)

        if job.source_type == "youtube":
            video_info = await download.download_video(job.source_url, job_media_dir)
            job.title = video_info.title
            job.duration = video_info.duration
            job.media_path = video_info.video_path
        elif job.media_path:
            # Uploaded file — already on disk; just probe its duration.
            duration = await clip_extraction.get_video_duration(job.media_path)
            job.duration = duration
            if not job.title:
                job.title = job.source_filename or "Uploaded Video"
        else:
            raise ValueError("No video source available")
        await db.commit()

        await _publish_progress(
            r, job_id, "downloading", 0.20,
            f"Downloaded: {job.title} ({job.duration:.0f}s)"
        )

        # === STAGE 2: TRANSCRIBE ===
        await _update_job(
            db, job, "transcribing", 0.25,
            "Extracting audio and transcribing..."
        )
        await _publish_progress(
            r, job_id, "transcribing", 0.25,
            "Extracting audio and transcribing..."
        )

        # Whisper consumes audio only; extract it to keep the upload small.
        audio_path = os.path.join(job_media_dir, "audio.mp3")
        await download.extract_audio(job.media_path, audio_path)

        await _publish_progress(
            r, job_id, "transcribing", 0.30,
            "Transcribing with Whisper..."
        )
        transcript = await transcription.transcribe(audio_path)
        job.transcript = transcript
        await db.commit()

        word_count = len(transcript.get("words", []))
        await _publish_progress(
            r, job_id, "transcribing", 0.50,
            f"Transcription complete: {word_count} words"
        )

        # === STAGE 3: AI ANALYSIS ===
        await _update_job(
            db, job, "analyzing", 0.55,
            "AI analyzing transcript for viral clips..."
        )
        await _publish_progress(
            r, job_id, "analyzing", 0.55,
            "AI analyzing transcript for viral clips..."
        )

        clips_data = await ai_analysis.analyze_transcript(
            transcript=transcript,
            video_title=job.title or "",
            video_duration=job.duration or 0,
        )
        if not clips_data:
            raise ValueError("AI analysis returned no clips")

        await _publish_progress(
            r, job_id, "analyzing", 0.70,
            f"Found {len(clips_data)} potential clips"
        )

        # === STAGE 4: EXTRACT CLIPS ===
        await _update_job(
            db, job, "extracting", 0.75,
            f"Extracting {len(clips_data)} clips..."
        )
        await _publish_progress(
            r, job_id, "extracting", 0.75,
            f"Extracting {len(clips_data)} clips..."
        )

        clips_dir = os.path.join(settings.clips_dir, job_id)
        os.makedirs(clips_dir, exist_ok=True)

        for i, cd in enumerate(clips_data):
            clip_filename = f"clip_{i:02d}.mp4"
            clip_path = os.path.join(clips_dir, clip_filename)
            thumb_path = os.path.join(clips_dir, f"thumb_{i:02d}.jpg")

            # Extract the clip
            await clip_extraction.extract_clip(
                video_path=job.media_path,
                start_time=cd["start_time"],
                end_time=cd["end_time"],
                output_path=clip_path,
            )

            # Extract thumbnail at 25% into the clip
            thumb_time = cd["start_time"] + (cd["end_time"] - cd["start_time"]) * 0.25
            try:
                await clip_extraction.extract_thumbnail(
                    video_path=job.media_path,
                    timestamp=thumb_time,
                    output_path=thumb_path,
                )
            except Exception:
                # Thumbnails are cosmetic — carry on without one.
                thumb_path = None

            # Get transcript segment for this clip
            segment_text = transcription.get_transcript_segment(
                transcript.get("words", []),
                cd["start_time"],
                cd["end_time"],
            )

            # Save clip to database
            clip = Clip(
                job_id=job.id,
                title=cd["title"],
                start_time=cd["start_time"],
                end_time=cd["end_time"],
                virality_score=cd["virality_score"],
                category=cd["category"],
                reasoning=cd["reasoning"],
                transcript_segment=segment_text,
                thumbnail_path=thumb_path,
                raw_clip_path=clip_path,
            )
            db.add(clip)

            progress = 0.75 + (0.20 * (i + 1) / len(clips_data))
            await _publish_progress(
                r, job_id, "extracting", progress,
                f"Extracted clip {i + 1}/{len(clips_data)}: {cd['title']}"
            )

        await db.commit()

        # === COMPLETE ===
        await _update_job(
            db, job, "complete", 1.0,
            f"Done! {len(clips_data)} clips extracted"
        )
        await _publish_progress(
            r, job_id, "complete", 1.0,
            f"Done! {len(clips_data)} clips extracted"
        )

        # Clean up audio file (the source video is kept for later renders).
        if os.path.exists(audio_path):
            os.remove(audio_path)

        logger.info(f"Job {job_id} complete: {len(clips_data)} clips")

    except Exception as e:
        logger.exception(f"Job {job_id} failed: {e}")
        # Best-effort failure bookkeeping; `job` is None when the lookup
        # never ran (e.g. malformed job_id), so there is no row to update.
        if job is not None:
            try:
                await _update_job(
                    db, job, "failed", job.progress,
                    str(e), error_message=str(e),
                )
                await _publish_progress(
                    r, job_id, "failed", job.progress, f"Error: {e}"
                )
            except Exception:
                pass
    finally:
        await db.close()
        if owns_redis:
            # Close the connection we opened above (ARQ owns ctx["redis"]).
            try:
                await r.aclose()
            except Exception:
                pass
async def render_clip(ctx: dict, render_id: str):
    """Render a clip with subtitles and aspect ratio conversion.

    (Phase 3 - stub for now, copies raw clip.)

    Loads the ``RenderRequest`` row, copies the clip's raw file to the
    renders directory under an aspect-ratio-derived filename, and records
    status/progress on the row. Failures mark the request ``failed`` with
    the error message (best-effort).
    """
    import shutil

    from app.models import RenderRequest

    db = await _get_session()
    # Fix: bind `render` before the try block; it was previously unbound in
    # the except handler when uuid.UUID(render_id) or the lookup raised,
    # crashing the handler with NameError instead of recording the failure.
    render = None
    try:
        render = await db.get(RenderRequest, uuid.UUID(render_id))
        if not render:
            return

        render.status = "rendering"
        render.progress = 0.5
        await db.commit()

        clip = await db.get(Clip, render.clip_id)
        if not clip or not clip.raw_clip_path:
            render.status = "failed"
            render.error_message = "Clip not found or not extracted"
            await db.commit()
            return

        # Phase 1: just copy the raw clip as-is.
        # Phase 3 will add subtitle rendering + aspect ratio conversion.
        renders_dir = os.path.join(settings.renders_dir, str(render.clip_id))
        os.makedirs(renders_dir, exist_ok=True)
        output = os.path.join(
            renders_dir,
            f"render_{render.aspect_ratio.replace(':', 'x')}.mp4"
        )
        shutil.copy2(clip.raw_clip_path, output)

        render.output_path = output
        render.status = "complete"
        render.progress = 1.0
        await db.commit()

        logger.info(f"Render {render_id} complete: {output}")

    except Exception as e:
        logger.exception(f"Render {render_id} failed: {e}")
        if render is not None:
            # Best-effort: the commit itself may be what failed.
            try:
                render.status = "failed"
                render.error_message = str(e)
                await db.commit()
            except Exception:
                pass
    finally:
        await db.close()

13
backend/requirements.txt Normal file
View File

@ -0,0 +1,13 @@
fastapi==0.115.6
uvicorn[standard]==0.34.0
sqlalchemy[asyncio]==2.0.36
asyncpg==0.30.0
pydantic==2.10.3
pydantic-settings==2.7.0
arq==0.26.1
redis==5.2.1
httpx==0.28.1
yt-dlp==2024.12.23
sse-starlette==2.2.1
python-multipart==0.0.20
aiofiles==24.1.0

113
database/init.sql Normal file
View File

@ -0,0 +1,113 @@
-- ClipForge Database Schema
-- Provides uuid_generate_v4() used for all primary keys below.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- Job status enum
-- Values mirror the pipeline stages reported by the worker.
CREATE TYPE job_status AS ENUM (
    'pending',
    'downloading',
    'transcribing',
    'analyzing',
    'extracting',
    'complete',
    'failed'
);

-- Source type enum
-- How the source video enters the system: YouTube URL or direct upload.
CREATE TYPE source_type AS ENUM (
    'youtube',
    'upload'
);

-- Aspect ratio enum
-- Target output formats for rendered clips.
CREATE TYPE aspect_ratio AS ENUM (
    '16:9',
    '9:16',
    '1:1',
    '4:5'
);

-- Render status enum
CREATE TYPE render_status AS ENUM (
    'pending',
    'rendering',
    'complete',
    'failed'
);
-- Jobs table
-- One row per submitted video: tracks pipeline state, progress, and the
-- artifacts produced along the way (media file path, transcript JSON).
CREATE TABLE jobs (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    source_type source_type NOT NULL,
    source_url TEXT,                 -- set for source_type = 'youtube'
    source_filename TEXT,            -- set for source_type = 'upload'
    title TEXT,
    duration FLOAT,                  -- seconds
    status job_status NOT NULL DEFAULT 'pending',
    progress FLOAT NOT NULL DEFAULT 0.0,  -- 0.0 .. 1.0
    stage_message TEXT,              -- human-readable progress text
    error_message TEXT,              -- populated when status = 'failed'
    media_path TEXT,                 -- source video location on disk
    transcript JSONB,                -- Whisper output (includes "words" list)
    scene_boundaries JSONB,          -- not populated by the Phase 1 pipeline
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()  -- maintained by trigger
);
-- Clips table
-- AI-selected highlight segments extracted from a job's source video.
-- Rows are removed automatically when the parent job is deleted.
CREATE TABLE clips (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    job_id UUID NOT NULL REFERENCES jobs(id) ON DELETE CASCADE,
    title TEXT NOT NULL,
    start_time FLOAT NOT NULL,       -- seconds into the source video
    end_time FLOAT NOT NULL,
    duration FLOAT GENERATED ALWAYS AS (end_time - start_time) STORED,
    virality_score FLOAT NOT NULL DEFAULT 0.0,  -- AI-assigned ranking score
    category TEXT,
    reasoning TEXT,                  -- AI's explanation for picking this clip
    transcript_segment TEXT,         -- transcript text within [start, end]
    thumbnail_path TEXT,             -- NULL if thumbnail extraction failed
    raw_clip_path TEXT,              -- extracted mp4 on disk
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- Render requests table
-- One row per requested render of a clip (aspect ratio + subtitle style).
-- Phase 1 renders are straight copies of the raw clip.
CREATE TABLE render_requests (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    clip_id UUID NOT NULL REFERENCES clips(id) ON DELETE CASCADE,
    aspect_ratio aspect_ratio NOT NULL DEFAULT '9:16',
    subtitle_style TEXT NOT NULL DEFAULT 'tiktok',
    status render_status NOT NULL DEFAULT 'pending',
    progress FLOAT NOT NULL DEFAULT 0.0,  -- 0.0 .. 1.0
    output_path TEXT,                -- rendered mp4; set when complete
    error_message TEXT,              -- populated when status = 'failed'
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()  -- maintained by trigger
);
-- Indexes
CREATE INDEX idx_jobs_status ON jobs(status);                   -- filter by state
CREATE INDEX idx_jobs_created_at ON jobs(created_at DESC);      -- newest-first scans
CREATE INDEX idx_clips_job_id ON clips(job_id);                 -- clips of one job
CREATE INDEX idx_clips_virality ON clips(virality_score DESC);  -- best-first ordering
CREATE INDEX idx_renders_clip_id ON render_requests(clip_id);
CREATE INDEX idx_renders_status ON render_requests(status);
-- Updated_at trigger
-- Shared row-level trigger function: stamps updated_at with the current
-- time on every UPDATE of the attached tables.
CREATE OR REPLACE FUNCTION update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER jobs_updated_at
    BEFORE UPDATE ON jobs
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER renders_updated_at
    BEFORE UPDATE ON render_requests
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

91
docker-compose.yml Normal file
View File

@ -0,0 +1,91 @@
services:
  # PostgreSQL — persistent job/clip/render state. The schema is applied
  # from database/init.sql the first time the data volume is initialised.
  postgres:
    image: postgres:15-alpine
    restart: unless-stopped
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-clipforge}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-changeme_clipforge_2025}
      POSTGRES_DB: ${POSTGRES_DB:-clipforge}
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./database/init.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-clipforge}"]
      interval: 5s
      timeout: 5s
      retries: 5

  # Redis — ARQ job queue plus progress pub/sub.
  redis:
    image: redis:7-alpine
    restart: unless-stopped
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 5s
      retries: 5

  # FastAPI backend — routed by Traefik at clip.jeffemmett.com
  # (container port 8000; no host port published directly).
  backend:
    build:
      context: ./backend
      dockerfile: Dockerfile
    restart: unless-stopped
    env_file: .env
    environment:
      # Defaults target the in-compose postgres/redis services above.
      - DATABASE_URL=${DATABASE_URL:-postgresql+asyncpg://clipforge:changeme_clipforge_2025@postgres:5432/clipforge}
      - REDIS_URL=${REDIS_URL:-redis://redis:6379/0}
    volumes:
      - media_data:/data
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.clipforge.rule=Host(`clip.jeffemmett.com`)"
      - "traefik.http.services.clipforge.loadbalancer.server.port=8000"
    networks:
      - default
      - traefik-public

  # ARQ worker — runs the processing pipeline; built from the same image
  # as the backend but started with the worker entry point.
  worker:
    build:
      context: ./backend
      dockerfile: Dockerfile
    restart: unless-stopped
    command: ["python", "-m", "app.worker"]
    env_file: .env
    environment:
      - DATABASE_URL=${DATABASE_URL:-postgresql+asyncpg://clipforge:changeme_clipforge_2025@postgres:5432/clipforge}
      - REDIS_URL=${REDIS_URL:-redis://redis:6379/0}
    volumes:
      # Shared with the backend so the API can serve extracted clips.
      - media_data:/data
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    extra_hosts:
      # Lets the worker reach Ollama on the host via host.docker.internal.
      - "host.docker.internal:host-gateway"

  # Static placeholder page (replaced by the React app in Phase 4);
  # deliberately not exposed through Traefik.
  frontend:
    build:
      context: ./frontend
      dockerfile: Dockerfile
    restart: unless-stopped
    labels:
      - "traefik.enable=false"
    networks:
      - default

volumes:
  postgres_data:
  redis_data:
  media_data:

networks:
  # Pre-existing external network that Traefik routes through.
  traefik-public:
    external: true

6
frontend/Dockerfile Normal file
View File

@ -0,0 +1,6 @@
FROM nginx:alpine

# Placeholder frontend - Phase 4 will replace with React build
# Writes a single static landing page listing the API endpoints; served
# by the stock nginx configuration.
RUN echo '<!DOCTYPE html><html><head><title>ClipForge</title><style>body{font-family:system-ui;display:flex;justify-content:center;align-items:center;min-height:100vh;margin:0;background:#0a0a0a;color:#fff}main{text-align:center;max-width:600px;padding:2rem}.logo{font-size:3rem;margin-bottom:1rem}h1{margin:0 0 0.5rem}p{color:#888;margin:0 0 2rem}.status{background:#111;border:1px solid #333;border-radius:8px;padding:1.5rem;text-align:left}code{color:#0f0}</style></head><body><main><div class="logo">&#9997;&#65039;</div><h1>ClipForge</h1><p>Self-hosted AI video clipper</p><div class="status"><p><strong>API:</strong> <code>POST /api/jobs</code></p><p><strong>Status:</strong> <code>GET /api/jobs/{id}</code></p><p><strong>Clips:</strong> <code>GET /api/jobs/{id}/clips</code></p><p style="margin-top:1rem;color:#888">Frontend coming in Phase 4</p></div></main></body></html>' > /usr/share/nginx/html/index.html

EXPOSE 80