feat: ClipForge Phase 1 - core pipeline MVP
Self-hosted AI video clipper (Opus Clip alternative). Pipeline: YouTube URL -> yt-dlp download -> Whisper transcription -> Ollama AI clip selection -> FFmpeg extraction. - FastAPI backend with PostgreSQL + Redis + ARQ worker - 7-stage processing pipeline with SSE progress tracking - Services: download (yt-dlp), transcription (whisper.jeffemmett.com), AI analysis (Ollama), clip extraction (FFmpeg stream copy) - API: create jobs, track progress, list clips, render, download - Docker Compose with Traefik labels for clip.jeffemmett.com Cost: $0/video using existing infrastructure. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
6aa8a676ec
|
|
@ -0,0 +1,33 @@
|
||||||
|
# ClipForge Environment Configuration
|
||||||
|
|
||||||
|
# Database
|
||||||
|
POSTGRES_USER=clipforge
|
||||||
|
POSTGRES_PASSWORD=changeme_clipforge_2025
|
||||||
|
POSTGRES_DB=clipforge
|
||||||
|
DATABASE_URL=postgresql+asyncpg://clipforge:changeme_clipforge_2025@postgres:5432/clipforge
|
||||||
|
|
||||||
|
# Redis
|
||||||
|
REDIS_URL=redis://redis:6379/0
|
||||||
|
|
||||||
|
# Whisper (self-hosted)
|
||||||
|
WHISPER_API_URL=https://whisper.jeffemmett.com
|
||||||
|
WHISPER_MODEL=deepdml/faster-whisper-large-v3-turbo-ct2
|
||||||
|
|
||||||
|
# Ollama (local)
|
||||||
|
OLLAMA_URL=http://host.docker.internal:11434
|
||||||
|
OLLAMA_MODEL=llama3.1:8b
|
||||||
|
|
||||||
|
# Storage paths (inside container)
|
||||||
|
MEDIA_DIR=/data/media
|
||||||
|
CLIPS_DIR=/data/clips
|
||||||
|
RENDERS_DIR=/data/renders
|
||||||
|
|
||||||
|
# yt-dlp
|
||||||
|
YTDLP_COOKIES_FILE=
|
||||||
|
MAX_VIDEO_DURATION=7200
|
||||||
|
|
||||||
|
# Processing
|
||||||
|
MAX_CONCURRENT_JOBS=2
|
||||||
|
CLIP_MIN_DURATION=15
|
||||||
|
CLIP_MAX_DURATION=90
|
||||||
|
TARGET_CLIPS=5
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
.env
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
.venv/
|
||||||
|
node_modules/
|
||||||
|
dist/
|
||||||
|
data/
|
||||||
|
*.egg-info/
|
||||||
|
.DS_Store
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
# Runtime image for the ClipForge API (and, with an overridden CMD, the worker).
FROM python:3.12-slim

# Install system dependencies
# ffmpeg is required by the clip-extraction service (stream copy + thumbnails).
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies
# Copy requirements first so this layer stays cached when only app code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application
COPY . .

EXPOSE 8000

# Default: run API server
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
|
|
@ -0,0 +1,34 @@
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.database import get_db
|
||||||
|
from app.models import Clip
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/clips/{clip_id}")
async def get_clip(clip_id: UUID, db: AsyncSession = Depends(get_db)):
    """Return a single clip by id, with its duration computed on the fly."""
    found = await db.get(Clip, clip_id)
    if found is None:
        raise HTTPException(404, "Clip not found")
    # duration is not a stored column; attach it so the response can carry it.
    found.duration = found.end_time - found.start_time
    return found
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/clips/{clip_id}/preview")
async def preview_clip(clip_id: UUID, db: AsyncSession = Depends(get_db)):
    """Stream the raw (unrendered) clip file for in-browser preview."""
    clip = await db.get(Clip, clip_id)
    if clip is None:
        raise HTTPException(404, "Clip not found")
    if not clip.raw_clip_path:
        raise HTTPException(404, "Clip not yet extracted")

    return FileResponse(
        path=clip.raw_clip_path,
        media_type="video/mp4",
        filename=f"{clip.title}.mp4",
    )
|
||||||
|
|
@ -0,0 +1,167 @@
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from arq import create_pool
|
||||||
|
from arq.connections import RedisSettings
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
from sse_starlette.sse import EventSourceResponse
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
from app.database import get_db
|
||||||
|
from app.models import Job, Clip
|
||||||
|
from app.schemas import JobCreate, JobResponse, ClipResponse
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
def _redis_settings(url: str | None = None) -> RedisSettings:
    """Build arq ``RedisSettings`` from a ``redis://`` URL.

    Args:
        url: URL to parse; defaults to the configured ``settings.redis_url``.

    Returns:
        RedisSettings with host, port, database and (if present) password.
    """
    from urllib.parse import urlparse

    parsed = urlparse(url or settings.redis_url)
    return RedisSettings(
        host=parsed.hostname or "redis",
        port=parsed.port or 6379,
        database=int(parsed.path.lstrip("/") or "0"),
        # Bug fix: a password embedded in the URL (redis://:secret@host) was
        # previously dropped, breaking auth-protected Redis deployments.
        password=parsed.password,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/jobs", response_model=JobResponse, status_code=201)
async def create_job(job_in: JobCreate, db: AsyncSession = Depends(get_db)):
    """Create a processing job for a YouTube URL and enqueue it on the worker.

    Raises:
        HTTPException: 400 when source_type is "youtube" without a source_url.
    """
    if job_in.source_type == "youtube" and not job_in.source_url:
        raise HTTPException(400, "source_url required for youtube source")

    job = Job(
        source_type=job_in.source_type,
        source_url=job_in.source_url,
        status="pending",
    )
    db.add(job)
    await db.commit()
    await db.refresh(job)

    # Enqueue processing. Always close the pool, even if enqueueing fails,
    # so a transient Redis error does not leak connections.
    pool = await create_pool(_redis_settings())
    try:
        await pool.enqueue_job("process_job", str(job.id))
    finally:
        await pool.close()

    return job
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/jobs/upload", response_model=JobResponse, status_code=201)
async def create_job_upload(
    file: UploadFile = File(...),
    db: AsyncSession = Depends(get_db),
):
    """Accept a direct video upload, persist it under MEDIA_DIR, enqueue a job."""
    import os
    import aiofiles

    os.makedirs(settings.media_dir, exist_ok=True)
    # Bug fix: file.filename may be None (multipart clients can omit it),
    # which crashed .replace(). basename + substitution also guards against
    # path traversal such as "../../etc/passwd".
    original_name = file.filename or "upload.bin"
    safe_name = os.path.basename(original_name).replace("/", "_").replace("..", "_")
    if not safe_name:
        safe_name = "upload.bin"
    dest = os.path.join(settings.media_dir, f"upload_{safe_name}")

    # Stream to disk in 1 MiB chunks to keep memory bounded for large videos.
    async with aiofiles.open(dest, "wb") as f:
        while chunk := await file.read(1024 * 1024):
            await f.write(chunk)

    job = Job(
        source_type="upload",
        source_filename=safe_name,
        media_path=dest,
        status="pending",
    )
    db.add(job)
    await db.commit()
    await db.refresh(job)

    # Always release the redis pool, even when enqueueing fails.
    pool = await create_pool(_redis_settings())
    try:
        await pool.enqueue_job("process_job", str(job.id))
    finally:
        await pool.close()

    return job
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/jobs", response_model=list[JobResponse])
async def list_jobs(
    limit: int = 20,
    offset: int = 0,
    db: AsyncSession = Depends(get_db),
):
    """List jobs, newest first, with simple offset pagination."""
    stmt = (
        select(Job)
        .order_by(Job.created_at.desc())
        .offset(offset)
        .limit(limit)
    )
    rows = await db.execute(stmt)
    return rows.scalars().all()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/jobs/{job_id}", response_model=JobResponse)
async def get_job(job_id: UUID, db: AsyncSession = Depends(get_db)):
    """Fetch one job by id; 404 when it does not exist."""
    found = await db.get(Job, job_id)
    if found is None:
        raise HTTPException(404, "Job not found")
    return found
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/jobs/{job_id}/clips", response_model=list[ClipResponse])
async def get_job_clips(job_id: UUID, db: AsyncSession = Depends(get_db)):
    """Return a job's clips ordered by descending virality score."""
    if await db.get(Job, job_id) is None:
        raise HTTPException(404, "Job not found")

    stmt = (
        select(Clip)
        .where(Clip.job_id == job_id)
        .order_by(Clip.virality_score.desc())
    )
    clips = (await db.execute(stmt)).scalars().all()

    # duration is not a stored column, so fill it in before serialization.
    for item in clips:
        item.duration = item.end_time - item.start_time
    return clips
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/jobs/{job_id}/progress")
async def job_progress_sse(job_id: UUID, db: AsyncSession = Depends(get_db)):
    """Stream job progress to the client as Server-Sent Events.

    Subscribes to the Redis channel ``job:{job_id}:progress`` that the worker
    publishes to, emitting one "progress" event per update until the job
    reaches a terminal state ("complete" or "failed").
    """
    job = await db.get(Job, job_id)
    if not job:
        raise HTTPException(404, "Job not found")

    async def event_stream():
        # Imported lazily so the route module does not require redis at import time.
        import redis.asyncio as aioredis

        r = aioredis.from_url(settings.redis_url)
        pubsub = r.pubsub()
        # Subscribe BEFORE reading current state so no update is missed in between.
        await pubsub.subscribe(f"job:{job_id}:progress")

        # Send current state immediately so the client paints without waiting
        # for the next worker update.
        await db.refresh(job)
        yield {
            "event": "progress",
            "data": json.dumps({
                "status": job.status,
                "progress": job.progress,
                "stage_message": job.stage_message,
            }),
        }

        # Already terminal: nothing more will be published, close right away.
        if job.status in ("complete", "failed"):
            await pubsub.unsubscribe()
            await r.close()
            return

        try:
            # Poll pub/sub; timeout=1.0 keeps the loop responsive to client
            # disconnects (GeneratorExit lands in the finally below).
            while True:
                msg = await pubsub.get_message(
                    ignore_subscribe_messages=True, timeout=1.0
                )
                if msg and msg["type"] == "message":
                    data = json.loads(msg["data"])
                    yield {"event": "progress", "data": json.dumps(data)}
                    # Worker publishes a terminal status as its last message.
                    if data.get("status") in ("complete", "failed"):
                        break
                # NOTE(review): this sleep adds up to 0.5s latency per event;
                # confirm it is intentional throttling before tuning.
                await asyncio.sleep(0.5)
        finally:
            await pubsub.unsubscribe()
            await r.close()

    return EventSourceResponse(event_stream())
|
||||||
|
|
@ -0,0 +1,111 @@
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from arq import create_pool
|
||||||
|
from arq.connections import RedisSettings
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
from app.database import get_db
|
||||||
|
from app.models import Clip, RenderRequest
|
||||||
|
from app.schemas import RenderCreate, RenderResponse, BulkRenderCreate
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
def _redis_settings(url: str | None = None) -> RedisSettings:
    """Build arq ``RedisSettings`` from a ``redis://`` URL.

    Args:
        url: URL to parse; defaults to the configured ``settings.redis_url``.

    Returns:
        RedisSettings with host, port, database and (if present) password.
    """
    from urllib.parse import urlparse

    parsed = urlparse(url or settings.redis_url)
    return RedisSettings(
        host=parsed.hostname or "redis",
        port=parsed.port or 6379,
        database=int(parsed.path.lstrip("/") or "0"),
        # Bug fix: a password embedded in the URL (redis://:secret@host) was
        # previously dropped, breaking auth-protected Redis deployments.
        password=parsed.password,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/clips/{clip_id}/render", response_model=RenderResponse, status_code=201)
async def render_clip(
    clip_id: UUID,
    render_in: RenderCreate,
    db: AsyncSession = Depends(get_db),
):
    """Queue a render (aspect ratio / subtitle styling) for an extracted clip.

    Raises:
        HTTPException: 404 if the clip is unknown, 400 if not yet extracted.
    """
    clip = await db.get(Clip, clip_id)
    if not clip:
        raise HTTPException(404, "Clip not found")
    if not clip.raw_clip_path:
        raise HTTPException(400, "Clip not yet extracted")

    render = RenderRequest(
        clip_id=clip_id,
        aspect_ratio=render_in.aspect_ratio,
        subtitle_style=render_in.subtitle_style,
        status="pending",
    )
    db.add(render)
    await db.commit()
    await db.refresh(render)

    # Always release the redis pool, even if enqueueing fails.
    pool = await create_pool(_redis_settings())
    try:
        await pool.enqueue_job("render_clip", str(render.id))
    finally:
        await pool.close()

    return render
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/jobs/{job_id}/render-all", response_model=list[RenderResponse], status_code=201)
async def render_all_clips(
    job_id: UUID,
    bulk_in: BulkRenderCreate,
    db: AsyncSession = Depends(get_db),
):
    """Queue renders for several clips at once.

    Clips that do not exist or are not yet extracted are silently skipped;
    the response contains only the renders that were actually queued.
    """
    renders = []
    pool = await create_pool(_redis_settings())
    try:
        for clip_id in bulk_in.clip_ids:
            clip = await db.get(Clip, clip_id)
            if not clip or not clip.raw_clip_path:
                continue

            render = RenderRequest(
                clip_id=clip_id,
                aspect_ratio=bulk_in.aspect_ratio,
                subtitle_style=bulk_in.subtitle_style,
                status="pending",
            )
            db.add(render)
            await db.commit()
            await db.refresh(render)

            await pool.enqueue_job("render_clip", str(render.id))
            renders.append(render)
    finally:
        # Bug fix: the pool was leaked when a DB or enqueue call failed mid-loop.
        await pool.close()
    return renders
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/renders/{render_id}", response_model=RenderResponse)
async def get_render(render_id: UUID, db: AsyncSession = Depends(get_db)):
    """Fetch one render request by id; 404 when it does not exist."""
    found = await db.get(RenderRequest, render_id)
    if found is None:
        raise HTTPException(404, "Render not found")
    return found
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/renders/{render_id}/download")
async def download_render(render_id: UUID, db: AsyncSession = Depends(get_db)):
    """Download a finished render as an mp4 attachment."""
    render = await db.get(RenderRequest, render_id)
    if render is None:
        raise HTTPException(404, "Render not found")
    if render.status != "complete" or not render.output_path:
        raise HTTPException(400, "Render not complete")

    # Build a human-friendly filename from the clip title when available.
    clip = await db.get(Clip, render.clip_id)
    if clip:
        filename = f"{clip.title}_{render.aspect_ratio.replace(':', 'x')}.mp4"
    else:
        filename = "clip.mp4"

    return FileResponse(
        render.output_path,
        media_type="video/mp4",
        filename=filename,
    )
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
from pydantic_settings import BaseSettings
|
||||||
|
|
||||||
|
|
||||||
|
class Settings(BaseSettings):
    """Application configuration loaded from environment variables / .env.

    Field names map to env vars case-insensitively via pydantic-settings,
    e.g. ``database_url`` <- DATABASE_URL.
    """

    # Database
    # NOTE(review): default embeds a placeholder password — must be overridden
    # via DATABASE_URL in any real deployment.
    database_url: str = "postgresql+asyncpg://clipforge:changeme_clipforge_2025@postgres:5432/clipforge"

    # Redis
    redis_url: str = "redis://redis:6379/0"

    # Whisper
    whisper_api_url: str = "https://whisper.jeffemmett.com"
    whisper_model: str = "deepdml/faster-whisper-large-v3-turbo-ct2"

    # Ollama
    ollama_url: str = "http://host.docker.internal:11434"
    ollama_model: str = "llama3.1:8b"

    # Storage
    media_dir: str = "/data/media"
    clips_dir: str = "/data/clips"
    renders_dir: str = "/data/renders"

    # yt-dlp
    ytdlp_cookies_file: str = ""
    max_video_duration: int = 7200  # presumably seconds (2h) — confirm in downloader

    # Processing
    max_concurrent_jobs: int = 2
    clip_min_duration: int = 15  # seconds (fed into the LLM prompt)
    clip_max_duration: int = 90  # seconds (fed into the LLM prompt)
    target_clips: int = 5

    # Ignore unknown env vars so the shared .env can carry extra keys.
    model_config = {"env_file": ".env", "extra": "ignore"}


# Module-level singleton, read once at import time.
settings = Settings()
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
"""Async SQLAlchemy engine and per-request session setup."""

from collections.abc import AsyncGenerator

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker

from app.config import settings

engine = create_async_engine(settings.database_url, echo=False)
# expire_on_commit=False keeps attributes readable after commit (routes return
# ORM instances right after db.commit()/db.refresh()).
async_session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)


async def get_db() -> AsyncGenerator[AsyncSession, None]:
    """FastAPI dependency yielding one AsyncSession per request.

    Fixed return annotation: this is an async generator, not a plain
    coroutine returning AsyncSession.
    """
    async with async_session() as session:
        yield session
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
"""ClipForge FastAPI application entry point."""

from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from app.api.routes import jobs, clips, renders


@asynccontextmanager
async def lifespan(app: FastAPI):
    # No startup/shutdown work yet; placeholder for future resource setup.
    yield


app = FastAPI(
    title="ClipForge",
    description="Self-hosted AI video clipper",
    version="0.1.0",
    lifespan=lifespan,
)

# NOTE(review): wildcard origins combined with allow_credentials=True is
# disallowed by the CORS spec (browsers reject "*" with credentials) —
# tighten allow_origins before exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(jobs.router, prefix="/api")
app.include_router(clips.router, prefix="/api")
app.include_router(renders.router, prefix="/api")


@app.get("/health")
async def health():
    """Simple liveness endpoint for container healthchecks."""
    return {"status": "ok", "service": "clipforge"}
|
||||||
|
|
@ -0,0 +1,110 @@
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from sqlalchemy import (
|
||||||
|
Column,
|
||||||
|
DateTime,
|
||||||
|
Enum,
|
||||||
|
Float,
|
||||||
|
ForeignKey,
|
||||||
|
Index,
|
||||||
|
String,
|
||||||
|
Text,
|
||||||
|
func,
|
||||||
|
)
|
||||||
|
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||||
|
from sqlalchemy.orm import DeclarativeBase, relationship
|
||||||
|
|
||||||
|
|
||||||
|
class Base(DeclarativeBase):
    """Common declarative base for all ClipForge ORM models."""
    pass
|
||||||
|
|
||||||
|
|
||||||
|
class Job(Base):
    """One end-to-end processing run: download -> transcribe -> analyze -> extract."""

    __tablename__ = "jobs"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # "youtube" jobs carry source_url; "upload" jobs carry source_filename.
    source_type = Column(Enum("youtube", "upload", name="source_type"), nullable=False)
    source_url = Column(Text)
    source_filename = Column(Text)
    title = Column(Text)
    duration = Column(Float)
    # Pipeline state; "complete" and "failed" are terminal (SSE stream stops there).
    status = Column(
        Enum(
            "pending",
            "downloading",
            "transcribing",
            "analyzing",
            "extracting",
            "complete",
            "failed",
            name="job_status",
        ),
        nullable=False,
        default="pending",
    )
    progress = Column(Float, nullable=False, default=0.0)
    stage_message = Column(Text)
    error_message = Column(Text)
    media_path = Column(Text)
    # Raw transcription payload ('text'/'segments'/'words') from the Whisper service.
    transcript = Column(JSONB)
    scene_boundaries = Column(JSONB)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    clips = relationship("Clip", back_populates="job", cascade="all, delete-orphan")
|
||||||
|
|
||||||
|
|
||||||
|
class Clip(Base):
    """An AI-selected highlight span within a job's source video."""

    __tablename__ = "clips"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    job_id = Column(
        UUID(as_uuid=True), ForeignKey("jobs.id", ondelete="CASCADE"), nullable=False
    )
    title = Column(Text, nullable=False)
    # Boundaries in seconds within the source video; duration is derived
    # (end_time - start_time) by the API layer, not stored here.
    start_time = Column(Float, nullable=False)
    end_time = Column(Float, nullable=False)
    # 0-100 score assigned during LLM analysis; routes sort descending by it.
    virality_score = Column(Float, nullable=False, default=0.0)
    category = Column(Text)
    reasoning = Column(Text)
    transcript_segment = Column(Text)
    thumbnail_path = Column(Text)
    # Set once FFmpeg extraction has produced the raw (unrendered) file.
    raw_clip_path = Column(Text)
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    job = relationship("Job", back_populates="clips")
    renders = relationship(
        "RenderRequest", back_populates="clip", cascade="all, delete-orphan"
    )
|
||||||
|
|
||||||
|
|
||||||
|
class RenderRequest(Base):
    """A request to render one clip into a deliverable (aspect ratio + subtitles)."""

    __tablename__ = "render_requests"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    clip_id = Column(
        UUID(as_uuid=True), ForeignKey("clips.id", ondelete="CASCADE"), nullable=False
    )
    aspect_ratio = Column(
        Enum("16:9", "9:16", "1:1", "4:5", name="aspect_ratio"),
        nullable=False,
        default="9:16",
    )
    subtitle_style = Column(String, nullable=False, default="tiktok")
    status = Column(
        Enum("pending", "rendering", "complete", "failed", name="render_status"),
        nullable=False,
        default="pending",
    )
    progress = Column(Float, nullable=False, default=0.0)
    # Path of the finished file; download requires status == "complete".
    output_path = Column(Text)
    error_message = Column(Text)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )

    clip = relationship("Clip", back_populates="renders")
|
||||||
|
|
@ -0,0 +1,83 @@
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
# --- Job Schemas ---
|
||||||
|
|
||||||
|
class JobCreate(BaseModel):
    """Request body for creating a processing job."""

    # "youtube" requires source_url; direct uploads use the /jobs/upload route.
    source_type: str = Field(..., pattern="^(youtube|upload)$")
    source_url: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
class JobResponse(BaseModel):
    """Serialized Job row returned by the jobs API."""

    id: UUID
    source_type: str
    source_url: Optional[str]
    source_filename: Optional[str]
    title: Optional[str]
    duration: Optional[float]
    status: str
    progress: float
    stage_message: Optional[str]
    error_message: Optional[str]
    created_at: datetime
    updated_at: datetime

    # Allow construction directly from ORM instances.
    model_config = {"from_attributes": True}
|
||||||
|
|
||||||
|
|
||||||
|
class JobProgress(BaseModel):
    """Shape of a single progress update (as streamed over SSE)."""

    status: str
    progress: float
    stage_message: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
|
# --- Clip Schemas ---
|
||||||
|
|
||||||
|
class ClipResponse(BaseModel):
    """Serialized Clip row returned by the clips API."""

    id: UUID
    job_id: UUID
    title: str
    start_time: float
    end_time: float
    # Not stored in the DB; computed (end_time - start_time) by the routes.
    duration: Optional[float] = None
    virality_score: float
    category: Optional[str]
    reasoning: Optional[str]
    transcript_segment: Optional[str]
    thumbnail_path: Optional[str]
    raw_clip_path: Optional[str]
    created_at: datetime

    # Allow construction directly from ORM instances.
    model_config = {"from_attributes": True}
|
||||||
|
|
||||||
|
|
||||||
|
# --- Render Schemas ---
|
||||||
|
|
||||||
|
class RenderCreate(BaseModel):
    """Rendering options for a single clip."""

    aspect_ratio: str = Field(default="9:16", pattern="^(16:9|9:16|1:1|4:5)$")
    subtitle_style: str = Field(default="tiktok")
|
||||||
|
|
||||||
|
|
||||||
|
class RenderResponse(BaseModel):
    """Serialized RenderRequest row returned by the renders API."""

    id: UUID
    clip_id: UUID
    aspect_ratio: str
    subtitle_style: str
    status: str
    progress: float
    output_path: Optional[str]
    error_message: Optional[str]
    created_at: datetime
    updated_at: datetime

    # Allow construction directly from ORM instances.
    model_config = {"from_attributes": True}
|
||||||
|
|
||||||
|
|
||||||
|
class BulkRenderCreate(BaseModel):
    """Rendering options applied to a batch of clips at once."""

    clip_ids: list[UUID]
    aspect_ratio: str = Field(default="9:16", pattern="^(16:9|9:16|1:1|4:5)$")
    subtitle_style: str = Field(default="tiktok")
|
||||||
|
|
@ -0,0 +1,169 @@
|
||||||
|
"""AI clip analysis using Ollama (local LLM)."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
SYSTEM_PROMPT = """You are a viral video clip analyst. Given a video transcript with timestamps, identify the best short clips that would perform well on social media (TikTok, YouTube Shorts, Instagram Reels).
|
||||||
|
|
||||||
|
For each clip, provide:
|
||||||
|
- A catchy title (max 60 chars)
|
||||||
|
- Start and end timestamps (in seconds)
|
||||||
|
- Virality score (0-100)
|
||||||
|
- Category (one of: hook, story, insight, humor, emotional, controversial, educational)
|
||||||
|
- Brief reasoning for why this clip would go viral
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Clips should be {min_dur}-{max_dur} seconds long
|
||||||
|
- Identify {target} clips, ranked by virality potential
|
||||||
|
- Clips should start and end at natural sentence boundaries
|
||||||
|
- Prefer clips with strong hooks in the first 3 seconds
|
||||||
|
- Look for emotional peaks, surprising statements, quotable moments
|
||||||
|
- Avoid clips that start mid-sentence or end abruptly
|
||||||
|
|
||||||
|
Respond ONLY with valid JSON in this exact format:
|
||||||
|
{{
|
||||||
|
"clips": [
|
||||||
|
{{
|
||||||
|
"title": "Clip title here",
|
||||||
|
"start_time": 12.5,
|
||||||
|
"end_time": 45.2,
|
||||||
|
"virality_score": 85,
|
||||||
|
"category": "hook",
|
||||||
|
"reasoning": "Why this clip would perform well"
|
||||||
|
}}
|
||||||
|
]
|
||||||
|
}}"""
|
||||||
|
|
||||||
|
|
||||||
|
async def analyze_transcript(
    transcript: dict,
    video_title: str = "",
    video_duration: float = 0,
) -> list[dict]:
    """Use Ollama to identify the best clips from a transcript.

    Args:
        transcript: dict with 'text', 'words', 'segments' from transcription service
        video_title: original video title for context
        video_duration: total video duration in seconds

    Returns:
        List of clip dicts with title, start_time, end_time, virality_score, category, reasoning
    """
    # Build timestamped transcript for the LLM
    text = transcript.get("text", "")
    segments = transcript.get("segments", [])

    if segments:
        # One "[MM:SS - MM:SS] text" line per segment so the model can quote
        # real timestamps back in its JSON.
        timestamped = "\n".join(
            f"[{_fmt_time(s.get('start', 0))} - {_fmt_time(s.get('end', 0))}] "
            f"{s.get('text', '').strip()}"
            for s in segments
        )
    else:
        # Fall back to plain text with rough time estimates
        timestamped = text

    system = SYSTEM_PROMPT.format(
        min_dur=settings.clip_min_duration,
        max_dur=settings.clip_max_duration,
        target=settings.target_clips,
    )

    user_prompt = f"""Video Title: {video_title}
Video Duration: {_fmt_time(video_duration)}

Transcript:
{timestamped}

Identify the {settings.target_clips} best viral clips from this transcript."""

    logger.info(f"Sending transcript to Ollama ({settings.ollama_model})...")

    # Generous timeout: local LLM inference over a full transcript can take minutes.
    async with httpx.AsyncClient(timeout=300.0) as client:
        response = await client.post(
            f"{settings.ollama_url}/api/chat",
            json={
                "model": settings.ollama_model,
                "messages": [
                    {"role": "system", "content": system},
                    {"role": "user", "content": user_prompt},
                ],
                "stream": False,
                "options": {
                    # Low temperature for more deterministic, parseable JSON.
                    "temperature": 0.3,
                    "num_predict": 4096,
                },
            },
        )
        response.raise_for_status()
        result = response.json()

    content = result.get("message", {}).get("content", "")
    # _parse_clips tolerates prose around the JSON and clamps out-of-range times.
    clips = _parse_clips(content, video_duration)

    logger.info(f"AI identified {len(clips)} clips")
    return clips
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_clips(content: str, video_duration: float) -> list[dict]:
|
||||||
|
"""Parse LLM response into clip list, handling imperfect JSON."""
|
||||||
|
# Try to extract JSON from response
|
||||||
|
json_match = re.search(r"\{[\s\S]*\}", content)
|
||||||
|
if not json_match:
|
||||||
|
logger.error(f"No JSON found in LLM response: {content[:200]}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(json_match.group())
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# Try to fix common JSON issues
|
||||||
|
fixed = json_match.group()
|
||||||
|
fixed = re.sub(r",\s*}", "}", fixed)
|
||||||
|
fixed = re.sub(r",\s*]", "]", fixed)
|
||||||
|
try:
|
||||||
|
data = json.loads(fixed)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.error(f"Failed to parse LLM JSON: {content[:200]}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
raw_clips = data.get("clips", [])
|
||||||
|
clips = []
|
||||||
|
|
||||||
|
for c in raw_clips:
|
||||||
|
start = float(c.get("start_time", 0))
|
||||||
|
end = float(c.get("end_time", 0))
|
||||||
|
|
||||||
|
# Validate
|
||||||
|
if end <= start:
|
||||||
|
continue
|
||||||
|
if start < 0:
|
||||||
|
start = 0
|
||||||
|
if end > video_duration and video_duration > 0:
|
||||||
|
end = video_duration
|
||||||
|
|
||||||
|
clips.append({
|
||||||
|
"title": str(c.get("title", "Untitled"))[:100],
|
||||||
|
"start_time": round(start, 2),
|
||||||
|
"end_time": round(end, 2),
|
||||||
|
"virality_score": max(0, min(100, float(c.get("virality_score", 50)))),
|
||||||
|
"category": str(c.get("category", "general")),
|
||||||
|
"reasoning": str(c.get("reasoning", "")),
|
||||||
|
})
|
||||||
|
|
||||||
|
# Sort by virality score descending
|
||||||
|
clips.sort(key=lambda x: x["virality_score"], reverse=True)
|
||||||
|
return clips
|
||||||
|
|
||||||
|
|
||||||
|
def _fmt_time(seconds: float) -> str:
|
||||||
|
"""Format seconds as MM:SS."""
|
||||||
|
m, s = divmod(int(seconds), 60)
|
||||||
|
return f"{m:02d}:{s:02d}"
|
||||||
|
|
@ -0,0 +1,117 @@
|
||||||
|
"""Clip extraction service using FFmpeg."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_clip(
    video_path: str,
    start_time: float,
    end_time: float,
    output_path: str,
) -> str:
    """Extract a clip from video using FFmpeg stream copy (instant, no re-encode).

    Args:
        video_path: path to source video
        start_time: clip start in seconds
        end_time: clip end in seconds
        output_path: where to write the clip

    Returns:
        output_path

    Raises:
        RuntimeError: if the ffmpeg subprocess exits non-zero.
    """
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    duration = end_time - start_time

    # Stream copy (-c copy) skips re-encoding, so extraction is near-instant.
    # NOTE(review): with -c copy, the input-side -ss snaps to the nearest
    # keyframe, so the actual start can be off by up to a GOP length.
    cmd = [
        "ffmpeg",
        "-ss", str(start_time),
        "-i", video_path,
        "-t", str(duration),
        "-c", "copy",
        # Rebase timestamps so players don't show blank/frozen leading frames.
        "-avoid_negative_ts", "make_zero",
        "-y",
        output_path,
    ]

    logger.info(
        f"Extracting clip: {start_time:.1f}s - {end_time:.1f}s -> {output_path}"
    )

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    _, stderr = await proc.communicate()

    if proc.returncode != 0:
        raise RuntimeError(f"FFmpeg clip extraction failed: {stderr.decode()}")

    size_mb = os.path.getsize(output_path) / (1024 * 1024)
    logger.info(f"Extracted clip: {output_path} ({size_mb:.1f} MB)")
    return output_path
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_thumbnail(
    video_path: str,
    timestamp: float,
    output_path: str,
) -> str:
    """Extract a single frame as a JPEG thumbnail.

    Args:
        video_path: path to source video
        timestamp: position in seconds to grab the frame from
        output_path: where to write the image

    Returns:
        output_path

    Raises:
        RuntimeError: if FFmpeg exits non-zero.
    """
    # Guard against a bare filename: os.makedirs("") raises.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    cmd = [
        "ffmpeg",
        "-ss", str(timestamp),
        "-i", video_path,
        "-vframes", "1",   # grab exactly one frame
        "-q:v", "2",       # high JPEG quality (2 on ffmpeg's 2-31 scale)
        "-y",
        output_path,
    ]

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    _, stderr = await proc.communicate()

    if proc.returncode != 0:
        raise RuntimeError(f"FFmpeg thumbnail extraction failed: {stderr.decode()}")

    return output_path
|
||||||
|
|
||||||
|
|
||||||
|
async def get_video_duration(video_path: str) -> float:
    """Get video duration in seconds using ffprobe.

    Returns 0.0 when ffprobe fails OR when its output cannot be parsed —
    previously a malformed/empty stdout would raise from json.loads,
    breaking the "best-effort, 0.0 on failure" contract of this helper.
    """
    cmd = [
        "ffprobe",
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        video_path,
    ]

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, _ = await proc.communicate()

    if proc.returncode != 0:
        return 0.0

    import json

    try:
        data = json.loads(stdout.decode())
        return float(data.get("format", {}).get("duration", 0))
    except (json.JSONDecodeError, TypeError, ValueError):
        # Malformed or empty ffprobe output: treat as unknown duration.
        return 0.0
|
||||||
|
|
@ -0,0 +1,117 @@
|
||||||
|
"""Video download service using yt-dlp."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import yt_dlp
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
COOKIES_FILE = settings.ytdlp_cookies_file
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class VideoInfo:
    """Result of a completed video download."""

    # Video title as reported by the source site.
    title: str
    # Duration in seconds.
    duration: float
    # Filesystem path to the downloaded media file.
    video_path: str
    # Source video identifier (e.g. the 11-character YouTube ID).
    video_id: str
|
||||||
|
|
||||||
|
|
||||||
|
def extract_video_id(url: str) -> Optional[str]:
    """Pull the 11-character YouTube video ID out of a URL.

    Supports watch, youtu.be, embed, and shorts URL forms; returns
    None when the URL matches none of them.
    """
    id_patterns = (
        r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})",
        r"youtube\.com/shorts/([a-zA-Z0-9_-]{11})",
    )
    for candidate in id_patterns:
        found = re.search(candidate, url)
        if found is not None:
            return found.group(1)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _base_opts() -> dict:
    """Common yt-dlp options; attaches the cookies file when configured and present."""
    options: dict = {"quiet": True, "no_warnings": True}
    if COOKIES_FILE and os.path.exists(COOKIES_FILE):
        # Cookies let yt-dlp fetch age-gated / member-only videos.
        options["cookiefile"] = COOKIES_FILE
    return options
|
||||||
|
|
||||||
|
|
||||||
|
async def get_video_metadata(url: str) -> dict:
    """Get video metadata without downloading.

    yt-dlp is synchronous; running it directly in this coroutine would
    block the event loop for the duration of the network probe, so the
    extraction is pushed to a worker thread via asyncio.to_thread.

    Returns:
        dict with "title", "duration" (seconds), and "video_id".
    """
    import asyncio

    opts = _base_opts()
    opts["extract_flat"] = False

    def _probe() -> dict:
        with yt_dlp.YoutubeDL(opts) as ydl:
            return ydl.extract_info(url, download=False)

    info = await asyncio.to_thread(_probe)
    return {
        "title": info.get("title", "Unknown"),
        "duration": info.get("duration", 0),
        "video_id": info.get("id", ""),
    }
|
||||||
|
|
||||||
|
|
||||||
|
async def download_video(url: str, output_dir: str) -> VideoInfo:
    """Download a video from a YouTube URL (video+audio merged to mp4).

    The blocking yt-dlp download is run in a worker thread
    (asyncio.to_thread) so the event loop stays responsive while a
    potentially multi-minute download is in flight.

    Args:
        url: YouTube video URL.
        output_dir: directory to write the media file into (created if absent).

    Returns:
        VideoInfo describing the downloaded file.

    Raises:
        ValueError: if the downloaded video exceeds settings.max_video_duration.
    """
    import asyncio

    os.makedirs(output_dir, exist_ok=True)

    video_id = extract_video_id(url) or "video"
    output_template = os.path.join(output_dir, f"{video_id}.%(ext)s")

    opts = _base_opts()
    opts.update({
        # Download best video+audio merged to mp4
        "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
        "merge_output_format": "mp4",
        "outtmpl": output_template,
    })

    def _download() -> dict:
        with yt_dlp.YoutubeDL(opts) as ydl:
            return ydl.extract_info(url, download=True)

    info = await asyncio.to_thread(_download)

    video_path = os.path.join(output_dir, f"{video_id}.mp4")
    if not os.path.exists(video_path):
        # The merge may have produced a different container —
        # find whatever file was actually written (skip partials).
        for f in os.listdir(output_dir):
            if f.startswith(video_id) and not f.endswith(".part"):
                video_path = os.path.join(output_dir, f)
                break

    duration = info.get("duration", 0)
    if duration > settings.max_video_duration:
        raise ValueError(
            f"Video is {duration}s, max is {settings.max_video_duration}s"
        )

    logger.info(f"Downloaded: {info.get('title')} ({duration}s) -> {video_path}")
    return VideoInfo(
        title=info.get("title", "Unknown"),
        duration=duration,
        video_path=video_path,
        video_id=video_id,
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_audio(video_path: str, output_path: str) -> str:
    """Extract the audio track of a video to MP3 for transcription.

    Args:
        video_path: source video file.
        output_path: destination .mp3 path.

    Returns:
        output_path

    Raises:
        RuntimeError: if FFmpeg exits non-zero.
    """
    import asyncio

    ffmpeg_args = (
        "ffmpeg", "-i", video_path,
        "-vn", "-acodec", "libmp3lame", "-q:a", "4",
        "-y", output_path,
    )
    proc = await asyncio.create_subprocess_exec(
        *ffmpeg_args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    _, stderr = await proc.communicate()

    if proc.returncode != 0:
        raise RuntimeError(f"FFmpeg audio extraction failed: {stderr.decode()}")

    logger.info(f"Extracted audio: {output_path}")
    return output_path
|
||||||
|
|
@ -0,0 +1,82 @@
|
||||||
|
"""Transcription service using self-hosted faster-whisper-server."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def transcribe(audio_path: str) -> dict:
    """Transcribe an audio file via the self-hosted Whisper API.

    Returns a dict with:
        - text: full transcript text
        - words: list of {word, start, end} with word-level timestamps
        - segments: raw segments from the API response
        - language: detected language
        - duration: audio duration
    """
    endpoint = f"{settings.whisper_api_url}/v1/audio/transcriptions"

    async with httpx.AsyncClient(timeout=900.0) as client:
        with open(audio_path, "rb") as audio_file:
            upload = {"file": (os.path.basename(audio_path), audio_file, "audio/mpeg")}
            form = {
                "model": settings.whisper_model,
                "response_format": "verbose_json",
                "timestamp_granularities[]": "word",
            }

            logger.info(f"Transcribing {audio_path} via {settings.whisper_api_url}")
            response = await client.post(endpoint, files=upload, data=form)
            response.raise_for_status()
            result = response.json()

    text = result.get("text", "").strip()
    words = result.get("words", [])
    segments = result.get("segments", [])

    def _stamp(w: dict) -> dict:
        return {
            "word": w.get("word", ""),
            "start": w.get("start", 0.0),
            "end": w.get("end", 0.0),
        }

    # Prefer the top-level word list; otherwise fall back to the
    # per-segment word timings when the server nested them there.
    if words:
        word_timestamps = [_stamp(w) for w in words]
    elif segments:
        word_timestamps = [
            _stamp(w) for seg in segments for w in seg.get("words", [])
        ]
    else:
        word_timestamps = []

    logger.info(
        f"Transcription complete: {len(text)} chars, "
        f"{len(word_timestamps)} word timestamps"
    )

    return {
        "text": text,
        "words": word_timestamps,
        "segments": segments,
        "language": result.get("language", "en"),
        "duration": result.get("duration", 0.0),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def get_transcript_segment(words: list[dict], start: float, end: float) -> str:
    """Join the words whose timestamps fall inside [start, end].

    A 0.5s tolerance is applied on both edges so words straddling a
    clip boundary are still included.
    """
    picked = []
    for entry in words:
        inside = entry["start"] >= start - 0.5 and entry["end"] <= end + 0.5
        if inside:
            picked.append(entry["word"])
    return " ".join(picked).strip()
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
"""ARQ worker entry point."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from arq import cron
|
||||||
|
from arq.connections import RedisSettings
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
from app.workers.tasks import process_job, render_clip
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _redis_settings() -> RedisSettings:
    """Build ARQ RedisSettings from the REDIS_URL in config.

    Parses host, port, database index, and credentials, so URLs like
    redis://:secret@host:6379/1 work. Previously any username/password
    embedded in the URL was silently dropped, making the worker fail
    against an auth-enabled Redis.
    """
    parsed = urlparse(settings.redis_url)
    return RedisSettings(
        host=parsed.hostname or "redis",
        port=parsed.port or 6379,
        database=int(parsed.path.lstrip("/") or "0"),
        # None when the URL carries no credentials — same as before.
        username=parsed.username,
        password=parsed.password,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class WorkerSettings:
    """ARQ worker configuration (consumed by `arq app.worker.WorkerSettings`)."""

    # Task callables this worker is allowed to execute.
    functions = [process_job, render_clip]
    redis_settings = _redis_settings()
    # Cap on concurrently running jobs — each pipeline run is CPU/disk heavy.
    max_jobs = settings.max_concurrent_jobs
    job_timeout = 3600  # 1 hour max per job
    # How long (seconds) finished-job results stay in Redis.
    keep_result = 3600
    health_check_interval = 30
|
||||||
|
|
@ -0,0 +1,295 @@
|
||||||
|
"""Pipeline orchestration tasks for ARQ worker."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
import redis.asyncio as aioredis
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
from app.models import Job, Clip
|
||||||
|
from app.services import download, transcription, ai_analysis, clip_extraction
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Lazily-created, process-wide engine and session factory. Creating a new
# engine (with its own connection pool) on every call — as this helper
# previously did — leaks pooled connections, since nothing ever disposed them.
_ENGINE = None
_SESSION_FACTORY = None


async def _get_session() -> AsyncSession:
    """Return a new AsyncSession backed by a shared, lazily-created engine.

    The engine is built on first use and reused for the life of the
    process. Callers are responsible for closing the returned session.
    """
    global _ENGINE, _SESSION_FACTORY
    if _SESSION_FACTORY is None:
        _ENGINE = create_async_engine(settings.database_url, echo=False)
        _SESSION_FACTORY = async_sessionmaker(
            _ENGINE, class_=AsyncSession, expire_on_commit=False
        )
    return _SESSION_FACTORY()
|
||||||
|
|
||||||
|
|
||||||
|
async def _publish_progress(
    redis: aioredis.Redis,
    job_id: str,
    status: str,
    progress: float,
    stage_message: str,
):
    """Push a progress event onto the job's Redis pub/sub channel.

    The API layer subscribes to "job:<id>:progress" and relays these
    events to clients over SSE.
    """
    payload = json.dumps({
        "status": status,
        "progress": round(progress, 2),
        "stage_message": stage_message,
    })
    await redis.publish(f"job:{job_id}:progress", payload)
|
||||||
|
|
||||||
|
|
||||||
|
async def _update_job(
    db: AsyncSession,
    job: Job,
    status: str,
    progress: float,
    stage_message: str,
    **kwargs,
):
    """Persist a job's status/progress plus any extra field updates.

    Extra keyword arguments are set as attributes on the job row
    (e.g. error_message=...), then the session is committed.
    """
    job.status = status
    job.progress = progress
    job.stage_message = stage_message
    for field_name, value in kwargs.items():
        setattr(job, field_name, value)
    await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def process_job(ctx: dict, job_id: str):
    """Main pipeline: download → transcribe → AI analysis → extract clips.

    Progress is persisted on the Job row and mirrored to Redis pub/sub
    so the API can stream it to clients via SSE. On any failure the job
    is marked "failed" with the error message.
    """
    r = ctx.get("redis") or aioredis.from_url(settings.redis_url)
    db = await _get_session()
    # Initialized up front: if the lookup itself raises (bad UUID, DB down),
    # the except-handler below must not hit a NameError referencing `job`.
    job = None

    try:
        job = await db.get(Job, uuid.UUID(job_id))
        if not job:
            logger.error(f"Job {job_id} not found")
            return

        logger.info(f"Processing job {job_id}: {job.source_type}")

        # === STAGE 1: DOWNLOAD ===
        await _update_job(db, job, "downloading", 0.05, "Downloading video...")
        await _publish_progress(r, job_id, "downloading", 0.05, "Downloading video...")

        job_media_dir = os.path.join(settings.media_dir, job_id)
        os.makedirs(job_media_dir, exist_ok=True)

        if job.source_type == "youtube":
            video_info = await download.download_video(job.source_url, job_media_dir)
            job.title = video_info.title
            job.duration = video_info.duration
            job.media_path = video_info.video_path
        elif job.media_path:
            # Uploaded file - get duration
            duration = await clip_extraction.get_video_duration(job.media_path)
            job.duration = duration
            if not job.title:
                job.title = job.source_filename or "Uploaded Video"
        else:
            raise ValueError("No video source available")

        await db.commit()
        await _publish_progress(
            r, job_id, "downloading", 0.20,
            f"Downloaded: {job.title} ({job.duration:.0f}s)"
        )

        # === STAGE 2: TRANSCRIBE ===
        await _update_job(
            db, job, "transcribing", 0.25,
            "Extracting audio and transcribing..."
        )
        await _publish_progress(
            r, job_id, "transcribing", 0.25,
            "Extracting audio and transcribing..."
        )

        # Extract audio for transcription
        audio_path = os.path.join(job_media_dir, "audio.mp3")
        await download.extract_audio(job.media_path, audio_path)

        await _publish_progress(
            r, job_id, "transcribing", 0.30,
            "Transcribing with Whisper..."
        )

        transcript = await transcription.transcribe(audio_path)
        job.transcript = transcript
        await db.commit()

        word_count = len(transcript.get("words", []))
        await _publish_progress(
            r, job_id, "transcribing", 0.50,
            f"Transcription complete: {word_count} words"
        )

        # === STAGE 3: AI ANALYSIS ===
        await _update_job(
            db, job, "analyzing", 0.55,
            "AI analyzing transcript for viral clips..."
        )
        await _publish_progress(
            r, job_id, "analyzing", 0.55,
            "AI analyzing transcript for viral clips..."
        )

        clips_data = await ai_analysis.analyze_transcript(
            transcript=transcript,
            video_title=job.title or "",
            video_duration=job.duration or 0,
        )

        if not clips_data:
            raise ValueError("AI analysis returned no clips")

        await _publish_progress(
            r, job_id, "analyzing", 0.70,
            f"Found {len(clips_data)} potential clips"
        )

        # === STAGE 4: EXTRACT CLIPS ===
        await _update_job(
            db, job, "extracting", 0.75,
            f"Extracting {len(clips_data)} clips..."
        )
        await _publish_progress(
            r, job_id, "extracting", 0.75,
            f"Extracting {len(clips_data)} clips..."
        )

        clips_dir = os.path.join(settings.clips_dir, job_id)
        os.makedirs(clips_dir, exist_ok=True)

        for i, cd in enumerate(clips_data):
            clip_filename = f"clip_{i:02d}.mp4"
            clip_path = os.path.join(clips_dir, clip_filename)
            thumb_path = os.path.join(clips_dir, f"thumb_{i:02d}.jpg")

            # Extract the clip
            await clip_extraction.extract_clip(
                video_path=job.media_path,
                start_time=cd["start_time"],
                end_time=cd["end_time"],
                output_path=clip_path,
            )

            # Extract thumbnail at 25% into the clip; a thumbnail is
            # nice-to-have, so its failure must not fail the job.
            thumb_time = cd["start_time"] + (cd["end_time"] - cd["start_time"]) * 0.25
            try:
                await clip_extraction.extract_thumbnail(
                    video_path=job.media_path,
                    timestamp=thumb_time,
                    output_path=thumb_path,
                )
            except Exception:
                thumb_path = None

            # Get transcript segment for this clip
            segment_text = transcription.get_transcript_segment(
                transcript.get("words", []),
                cd["start_time"],
                cd["end_time"],
            )

            # Save clip to database
            clip = Clip(
                job_id=job.id,
                title=cd["title"],
                start_time=cd["start_time"],
                end_time=cd["end_time"],
                virality_score=cd["virality_score"],
                category=cd["category"],
                reasoning=cd["reasoning"],
                transcript_segment=segment_text,
                thumbnail_path=thumb_path,
                raw_clip_path=clip_path,
            )
            db.add(clip)

            # Extraction spans the 0.75 -> 0.95 slice of overall progress.
            progress = 0.75 + (0.20 * (i + 1) / len(clips_data))
            await _publish_progress(
                r, job_id, "extracting", progress,
                f"Extracted clip {i + 1}/{len(clips_data)}: {cd['title']}"
            )

        await db.commit()

        # === COMPLETE ===
        await _update_job(
            db, job, "complete", 1.0,
            f"Done! {len(clips_data)} clips extracted"
        )
        await _publish_progress(
            r, job_id, "complete", 1.0,
            f"Done! {len(clips_data)} clips extracted"
        )

        # Clean up audio file
        if os.path.exists(audio_path):
            os.remove(audio_path)

        logger.info(f"Job {job_id} complete: {len(clips_data)} clips")

    except Exception as e:
        logger.exception(f"Job {job_id} failed: {e}")
        try:
            if job is not None:
                await _update_job(
                    db, job, "failed", job.progress,
                    str(e), error_message=str(e),
                )
                await _publish_progress(
                    r, job_id, "failed", job.progress, f"Error: {e}"
                )
            else:
                # Job row never loaded (bad UUID, DB error) —
                # still notify any SSE listeners.
                await _publish_progress(r, job_id, "failed", 0.0, f"Error: {e}")
        except Exception:
            # Best-effort failure reporting; never mask the original error.
            pass
    finally:
        await db.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def render_clip(ctx: dict, render_id: str):
    """Render a clip with subtitles and aspect ratio conversion.

    Phase 1 stub: copies the raw extracted clip unchanged. Phase 3 will
    add subtitle burn-in and aspect-ratio conversion.
    """
    from app.models import RenderRequest

    db = await _get_session()
    # Initialized up front: if the lookup itself raises, the except-handler
    # below must not hit a NameError referencing `render`.
    render = None
    try:
        render = await db.get(RenderRequest, uuid.UUID(render_id))
        if not render:
            return

        render.status = "rendering"
        render.progress = 0.5
        await db.commit()

        clip = await db.get(Clip, render.clip_id)
        if not clip or not clip.raw_clip_path:
            render.status = "failed"
            render.error_message = "Clip not found or not extracted"
            await db.commit()
            return

        # Phase 1: just copy the raw clip as-is
        # Phase 3 will add subtitle rendering + aspect ratio conversion
        import shutil
        renders_dir = os.path.join(settings.renders_dir, str(render.clip_id))
        os.makedirs(renders_dir, exist_ok=True)
        output = os.path.join(
            renders_dir,
            f"render_{render.aspect_ratio.replace(':', 'x')}.mp4"
        )
        shutil.copy2(clip.raw_clip_path, output)

        render.output_path = output
        render.status = "complete"
        render.progress = 1.0
        await db.commit()

        logger.info(f"Render {render_id} complete: {output}")
    except Exception as e:
        logger.exception(f"Render {render_id} failed: {e}")
        # `render` is None when the failure happened before/at the lookup.
        if render is not None:
            render.status = "failed"
            render.error_message = str(e)
            await db.commit()
    finally:
        await db.close()
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
fastapi==0.115.6
|
||||||
|
uvicorn[standard]==0.34.0
|
||||||
|
sqlalchemy[asyncio]==2.0.36
|
||||||
|
asyncpg==0.30.0
|
||||||
|
pydantic==2.10.3
|
||||||
|
pydantic-settings==2.7.0
|
||||||
|
arq==0.26.1
|
||||||
|
redis==5.2.1
|
||||||
|
httpx==0.28.1
|
||||||
|
yt-dlp==2024.12.23
|
||||||
|
sse-starlette==2.2.1
|
||||||
|
python-multipart==0.0.20
|
||||||
|
aiofiles==24.1.0
|
||||||
|
|
@ -0,0 +1,113 @@
|
||||||
|
-- ClipForge Database Schema
-- One processing job fans out into many clips; each clip can have many
-- render requests (one per aspect-ratio/style combination).

CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- Job status enum
-- Mirrors the pipeline stages the worker publishes over SSE.
CREATE TYPE job_status AS ENUM (
    'pending',
    'downloading',
    'transcribing',
    'analyzing',
    'extracting',
    'complete',
    'failed'
);

-- Source type enum
CREATE TYPE source_type AS ENUM (
    'youtube',
    'upload'
);

-- Aspect ratio enum
CREATE TYPE aspect_ratio AS ENUM (
    '16:9',
    '9:16',
    '1:1',
    '4:5'
);

-- Render status enum
CREATE TYPE render_status AS ENUM (
    'pending',
    'rendering',
    'complete',
    'failed'
);

-- Jobs table
-- One row per submitted video; transcript is stored inline as JSONB.
CREATE TABLE jobs (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    source_type source_type NOT NULL,
    source_url TEXT,
    source_filename TEXT,
    title TEXT,
    duration FLOAT,
    status job_status NOT NULL DEFAULT 'pending',
    progress FLOAT NOT NULL DEFAULT 0.0,
    stage_message TEXT,
    error_message TEXT,
    media_path TEXT,
    transcript JSONB,
    scene_boundaries JSONB,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Clips table
-- duration is a generated column so it can never drift from start/end.
CREATE TABLE clips (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    job_id UUID NOT NULL REFERENCES jobs(id) ON DELETE CASCADE,
    title TEXT NOT NULL,
    start_time FLOAT NOT NULL,
    end_time FLOAT NOT NULL,
    duration FLOAT GENERATED ALWAYS AS (end_time - start_time) STORED,
    virality_score FLOAT NOT NULL DEFAULT 0.0,
    category TEXT,
    reasoning TEXT,
    transcript_segment TEXT,
    thumbnail_path TEXT,
    raw_clip_path TEXT,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Render requests table
CREATE TABLE render_requests (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    clip_id UUID NOT NULL REFERENCES clips(id) ON DELETE CASCADE,
    aspect_ratio aspect_ratio NOT NULL DEFAULT '9:16',
    subtitle_style TEXT NOT NULL DEFAULT 'tiktok',
    status render_status NOT NULL DEFAULT 'pending',
    progress FLOAT NOT NULL DEFAULT 0.0,
    output_path TEXT,
    error_message TEXT,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);

-- Indexes
CREATE INDEX idx_jobs_status ON jobs(status);
CREATE INDEX idx_jobs_created_at ON jobs(created_at DESC);
CREATE INDEX idx_clips_job_id ON clips(job_id);
CREATE INDEX idx_clips_virality ON clips(virality_score DESC);
CREATE INDEX idx_renders_clip_id ON render_requests(clip_id);
CREATE INDEX idx_renders_status ON render_requests(status);

-- Updated_at trigger
-- Keeps updated_at current on every row modification.
CREATE OR REPLACE FUNCTION update_updated_at()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER jobs_updated_at
    BEFORE UPDATE ON jobs
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();

CREATE TRIGGER renders_updated_at
    BEFORE UPDATE ON render_requests
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at();
|
||||||
|
|
@ -0,0 +1,91 @@
|
||||||
|
services:
  # PostgreSQL — job/clip/render metadata. Schema is applied once on
  # first boot via the init.sql entrypoint mount.
  postgres:
    image: postgres:15-alpine
    restart: unless-stopped
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-clipforge}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-changeme_clipforge_2025}
      POSTGRES_DB: ${POSTGRES_DB:-clipforge}
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./database/init.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-clipforge}"]
      interval: 5s
      timeout: 5s
      retries: 5

  # Redis — ARQ task queue + pub/sub channel for SSE progress events.
  redis:
    image: redis:7-alpine
    restart: unless-stopped
    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 5s
      timeout: 5s
      retries: 5

  # FastAPI backend — exposed through Traefik at clip.jeffemmett.com.
  backend:
    build:
      context: ./backend
      dockerfile: Dockerfile
    restart: unless-stopped
    env_file: .env
    environment:
      - DATABASE_URL=${DATABASE_URL:-postgresql+asyncpg://clipforge:changeme_clipforge_2025@postgres:5432/clipforge}
      - REDIS_URL=${REDIS_URL:-redis://redis:6379/0}
    volumes:
      # Shared with the worker: /data/media, /data/clips, /data/renders.
      - media_data:/data
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.clipforge.rule=Host(`clip.jeffemmett.com`)"
      - "traefik.http.services.clipforge.loadbalancer.server.port=8000"
      # NOTE(review): this service sits on two networks; Traefik usually
      # needs a `traefik.docker.network` label to pick the right one — confirm.
    networks:
      - default
      - traefik-public

  # ARQ worker — same image as the backend, different entrypoint.
  worker:
    build:
      context: ./backend
      dockerfile: Dockerfile
    restart: unless-stopped
    command: ["python", "-m", "app.worker"]
    env_file: .env
    environment:
      - DATABASE_URL=${DATABASE_URL:-postgresql+asyncpg://clipforge:changeme_clipforge_2025@postgres:5432/clipforge}
      - REDIS_URL=${REDIS_URL:-redis://redis:6379/0}
    volumes:
      - media_data:/data
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    extra_hosts:
      # Lets the containerized worker reach Ollama running on the host.
      - "host.docker.internal:host-gateway"

  # Static placeholder page (Phase 4 replaces this with the React build).
  frontend:
    build:
      context: ./frontend
      dockerfile: Dockerfile
    restart: unless-stopped
    labels:
      - "traefik.enable=false"
    networks:
      - default

volumes:
  postgres_data:
  redis_data:
  media_data:

networks:
  # Pre-existing Traefik network, created outside this compose file.
  traefik-public:
    external: true
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
# Minimal static frontend: stock nginx serving a single inline HTML page.
FROM nginx:alpine

# Placeholder frontend - Phase 4 will replace with React build
RUN echo '<!DOCTYPE html><html><head><title>ClipForge</title><style>body{font-family:system-ui;display:flex;justify-content:center;align-items:center;min-height:100vh;margin:0;background:#0a0a0a;color:#fff}main{text-align:center;max-width:600px;padding:2rem}.logo{font-size:3rem;margin-bottom:1rem}h1{margin:0 0 0.5rem}p{color:#888;margin:0 0 2rem}.status{background:#111;border:1px solid #333;border-radius:8px;padding:1.5rem;text-align:left}code{color:#0f0}</style></head><body><main><div class="logo">✍️</div><h1>ClipForge</h1><p>Self-hosted AI video clipper</p><div class="status"><p><strong>API:</strong> <code>POST /api/jobs</code></p><p><strong>Status:</strong> <code>GET /api/jobs/{id}</code></p><p><strong>Clips:</strong> <code>GET /api/jobs/{id}/clips</code></p><p style="margin-top:1rem;color:#888">Frontend coming in Phase 4</p></div></main></body></html>' > /usr/share/nginx/html/index.html

# nginx's default server listens on 80; Traefik/compose route to it.
EXPOSE 80
|
||||||
Loading…
Reference in New Issue