diff --git a/backend/Dockerfile b/backend/Dockerfile index 6810a6c..ac1a713 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -2,7 +2,7 @@ FROM python:3.12-slim # Install system dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ - ffmpeg curl unzip \ + ffmpeg curl unzip fonts-liberation fonts-dejavu-core \ && rm -rf /var/lib/apt/lists/* # Install deno (required by yt-dlp for YouTube JS extraction) diff --git a/backend/app/api/routes/clips.py b/backend/app/api/routes/clips.py index 71f7600..c64db2e 100644 --- a/backend/app/api/routes/clips.py +++ b/backend/app/api/routes/clips.py @@ -27,6 +27,21 @@ async def preview_clip(clip_id: UUID, db: AsyncSession = Depends(get_db)): if not clip.raw_clip_path: raise HTTPException(404, "Clip not yet extracted") + return FileResponse( + clip.raw_clip_path, + media_type="video/mp4", + content_disposition_type="inline", + ) + + +@router.get("/clips/{clip_id}/download") +async def download_clip(clip_id: UUID, db: AsyncSession = Depends(get_db)): + clip = await db.get(Clip, clip_id) + if not clip: + raise HTTPException(404, "Clip not found") + if not clip.raw_clip_path: + raise HTTPException(404, "Clip not yet extracted") + return FileResponse( clip.raw_clip_path, media_type="video/mp4", diff --git a/backend/app/api/routes/renders.py b/backend/app/api/routes/renders.py index 83a50de..09bd8c6 100644 --- a/backend/app/api/routes/renders.py +++ b/backend/app/api/routes/renders.py @@ -109,3 +109,18 @@ async def download_render(render_id: UUID, db: AsyncSession = Depends(get_db)): media_type="video/mp4", filename=filename, ) + + +@router.get("/renders/{render_id}/preview") +async def preview_render(render_id: UUID, db: AsyncSession = Depends(get_db)): + render = await db.get(RenderRequest, render_id) + if not render: + raise HTTPException(404, "Render not found") + if render.status != "complete" or not render.output_path: + raise HTTPException(400, "Render not complete") + + return FileResponse( + render.output_path, + media_type="video/mp4", + content_disposition_type="inline", + ) diff --git a/backend/app/frontend.py b/backend/app/frontend.py index c4c993a..a46cf00 100644 --- a/backend/app/frontend.py +++ b/backend/app/frontend.py @@ -93,10 +93,28 @@ header p { color: var(--text-dim); font-size: .95rem; } .clip-reasoning { color: var(--text-dim); font-size: .85rem; line-height: 1.5; margin-top: .5rem; } .clip-transcript { background: #0a0a0a; border-radius: 8px; padding: .75rem 1rem; margin-top: .75rem; font-size: .8rem; color: #aaa; line-height: 1.5; font-style: italic; max-height: 80px; overflow-y: auto; } -.clip-actions { display: flex; gap: .5rem; margin-top: .75rem; } +.clip-video { width: 100%; border-radius: 8px; margin-top: .75rem; background: #000; } +.clip-actions { display: flex; gap: .5rem; margin-top: .75rem; flex-wrap: wrap; align-items: center; } .clip-btn { padding: .4rem .75rem; background: transparent; border: 1px solid var(--border); border-radius: 8px; - color: var(--text-dim); font-size: .8rem; cursor: pointer; transition: all .2s; text-decoration: none; } + color: var(--text-dim); font-size: .8rem; cursor: pointer; transition: all .2s; text-decoration: none; display: inline-flex; align-items: center; gap: .3rem; } .clip-btn:hover { border-color: var(--accent); color: var(--accent); } +.clip-btn.primary { background: var(--accent); border-color: var(--accent); color: #fff; } +.clip-btn.primary:hover { background: var(--accent-hover); } +.render-options { display: none; margin-top: .75rem; padding: .75rem; background: #0d0d0d; border: 1px solid var(--border); border-radius: 8px; } +.render-options.visible { display: block; } +.render-row { display: flex; gap: .75rem; align-items: center; margin-bottom: .5rem; flex-wrap: wrap; } +.render-row:last-child { margin-bottom: 0; } +.render-label { font-size: .8rem; color: var(--text-dim); min-width: 70px; } +.render-select { padding: .35rem .5rem; background: var(--surface); border: 1px solid var(--border); border-radius: 6px; + color: var(--text); font-size: .8rem; outline: none; cursor: pointer; } +.render-select:focus { border-color: var(--accent); } +.style-preview { display: flex; gap: .5rem; flex-wrap: wrap; } +.style-chip { padding: .25rem .6rem; border: 1px solid var(--border); border-radius: 6px; font-size: .75rem; + color: var(--text-dim); cursor: pointer; transition: all .2s; } +.style-chip:hover { border-color: var(--accent); color: var(--accent); } +.style-chip.active { border-color: var(--accent); color: var(--accent); background: #1a1025; } +.render-status { font-size: .8rem; color: var(--text-dim); margin-top: .5rem; } +.render-status.done { color: var(--green); } /* History */ .history-section { margin-top: 2.5rem; border-top: 1px solid var(--border); padding-top: 1.5rem; } @@ -312,7 +330,7 @@ async function loadClips(jobId) { document.getElementById('clipsCount').textContent = clips.length + ' clip' + (clips.length !== 1 ? 's' : ''); const list = document.getElementById('clipsList'); - list.innerHTML = clips.map(c => { + list.innerHTML = clips.map((c, i) => { const dur = (c.end_time - c.start_time).toFixed(1); const badge = 'badge-' + (c.category || 'general'); const scoreColor = c.virality_score >= 75 ? 'var(--green)' : c.virality_score >= 50 ? 'var(--yellow)' : 'var(--text-dim)'; @@ -329,9 +347,38 @@ async function loadClips(jobId) { '' + dur + 's' + '' + (c.reasoning ? '
' + esc(c.reasoning) + '
' : '') + + '' + (c.transcript_segment ? '
"' + esc(c.transcript_segment) + '"
' : '') + '
' + - 'Preview' + + 'Download' + + '' + + '
' + + '
' + + '
' + + 'Captions' + + '
' + + '
TikTok
' + + '
Hormozi
' + + '
Karaoke
' + + '
Minimal
' + + '
None
' + + '
' + + '
' + + '
' + + 'Aspect' + + '' + + '
' + + '
' + + '' + + '
' + + '
' + '
' + ''; }).join(''); @@ -396,6 +443,51 @@ function fmtTime(s) { } function esc(s) { const d = document.createElement('div'); d.textContent = s; return d.innerHTML; } +// Render controls +const renderStyles = {}; // clip_id -> style +function toggleRender(clipId) { + const el = document.getElementById('render-' + clipId); + el.classList.toggle('visible'); +} +function selectStyle(clipId, style, chip) { + renderStyles[clipId] = style; + document.querySelectorAll('#styles-' + clipId + ' .style-chip').forEach(c => c.classList.remove('active')); + chip.classList.add('active'); +} +async function renderClip(clipId) { + const style = renderStyles[clipId] || 'tiktok'; + const aspect = document.getElementById('aspect-' + clipId).value; + const statusEl = document.getElementById('rstatus-' + clipId); + statusEl.textContent = 'Rendering...'; + statusEl.className = 'render-status'; + try { + const res = await fetch(API + '/api/clips/' + clipId + '/render', { + method: 'POST', headers: {'Content-Type': 'application/json'}, + body: JSON.stringify({aspect_ratio: aspect, subtitle_style: style}) + }); + if (!res.ok) { const e = await res.json(); throw new Error(e.detail || 'Render failed'); } + const render = await res.json(); + pollRender(clipId, render.id); + } catch (e) { statusEl.textContent = 'Error: ' + e.message; } +} +async function pollRender(clipId, renderId) { + const statusEl = document.getElementById('rstatus-' + clipId); + try { + const res = await fetch(API + '/api/renders/' + renderId); + const r = await res.json(); + if (r.status === 'complete') { + statusEl.className = 'render-status done'; + statusEl.innerHTML = 'Done! Download rendered clip' + + ' Preview'; + } else if (r.status === 'failed') { + statusEl.textContent = 'Failed: ' + (r.error_message || 'unknown error'); + } else { + statusEl.textContent = 'Rendering... ' + Math.round(r.progress * 100) + '%'; + setTimeout(() => pollRender(clipId, renderId), 2000); + } + } catch { setTimeout(() => pollRender(clipId, renderId), 3000); } +} + // URL enter key document.getElementById('urlInput').addEventListener('keydown', e => { if (e.key === 'Enter') submitUrl(); }); diff --git a/backend/app/schemas.py b/backend/app/schemas.py index d432bb3..a23dade 100644 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -59,7 +59,7 @@ class ClipResponse(BaseModel): class RenderCreate(BaseModel): aspect_ratio: str = Field(default="9:16", pattern="^(16:9|9:16|1:1|4:5)$") - subtitle_style: str = Field(default="tiktok") + subtitle_style: str = Field(default="tiktok", pattern="^(tiktok|hormozi|karaoke|minimal|none)$") class RenderResponse(BaseModel): @@ -80,4 +80,4 @@ class RenderResponse(BaseModel): class BulkRenderCreate(BaseModel): clip_ids: list[UUID] aspect_ratio: str = Field(default="9:16", pattern="^(16:9|9:16|1:1|4:5)$") - subtitle_style: str = Field(default="tiktok") + subtitle_style: str = Field(default="tiktok", pattern="^(tiktok|hormozi|karaoke|minimal|none)$") diff --git a/backend/app/services/subtitle_render.py b/backend/app/services/subtitle_render.py new file mode 100644 index 0000000..c375803 --- /dev/null +++ b/backend/app/services/subtitle_render.py @@ -0,0 +1,218 @@ +"""Subtitle rendering service using FFmpeg ASS filter. + +Generates word-by-word animated captions in various styles, then burns +them into the video with optional aspect ratio conversion. +""" + +import asyncio +import logging +import os +import tempfile + +logger = logging.getLogger(__name__) + +# ASS style definitions for each caption style. +# Playres Y is set to 720 (matching 1280x720 source). +ASS_HEADER = """[Script Info] +ScriptType: v4.00+ +PlayResX: 1280 +PlayResY: 720 +WrapStyle: 0 + +[V4+ Styles] +Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding +{styles} + +[Events] +Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text +{events}""" + +# Each style defines: font, colors (AABBGGRR format), border, alignment +STYLES = { + "tiktok": { + "name": "TikTok", + "def": "Style: Default,Arial,52,&H00FFFFFF,&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,0,2,40,40,40,1", + "highlight": "Style: Highlight,Arial,52,&H0000FFFF,&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,0,2,40,40,40,1", + "words_per_group": 3, + }, + "hormozi": { + "name": "Hormozi", + "def": "Style: Default,Impact,60,&H00FFFFFF,&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,2,0,1,4,0,2,40,40,50,1", + "highlight": "Style: Highlight,Impact,60,&H0000DDFF,&H000000FF,&H00000000,&H80000000,-1,0,0,0,105,105,2,0,1,4,0,2,40,40,50,1", + "words_per_group": 2, + }, + "karaoke": { + "name": "Karaoke", + "def": "Style: Default,Arial,48,&H80FFFFFF,&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,2,0,2,40,40,40,1", + "highlight": "Style: Highlight,Arial,48,&H0000FF00,&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,2,0,2,40,40,40,1", + "words_per_group": 4, + }, + "minimal": { + "name": "Minimal", + "def": "Style: Default,Helvetica,40,&H00FFFFFF,&H000000FF,&H00000000,&H80000000,0,0,0,0,100,100,0,0,1,2,0,2,40,40,30,1", + "highlight": None, + "words_per_group": 5, + }, +} + + +def _ts(seconds: float) -> str: + """Format seconds as ASS timestamp H:MM:SS.cc.""" + h = int(seconds // 3600) + m = int((seconds % 3600) // 60) + s = seconds % 60 + return f"{h}:{m:02d}:{s:05.2f}" + + +def _build_word_groups(words: list[dict], words_per_group: int) -> list[dict]: + """Group consecutive words for display as caption lines.""" + groups = [] + for i in range(0, len(words), words_per_group): + chunk = words[i : i + words_per_group] + if not chunk: + continue + groups.append( + { + "words": chunk, + "text": " ".join(w["word"].strip() for w in chunk), + "start": chunk[0]["start"], + "end": chunk[-1]["end"], + } + ) + return groups + + +def generate_ass( + words: list[dict], + clip_start: float, + clip_end: float, + style_name: str, +) -> str: + """Generate ASS subtitle content from word-level timestamps. + + Args: + words: list of {word, start, end} from Whisper + clip_start: clip start time in seconds (absolute, in source video) + clip_end: clip end time in seconds + style_name: one of tiktok, hormozi, karaoke, minimal + """ + style = STYLES.get(style_name, STYLES["tiktok"]) + + # Filter words to clip range and shift to clip-relative times + clip_words = [] + for w in words: + if w["end"] < clip_start - 0.3 or w["start"] > clip_end + 0.3: + continue + clip_words.append( + { + "word": w["word"].strip(), + "start": max(0, w["start"] - clip_start), + "end": max(0, w["end"] - clip_start), + } + ) + + if not clip_words: + return "" + + groups = _build_word_groups(clip_words, style["words_per_group"]) + + styles_str = style["def"] + if style.get("highlight"): + styles_str += "\n" + style["highlight"] + + events = [] + for g in groups: + start = _ts(g["start"]) + end = _ts(g["end"]) + + if style.get("highlight"): + # Build text with word-by-word highlight using override tags + parts = [] + for w in g["words"]: + w_start = w["start"] - g["start"] + # Fade-in highlight: override color at word start time + # Using \kf (karaoke fill) for smooth highlight + duration_cs = int((w["end"] - w["start"]) * 100) + parts.append(f"{{\\kf{duration_cs}}}{w['word']}") + text = " ".join(parts) + events.append( + f"Dialogue: 0,{start},{end},Highlight,,0,0,0,,{text}" + ) + else: + # Simple display, no highlight animation + events.append( + f"Dialogue: 0,{start},{end},Default,,0,0,0,,{g['text']}" + ) + + return ASS_HEADER.format(styles=styles_str, events="\n".join(events)) + + +async def render_with_subtitles( + video_path: str, + output_path: str, + ass_content: str, + aspect_ratio: str = "9:16", +) -> str: + """Render video with burned-in ASS subtitles and aspect ratio conversion. + + Args: + video_path: path to raw clip mp4 + output_path: where to write rendered output + ass_content: ASS subtitle content string + aspect_ratio: target aspect ratio (9:16, 16:9, 1:1, 4:5) + """ + os.makedirs(os.path.dirname(output_path), exist_ok=True) + + # Write ASS to temp file + ass_file = tempfile.NamedTemporaryFile( + suffix=".ass", delete=False, mode="w", encoding="utf-8" + ) + ass_file.write(ass_content) + ass_file.close() + + try: + # Build FFmpeg filter chain + filters = [] + + # Aspect ratio conversion with padding + ratio_map = {"9:16": (720, 1280), "16:9": (1280, 720), "1:1": (720, 720), "4:5": (576, 720)} + w, h = ratio_map.get(aspect_ratio, (720, 1280)) + filters.append(f"scale={w}:{h}:force_original_aspect_ratio=decrease") + filters.append(f"pad={w}:{h}:(ow-iw)/2:(oh-ih)/2:black") + + # Burn in subtitles (ass_file path needs escaped colons on Windows, but we're on Linux) + ass_escaped = ass_file.name.replace(":", "\\:") + if ass_content: + filters.append(f"ass={ass_escaped}") + + vf = ",".join(filters) + + cmd = [ + "ffmpeg", + "-i", video_path, + "-vf", vf, + "-c:v", "libx264", "-preset", "fast", "-crf", "23", + "-c:a", "aac", "-b:a", "128k", + "-movflags", "+faststart", + "-y", + output_path, + ] + + logger.info(f"Rendering: {video_path} -> {output_path} ({aspect_ratio}, subs={bool(ass_content)})") + + proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + _, stderr = await proc.communicate() + + if proc.returncode != 0: + raise RuntimeError(f"FFmpeg render failed: {stderr.decode()[-500:]}") + + size_mb = os.path.getsize(output_path) / (1024 * 1024) + logger.info(f"Rendered: {output_path} ({size_mb:.1f} MB)") + return output_path + + finally: + os.unlink(ass_file.name) diff --git a/backend/app/workers/tasks.py b/backend/app/workers/tasks.py index 708edd8..788133d 100644 --- a/backend/app/workers/tasks.py +++ b/backend/app/workers/tasks.py @@ -256,9 +256,9 @@ async def process_job(ctx: dict, job_id: str): async def render_clip(ctx: dict, render_id: str): - """Render a clip with subtitles and aspect ratio conversion. - (Phase 3 - stub for now, copies raw clip)""" - from app.models import RenderRequest + """Render a clip with subtitles and aspect ratio conversion.""" + from app.models import RenderRequest, Job + from app.services.subtitle_render import generate_ass, render_with_subtitles db = await _get_session() try: @@ -267,7 +267,7 @@ async def render_clip(ctx: dict, render_id: str): return render.status = "rendering" - render.progress = 0.5 + render.progress = 0.2 await db.commit() clip = await db.get(Clip, render.clip_id) @@ -277,16 +277,30 @@ async def render_clip(ctx: dict, render_id: str): await db.commit() return - # Phase 1: just copy the raw clip as-is - # Phase 3 will add subtitle rendering + aspect ratio conversion - import shutil renders_dir = os.path.join(settings.renders_dir, str(render.clip_id)) os.makedirs(renders_dir, exist_ok=True) output = os.path.join( renders_dir, - f"render_{render.aspect_ratio.replace(':', 'x')}.mp4" + f"render_{render.subtitle_style}_{render.aspect_ratio.replace(':', 'x')}.mp4", + ) + + # Generate subtitles from word-level transcript + ass_content = "" + if render.subtitle_style != "none": + job = await db.get(Job, clip.job_id) + words = (job.transcript or {}).get("words", []) if job else [] + if words: + ass_content = generate_ass( + words, clip.start_time, clip.end_time, render.subtitle_style + ) + logger.info(f"Generated ASS subtitles ({len(ass_content)} chars) for render {render_id}") + + render.progress = 0.4 + await db.commit() + + await render_with_subtitles( + clip.raw_clip_path, output, ass_content, render.aspect_ratio ) - shutil.copy2(clip.raw_clip_path, output) render.output_path = output render.status = "complete"