From db4b7b10fd7375eefc7bf6fca987b404a697761f Mon Sep 17 00:00:00 2001 From: Jeff Emmett Date: Tue, 10 Feb 2026 01:43:38 +0000 Subject: [PATCH] fix: use raw seconds in transcript timestamps to prevent AI misinterpretation The AI model was interpreting MM:SS timestamps (e.g., 38:07) as decimal seconds (38.07s) instead of 2287s, causing clips from long videos to have near-zero durations. Switching to raw seconds (e.g., [2287.0s - 2295.0s]) eliminates the ambiguity. Co-Authored-By: Claude Opus 4.6 --- backend/app/services/ai_analysis.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/backend/app/services/ai_analysis.py b/backend/app/services/ai_analysis.py index 3ee07b2..3bd12f7 100644 --- a/backend/app/services/ai_analysis.py +++ b/backend/app/services/ai_analysis.py @@ -57,7 +57,7 @@ def _sample_segments(segments: list[dict], max_chars: int) -> list[dict]: lines = [] for s in segments: line = ( - f"[{_fmt_time(s.get('start', 0))} - {_fmt_time(s.get('end', 0))}] " + f"[{s.get('start', 0):.1f}s - {s.get('end', 0):.1f}s] " f"{s.get('text', '').strip()}" ) lines.append((s, line)) @@ -148,7 +148,7 @@ async def analyze_transcript( if segments: sampled = _sample_segments(segments, MAX_TRANSCRIPT_CHARS) timestamped = "\n".join( - f"[{_fmt_time(s.get('start', 0))} - {_fmt_time(s.get('end', 0))}] " + f"[{s.get('start', 0):.1f}s - {s.get('end', 0):.1f}s] " f"{s.get('text', '').strip()}" for s in sampled ) @@ -163,7 +163,7 @@ async def analyze_transcript( ) user_prompt = f"""Video Title: {video_title} -Video Duration: {_fmt_time(video_duration)} +Video Duration: {video_duration:.0f} seconds Transcript: {timestamped} @@ -358,9 +358,3 @@ def _parse_clips(content: str, video_duration: float) -> list[dict]: # Sort by virality score descending clips.sort(key=lambda x: x["virality_score"], reverse=True) return clips - - -def _fmt_time(seconds: float) -> str: - """Format seconds as MM:SS.""" - m, s = divmod(int(seconds), 60) - return f"{m:02d}:{s:02d}"