fix: use raw seconds in transcript timestamps to prevent AI misinterpretation

The AI model was misreading MM:SS timestamps (e.g., 38:07) as decimal seconds (38.07s) instead of 2287s, which caused clips from long videos to come out with near-zero durations. Switching to raw seconds (e.g., [2287.0s - 2295.0s]) eliminates the ambiguity.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 01:43:38 +00:00 · 2026-02-10 01:43:38 +00:00 · db4b7b10fd
parent 3ce7945096
commit db4b7b10fd
1 changed files with 3 additions and 9 deletions
--- a/backend/app/services/ai_analysis.py
+++ b/backend/app/services/ai_analysis.py
@@ -57,7 +57,7 @@ def _sample_segments(segments: list[dict], max_chars: int) -> list[dict]:
    lines = []
    for s in segments:
        line = (
-            f"[{_fmt_time(s.get('start', 0))} - {_fmt_time(s.get('end', 0))}] "
+            f"[{s.get('start', 0):.1f}s - {s.get('end', 0):.1f}s] "
            f"{s.get('text', '').strip()}"
        )
        lines.append((s, line))
@@ -148,7 +148,7 @@ async def analyze_transcript(
    if segments:
        sampled = _sample_segments(segments, MAX_TRANSCRIPT_CHARS)
        timestamped = "\n".join(
-            f"[{_fmt_time(s.get('start', 0))} - {_fmt_time(s.get('end', 0))}] "
+            f"[{s.get('start', 0):.1f}s - {s.get('end', 0):.1f}s] "
            f"{s.get('text', '').strip()}"
            for s in sampled
        )
@@ -163,7 +163,7 @@ async def analyze_transcript(
    )

    user_prompt = f"""Video Title: {video_title}
-Video Duration: {_fmt_time(video_duration)}
+Video Duration: {video_duration:.0f} seconds

 Transcript:
 {timestamped}
@@ -358,9 +358,3 @@ def _parse_clips(content: str, video_duration: float) -> list[dict]:
    # Sort by virality score descending
    clips.sort(key=lambda x: x["virality_score"], reverse=True)
    return clips
-
-
-def _fmt_time(seconds: float) -> str:
-    """Format seconds as MM:SS."""
-    m, s = divmod(int(seconds), 60)
-    return f"{m:02d}:{s:02d}"