fix: use raw seconds in transcript timestamps to prevent AI misinterpretation

The AI model was interpreting MM:SS timestamps (e.g., 38:07) as decimal
seconds (38.07s) instead of 2287s, causing clips from long videos to have
near-zero durations. Switching to raw seconds (e.g., [2287.0s - 2295.0s])
eliminates the ambiguity.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jeff Emmett 2026-02-10 01:43:38 +00:00
parent 3ce7945096
commit db4b7b10fd
1 changed file with 3 additions and 9 deletions

View File

@ -57,7 +57,7 @@ def _sample_segments(segments: list[dict], max_chars: int) -> list[dict]:
lines = []
for s in segments:
line = (
f"[{_fmt_time(s.get('start', 0))} - {_fmt_time(s.get('end', 0))}] "
f"[{s.get('start', 0):.1f}s - {s.get('end', 0):.1f}s] "
f"{s.get('text', '').strip()}"
)
lines.append((s, line))
@ -148,7 +148,7 @@ async def analyze_transcript(
if segments:
sampled = _sample_segments(segments, MAX_TRANSCRIPT_CHARS)
timestamped = "\n".join(
f"[{_fmt_time(s.get('start', 0))} - {_fmt_time(s.get('end', 0))}] "
f"[{s.get('start', 0):.1f}s - {s.get('end', 0):.1f}s] "
f"{s.get('text', '').strip()}"
for s in sampled
)
@ -163,7 +163,7 @@ async def analyze_transcript(
)
user_prompt = f"""Video Title: {video_title}
Video Duration: {_fmt_time(video_duration)}
Video Duration: {video_duration:.0f} seconds
Transcript:
{timestamped}
@ -358,9 +358,3 @@ def _parse_clips(content: str, video_duration: float) -> list[dict]:
# Sort by virality score descending
clips.sort(key=lambda x: x["virality_score"], reverse=True)
return clips
def _fmt_time(seconds: float) -> str:
"""Format seconds as MM:SS."""
m, s = divmod(int(seconds), 60)
return f"{m:02d}:{s:02d}"