fix: use raw seconds in transcript timestamps to prevent AI misinterpretation
The AI model was misreading MM:SS timestamps as decimal seconds — e.g., 38:07 (38 min 7 s, i.e. 2287 s) was treated as 38.07 s — so clips taken from long videos ended up with near-zero durations. Emitting raw seconds instead (e.g., [2287.0s - 2295.0s]) eliminates the ambiguity. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
3ce7945096
commit
db4b7b10fd
|
|
@ -57,7 +57,7 @@ def _sample_segments(segments: list[dict], max_chars: int) -> list[dict]:
|
|||
lines = []
|
||||
for s in segments:
|
||||
line = (
|
||||
f"[{_fmt_time(s.get('start', 0))} - {_fmt_time(s.get('end', 0))}] "
|
||||
f"[{s.get('start', 0):.1f}s - {s.get('end', 0):.1f}s] "
|
||||
f"{s.get('text', '').strip()}"
|
||||
)
|
||||
lines.append((s, line))
|
||||
|
|
@ -148,7 +148,7 @@ async def analyze_transcript(
|
|||
if segments:
|
||||
sampled = _sample_segments(segments, MAX_TRANSCRIPT_CHARS)
|
||||
timestamped = "\n".join(
|
||||
f"[{_fmt_time(s.get('start', 0))} - {_fmt_time(s.get('end', 0))}] "
|
||||
f"[{s.get('start', 0):.1f}s - {s.get('end', 0):.1f}s] "
|
||||
f"{s.get('text', '').strip()}"
|
||||
for s in sampled
|
||||
)
|
||||
|
|
@ -163,7 +163,7 @@ async def analyze_transcript(
|
|||
)
|
||||
|
||||
user_prompt = f"""Video Title: {video_title}
|
||||
Video Duration: {_fmt_time(video_duration)}
|
||||
Video Duration: {video_duration:.0f} seconds
|
||||
|
||||
Transcript:
|
||||
{timestamped}
|
||||
|
|
@ -358,9 +358,3 @@ def _parse_clips(content: str, video_duration: float) -> list[dict]:
|
|||
# Sort by virality score descending
|
||||
clips.sort(key=lambda x: x["virality_score"], reverse=True)
|
||||
return clips
|
||||
|
||||
|
||||
def _fmt_time(seconds: float) -> str:
|
||||
"""Format seconds as MM:SS."""
|
||||
m, s = divmod(int(seconds), 60)
|
||||
return f"{m:02d}:{s:02d}"
|
||||
|
|
|
|||
Loading…
Reference in New Issue