diff options
| author | Adam Malczewski <[email protected]> | 2026-05-30 21:37:56 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-05-30 21:37:56 +0900 |
| commit | 8e8c64b6dafb8514768f4e801f16eb1a25726d44 (patch) | |
| tree | 9d8708edfccb5120b82f1be334f1c24b6114387f | |
| parent | cf2a0d5777a28484d518c551cf6a412e54332400 (diff) | |
| download | youtube-transcriber-main.tar.gz youtube-transcriber-main.zip | |
Failed queue entries were cached permanently, so a video that failed
because subtitles weren't generated yet would never be re-fetched.
Add requeue_failed() to reset failed entries to pending, and retry any
failed entry once its (120s) cooldown has elapsed.
| -rw-r--r-- | app/main.py | 34 | ||||
| -rw-r--r-- | app/storage.py | 17 |
2 files changed, 51 insertions, 0 deletions
diff --git a/app/main.py b/app/main.py index bece6e4..e91cfff 100644 --- a/app/main.py +++ b/app/main.py @@ -16,6 +16,30 @@ from app.storage import TranscriptStore from app.transcriber import InvalidURLError, create_api, extract_video_id from app.worker import run_worker +from datetime import datetime + +# How long to wait before retrying a failed video, per error_type (seconds). +# All failures are retryable; cooldowns avoid hammering YouTube or a blocked IP. +RETRY_COOLDOWN_SECONDS: dict[str, float] = { + "not_available": 120.0, # subtitles may not be generated yet + "transcript_disabled": 120.0, # rarely changes, but allow eventual recheck + "ip_blocked": 120.0, # back off when our IP is blocked + "internal_error": 120.0, # transient bugs / network blips +} +DEFAULT_RETRY_COOLDOWN_SECONDS = 120.0 + + +def _failed_entry_is_retryable(entry: dict) -> bool: + """Return True if a failed queue entry has cooled down enough to retry.""" + cooldown = RETRY_COOLDOWN_SECONDS.get( + entry["error_type"] or "", DEFAULT_RETRY_COOLDOWN_SECONDS + ) + updated_at = entry.get("updated_at") + if not updated_at: + return True + age = (datetime.utcnow() - datetime.fromisoformat(updated_at)).total_seconds() + return age >= cooldown + @asynccontextmanager async def lifespan(app: FastAPI): @@ -80,6 +104,16 @@ async def get_transcript( entry = await store.get_queue_entry(video_id) if entry is not None: if entry["status"] == "failed": + # All failures are retryable once their cooldown has elapsed. + if _failed_entry_is_retryable(entry): + await store.requeue_failed(video_id) + estimate = await store.get_position_and_estimate(video_id) + return TranscriptQueuedResponse( + status="queued", + video_id=video_id, + position=estimate["position"], + estimated_seconds=estimate["estimated_seconds"], + ) return TranscriptFailedResponse( video_id=video_id, error=entry["error"] or "", diff --git a/app/storage.py b/app/storage.py index 2e991d3..d53d860 100644 --- a/app/storage.py +++ b/app/storage.py @@ -140,6 +140,23 @@ class TranscriptStore: ) await self._db.commit() + async def requeue_failed(self, video_id: str) -> dict: + """Reset a failed queue entry back to pending so it will be retried. + + Assigns a fresh delay and clears the previous error. Returns the entry. + """ + delay = random.uniform(30.0, 60.0) + now = datetime.utcnow().isoformat() + await self._db.execute( + """UPDATE queue + SET status = 'pending', assigned_delay = ?, error = NULL, + error_type = NULL, updated_at = ?, started_at = NULL + WHERE video_id = ? AND status = 'failed'""", + (delay, now, video_id), + ) + await self._db.commit() + return await self.get_queue_entry(video_id) + async def get_position_and_estimate(self, video_id: str) -> dict | None: """Return queue position (1-indexed) and estimated wait seconds for a video.""" entry = await self.get_queue_entry(video_id) |
