summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAdam Malczewski <[email protected]>2026-05-30 21:37:56 +0900
committerAdam Malczewski <[email protected]>2026-05-30 21:37:56 +0900
commit8e8c64b6dafb8514768f4e801f16eb1a25726d44 (patch)
tree9d8708edfccb5120b82f1be334f1c24b6114387f
parentcf2a0d5777a28484d518c551cf6a412e54332400 (diff)
downloadyoutube-transcriber-main.tar.gz
youtube-transcriber-main.zip
Make failed transcripts retryable after cooldownHEADmain
Failed queue entries were cached permanently, so a video that failed because subtitles weren't generated yet would never be re-fetched. Add requeue_failed() to reset failed entries to pending, and retry any failed entry once its (120s) cooldown has elapsed.
-rw-r--r--app/main.py34
-rw-r--r--app/storage.py17
2 files changed, 51 insertions, 0 deletions
diff --git a/app/main.py b/app/main.py
index bece6e4..e91cfff 100644
--- a/app/main.py
+++ b/app/main.py
@@ -16,6 +16,30 @@ from app.storage import TranscriptStore
from app.transcriber import InvalidURLError, create_api, extract_video_id
from app.worker import run_worker
+from datetime import datetime
+
+# How long to wait before retrying a failed video, per error_type (seconds).
+# All failures are retryable; cooldowns avoid hammering YouTube or a blocked IP.
+RETRY_COOLDOWN_SECONDS: dict[str, float] = {
+ "not_available": 120.0, # subtitles may not be generated yet
+ "transcript_disabled": 120.0, # rarely changes, but allow eventual recheck
+ "ip_blocked": 120.0, # back off when our IP is blocked
+ "internal_error": 120.0, # transient bugs / network blips
+}
+DEFAULT_RETRY_COOLDOWN_SECONDS = 120.0
+
+
+def _failed_entry_is_retryable(entry: dict) -> bool:
+ """Return True if a failed queue entry has cooled down enough to retry."""
+ cooldown = RETRY_COOLDOWN_SECONDS.get(
+ entry["error_type"] or "", DEFAULT_RETRY_COOLDOWN_SECONDS
+ )
+ updated_at = entry.get("updated_at")
+ if not updated_at:
+ return True
+ age = (datetime.utcnow() - datetime.fromisoformat(updated_at)).total_seconds()
+ return age >= cooldown
+
@asynccontextmanager
async def lifespan(app: FastAPI):
@@ -80,6 +104,16 @@ async def get_transcript(
entry = await store.get_queue_entry(video_id)
if entry is not None:
if entry["status"] == "failed":
+ # All failures are retryable once their cooldown has elapsed.
+ if _failed_entry_is_retryable(entry):
+ await store.requeue_failed(video_id)
+ estimate = await store.get_position_and_estimate(video_id)
+ return TranscriptQueuedResponse(
+ status="queued",
+ video_id=video_id,
+ position=estimate["position"],
+ estimated_seconds=estimate["estimated_seconds"],
+ )
return TranscriptFailedResponse(
video_id=video_id,
error=entry["error"] or "",
diff --git a/app/storage.py b/app/storage.py
index 2e991d3..d53d860 100644
--- a/app/storage.py
+++ b/app/storage.py
@@ -140,6 +140,23 @@ class TranscriptStore:
)
await self._db.commit()
+ async def requeue_failed(self, video_id: str) -> dict:
+ """Reset a failed queue entry back to pending so it will be retried.
+
+ Assigns a fresh delay and clears the previous error. Returns the entry.
+ """
+ delay = random.uniform(30.0, 60.0)
+ now = datetime.utcnow().isoformat()
+ await self._db.execute(
+ """UPDATE queue
+ SET status = 'pending', assigned_delay = ?, error = NULL,
+ error_type = NULL, updated_at = ?, started_at = NULL
+ WHERE video_id = ? AND status = 'failed'""",
+ (delay, now, video_id),
+ )
+ await self._db.commit()
+ return await self.get_queue_entry(video_id)
+
async def get_position_and_estimate(self, video_id: str) -> dict | None:
"""Return queue position (1-indexed) and estimated wait seconds for a video."""
entry = await self.get_queue_entry(video_id)