summaryrefslogtreecommitdiffhomepage
path: root/packages/tool-youtube-transcript/src/client.ts
blob: a088d7d964f0bad8ca649241e5d36970351a9f2d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/**
 * TranscriptClient — the injected outermost edge for the youtube_transcript
 * tool.
 *
 * The network call (`fetchFn`) and the timeout duration are injected so the
 * request/error-handling logic remains testable without real I/O; the timer
 * itself uses the global `setTimeout`/`clearTimeout`. The factory builds a
 * single `getTranscript` method over a self-hosted transcriber instance (no
 * API key). Mirrors the tool-web-search FirecrawlClient's request structure:
 * per-request timeout combined with the caller's cancellation signal via
 * `AbortSignal.any`.
 */

import type { TranscriptResponse } from "./format.js";

/**
 * Call signature of the global `fetch`. Used as the type of the injected
 * `fetchFn` dependency so a stub can stand in for the real network call.
 */
export type FetchLike = typeof globalThis.fetch;

/**
 * Default base URL (scheme, host, port) of the transcriber instance.
 *
 * `createTranscriptClient` does not fall back to this value: `baseUrl` is a
 * required field of `TranscriptClientDeps`, so callers that want the default
 * must pass it explicitly.
 *
 * NOTE(review): 100.102.55.49 lies in the 100.64.0.0/10 shared address space
 * (RFC 6598), so this is presumably only reachable from a private/overlay
 * network, and it is plain `http` — confirm that is intended.
 */
export const DEFAULT_BASE_URL = "http://100.102.55.49:41090";
/**
 * Per-request timeout in milliseconds (30 seconds), used by
 * `createTranscriptClient` when `TranscriptClientDeps.timeoutMs` is omitted.
 */
export const DEFAULT_TIMEOUT_MS = 30_000;

/**
 * Client for the transcriber service, as returned by
 * `createTranscriptClient`.
 */
export interface TranscriptClient {
	/**
	 * Request the transcript for `url`.
	 *
	 * @param url - URL to transcribe; sent URI-encoded as the `url` query
	 *   parameter of the transcriber's `/api/transcript` endpoint.
	 * @param signal - Caller's cancellation signal; aborting it cancels the
	 *   in-flight request.
	 * @returns The response body parsed as JSON and typed as
	 *   `TranscriptResponse` (the shape is asserted, not validated).
	 * @throws Error when the request is aborted, times out, the server answers
	 *   with a non-2xx status, or the body cannot be parsed as JSON.
	 */
	readonly getTranscript: (url: string, signal: AbortSignal) => Promise<TranscriptResponse>;
}

/** Dependencies injected into `createTranscriptClient`. */
export interface TranscriptClientDeps {
	/** Base URL of the transcriber instance; `/api/transcript` is appended to it. */
	readonly baseUrl: string;
	/** `fetch` implementation used to perform the HTTP request. */
	readonly fetchFn: FetchLike;
	/** Per-request timeout in milliseconds; defaults to `DEFAULT_TIMEOUT_MS` when omitted. */
	readonly timeoutMs?: number;
}

/**
 * Create a TranscriptClient bound to one transcriber instance.
 *
 * `getTranscript` builds the request URL
 * (`${baseUrl}/api/transcript?url=${encodeURIComponent(url)}`), calls the
 * injected `fetchFn`, and handles HTTP + JSON errors. The per-request timeout
 * is combined with the caller's cancellation signal via `AbortSignal.any`, and
 * the timer is always cleared once the call settles.
 *
 * Trailing slashes on `deps.baseUrl` are stripped so that `"http://host/"`
 * does not produce `"http://host//api/transcript"`.
 *
 * @param deps - Base URL, `fetch` implementation, and optional timeout
 *   (defaults to `DEFAULT_TIMEOUT_MS`).
 * @returns A client whose `getTranscript` resolves with the JSON body typed as
 *   `TranscriptResponse` (asserted, not validated), or rejects with:
 *   - `"Request aborted."` when the caller's signal is aborted,
 *   - `"Transcriber request timed out after N seconds."` when the timeout
 *     fires, during either the request or the JSON body read,
 *   - `"HTTP <status> <statusText>[: <body>]"` for a non-2xx response,
 *   - `"Failed to parse transcriber response as JSON"` for an unparseable body,
 *   - the original error thrown by `fetchFn` for any other request failure.
 */
export function createTranscriptClient(deps: TranscriptClientDeps): TranscriptClient {
	// Normalize once so the endpoint never contains a doubled slash.
	const baseUrl = deps.baseUrl.replace(/\/+$/, "");
	const fetchFn = deps.fetchFn;
	const timeoutMs = deps.timeoutMs ?? DEFAULT_TIMEOUT_MS;

	return {
		async getTranscript(url: string, signal: AbortSignal): Promise<TranscriptResponse> {
			const endpoint = `${baseUrl}/api/transcript?url=${encodeURIComponent(url)}`;
			const controller = new AbortController();
			const timeout = setTimeout(() => controller.abort(), timeoutMs);
			const combined = AbortSignal.any([signal, controller.signal]);

			// Translate an aborted signal into the matching error. Caller
			// cancellation takes precedence over the timeout when both fired.
			const abortError = (): Error | undefined => {
				if (signal.aborted) {
					return new Error("Request aborted.");
				}
				if (controller.signal.aborted) {
					return new Error(`Transcriber request timed out after ${timeoutMs / 1000} seconds.`);
				}
				return undefined;
			};

			try {
				let response: Response;
				try {
					response = await fetchFn(endpoint, {
						method: "GET",
						headers: { Accept: "application/json" },
						signal: combined,
					});
				} catch (err) {
					// Not an abort/timeout: surface the underlying failure untouched.
					throw abortError() ?? err;
				}
				if (!response.ok) {
					// Best effort: the status line is still useful if the body is unreadable.
					const text = await response.text().catch(() => "");
					throw new Error(
						`HTTP ${response.status} ${response.statusText}${text ? `: ${text}` : ""}`,
					);
				}
				try {
					return (await response.json()) as TranscriptResponse;
				} catch {
					// The body is streamed under the same signal, so an abort or
					// timeout can reject `json()` too; report that as what it is
					// rather than as a parse failure.
					throw abortError() ?? new Error("Failed to parse transcriber response as JSON");
				}
			} finally {
				clearTimeout(timeout);
			}
		},
	};
}