summaryrefslogtreecommitdiffhomepage
path: root/packages/tool-youtube-transcript/src/format.ts
blob: 8bb409bd0a649fbb54d03b58211db66160253dde (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/**
 * Pure formatters for the youtube_transcript tool — input → output, no I/O.
 *
 * These mirror the proven opencode youtube-subtitles tool's formatting,
 * isolated (not imported) per the isolation-over-DRY rule. Tested directly
 * with zero mocks.
 *
 * NOTE: `formatQueued` renders the estimated-available-at time as an ISO 8601
 * string derived from the injected `now()`, rather than `toLocaleTimeString`.
 * The opencode tool uses `toLocaleTimeString`, which reads ambient locale +
 * timezone (hidden state) — a violation of the pure-core rule. ISO is fully
 * deterministic from the injected `now` and is a valid `{time}` rendering.
 */

/**
 * A single timestamped segment from a completed transcript.
 *
 * `formatCompleted` renders each segment as one `[m:ss] text` line.
 */
export interface TranscriptSegment {
	/** Text of the segment, emitted verbatim after the timestamp. */
	readonly text: string;
	/** Start offset in seconds; this is the value handed to `formatTimestamp`. */
	readonly start: number;
	/** Segment length — presumably seconds, like `start` (TODO confirm); not read by the formatters visible here. */
	readonly duration: number;
}

/**
 * `status: "completed"` response from the transcriber service.
 *
 * Consumed by `formatCompleted`.
 */
export interface CompletedResponse {
	/** Literal tag identifying this variant of `TranscriptResponse`. */
	readonly status: "completed";
	/** Rendered on the `**Video ID:**` line of the formatted output. */
	readonly video_id: string;
	/** Whole transcript as one string; rendered under the `### Full text` heading. */
	readonly full_text: string;
	/** Segments rendered, in array order, under `### Timestamped segments`. */
	readonly segments: readonly TranscriptSegment[];
}

/**
 * `status: "queued" | "processing"` response from the transcriber service.
 *
 * Consumed by `formatQueued`.
 */
export interface QueuedResponse {
	/** Literal tag for this variant; echoed verbatim in the formatted message. */
	readonly status: "queued" | "processing";
	/** Video identifier; not read by `formatQueued`. */
	readonly video_id: string;
	/** Rendered as the "queue position" in the formatted message. */
	readonly position: number;
	/** Seconds until the transcript is expected; added to `now()` and rounded up for display. */
	readonly estimated_seconds: number;
}

/**
 * `status: "failed"` response from the transcriber service.
 *
 * Consumed by `formatFailed`.
 */
export interface FailedResponse {
	/** Literal tag identifying this variant of `TranscriptResponse`. */
	readonly status: "failed";
	/** Video identifier; not read by `formatFailed`. */
	readonly video_id: string;
	/** Rendered after "Details:" in the formatted message. */
	readonly error: string;
	/** Rendered after "Error type:" in the formatted message. */
	readonly error_type: string;
}

/**
 * Discriminated union of all transcriber response shapes.
 *
 * The `status` field is the discriminant: checking it narrows a value to
 * `CompletedResponse`, `QueuedResponse`, or `FailedResponse`.
 */
export type TranscriptResponse = CompletedResponse | QueuedResponse | FailedResponse;

/**
 * Render an offset given in seconds as `m:ss` — for example `1:05` or `12:03`.
 *
 * The minutes part is unbounded (there is no hours field), so an offset past
 * the hour comes out as e.g. `61:40`. Fractional seconds are floored.
 *
 * @param seconds - Offset from the start of the video, in seconds.
 * @returns The offset as minutes, a colon, and zero-padded two-digit seconds.
 */
export function formatTimestamp(seconds: number): string {
	const wholeMinutes = Math.floor(seconds / 60);
	const leftoverSeconds = Math.floor(seconds % 60);
	const paddedSeconds = String(leftoverSeconds).padStart(2, "0");
	return [wholeMinutes, paddedSeconds].join(":");
}

/**
 * Render a completed transcript as markdown.
 *
 * Layout, top to bottom: a `## Transcript for <url>` header, the video id, a
 * `### Full text` section holding the whole transcript, and a
 * `### Timestamped segments` section with one `[m:ss] text` line per segment.
 *
 * @param url - The video URL, echoed in the header line.
 * @param data - The completed response to render.
 * @returns The markdown document, lines joined with `\n`.
 */
export function formatCompleted(url: string, data: CompletedResponse): string {
	// Fixed preamble: everything up to and including the blank line that
	// follows the segments heading.
	const preamble: string[] = [
		`## Transcript for ${url}`,
		`**Video ID:** ${data.video_id}`,
		"",
		"### Full text",
		"",
		data.full_text,
		"",
		"### Timestamped segments",
		"",
	];
	const segmentLines = data.segments.map(
		(entry) => `[${formatTimestamp(entry.start)}] ${entry.text}`,
	);
	return [...preamble, ...segmentLines].join("\n");
}

/**
 * Render a queued/processing response as a three-line message: the status and
 * queue position, the estimated available-at time, and the URL.
 *
 * The available-at time is `now()` plus `estimated_seconds`, printed as an
 * ISO 8601 string, so the output depends only on the arguments.
 *
 * @param url - The video URL, echoed on the last line.
 * @param data - The queued or processing response to render.
 * @param now - Injected clock returning the current time in epoch milliseconds.
 * @returns The message, lines joined with `\n`.
 */
export function formatQueued(url: string, data: QueuedResponse, now: () => number): string {
	const readyAtMs = now() + data.estimated_seconds * 1000;
	const readyAtIso = new Date(readyAtMs).toISOString();
	// Round up so a fractional estimate never under-reports the wait.
	const waitSeconds = Math.ceil(data.estimated_seconds);
	const messageLines = [
		`Transcript not yet available (status: ${data.status}, queue position: ${data.position}).`,
		`Estimated available at: ${readyAtIso} (in ~${waitSeconds}s).`,
		`URL: ${url}`,
	];
	return messageLines.join("\n");
}

/**
 * Render a failed response as a single line naming the error type followed by
 * the error details.
 *
 * @param data - The failed response to render.
 * @returns The one-line failure message.
 */
export function formatFailed(data: FailedResponse): string {
	const { error_type: errorType, error: details } = data;
	return `Transcript fetch failed. Error type: ${errorType}. Details: ${details}`;
}

/**
 * Truncate output to at most `cap` UTF-16 code units and append a trailing
 * notice, identical in spirit to tool-web-search. Duplication across features
 * is the intended trade (isolation over DRY).
 *
 * Output that already fits is returned unchanged. Otherwise the kept prefix is
 * followed by a blank line and the notice, so the returned string is longer
 * than `cap` by the length of that notice.
 *
 * The cut never lands between the two halves of a surrogate pair (e.g. an
 * emoji): when it would, the whole pair is dropped, so the kept prefix is one
 * code unit shorter than `cap` rather than ending in a lone high surrogate.
 *
 * @param output - The text to cap.
 * @param cap - Maximum number of UTF-16 code units of `output` to keep.
 * @returns `output` itself when it fits, otherwise the prefix plus the notice.
 */
export function truncateOutput(output: string, cap: number): string {
	if (output.length <= cap) {
		return output;
	}
	let end = cap;
	if (end > 0) {
		const lastKept = output.charCodeAt(end - 1);
		const firstDropped = output.charCodeAt(end);
		const splitsSurrogatePair =
			lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
		if (splitsSurrogatePair) {
			// Back off one code unit so the prefix stays well-formed UTF-16.
			end -= 1;
		}
	}
	const truncated = output.slice(0, end);
	return `${truncated}\n\n[Output truncated: exceeded ${cap} characters]`;
}