1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
/**
* youtube_transcript tool factory — the imperative shell that binds the pure
* validate/format functions to the injected TranscriptClient edge.
*
* Mirrors the tool-web-search pattern: factory + injected dep + pure helpers +
* a `ToolResult` returned per call. Errors surface as `{ isError: true }`
* rather than thrown, so the model can react to the message.
*/
import { mkdirSync, writeFileSync } from "node:fs";
import type { ToolContract, ToolExecuteContext, ToolResult } from "@dispatch/kernel";
import type { TranscriptClient } from "./client.js";
import {
formatCompleted,
formatFailed,
formatQueued,
type TranscriptResponse,
truncateOutput,
} from "./format.js";
import { validateUrl } from "./validate.js";
/** Default output cap, used when `deps.outputCap` is not provided; compared against `output.length`. */
const OUTPUT_CAP = 50_000;
/** Directory the default writer creates and into which `{video_id}.txt` files are written. */
const FULL_OUTPUT_DIR = "/tmp/dispatch/youtube-transcribe";
/**
 * Dependencies injected into `createYoutubeTranscriptTool`.
 *
 * Only `client` is required; the optional members exist so callers (e.g. tests)
 * can override the output cap and the filesystem write.
 */
export interface YoutubeTranscriptToolDeps {
/** Client whose `getTranscript(url, signal)` is awaited to obtain the transcript response. */
readonly client: TranscriptClient;
/** Length above which the formatted output is truncated; falls back to `OUTPUT_CAP` when omitted. */
readonly outputCap?: number;
/**
 * Injected file writer (defaults to real fs write).
 * The default creates `FULL_OUTPUT_DIR` recursively and writes `content` to
 * `path` as UTF-8. Exceptions thrown by the writer are caught by the tool.
 */
readonly writeFile?: (path: string, content: string) => void;
}
/**
 * Text used as the `description` of the `youtube_transcript` tool.
 *
 * Written as one array entry per sentence and joined with single spaces, so
 * each sentence can be read (and edited) on its own.
 */
const DESCRIPTION = [
  "Fetch the transcript/subtitles for a YouTube video from the local transcriber service.",
  "If the transcript has not been downloaded before, the video will be queued for " +
    "processing and the tool will return the estimated time when the transcript will be available.",
  "Once available, the tool returns the transcript text and timestamped segments " +
    "(truncated if very long).",
  "The full transcript is always saved to /tmp/dispatch/youtube-transcribe/{video_id}.txt — " +
    "use read_file to access it.",
  "Accepted URL formats: youtube.com/watch?v=, youtu.be/, youtube.com/embed/, youtube.com/shorts/",
].join(" ");
/**
 * Create the `youtube_transcript` tool. `concurrencySafe: true` — transcript
 * fetches are idempotent and safe to run alongside other tools. The `network`
 * capability is declared on the extension manifest (not the tool contract).
 *
 * Per call, the tool:
 *  1. validates `args` with `validateUrl` and returns `{ isError: true }` on a
 *     validation error (no span is opened, no request is made);
 *  2. awaits `client.getTranscript(url, ctx.signal)` and formats the response
 *     according to its `status`;
 *  3. for a completed transcript, makes a best-effort attempt to save the full
 *     formatted output to `${FULL_OUTPUT_DIR}/{video_id}.txt`;
 *  4. returns the output passed through `truncateOutput`, pointing at the saved
 *     file only when that file was actually written.
 *
 * @param deps - Injected client plus optional output cap and file writer.
 * @returns The tool contract. `execute` never rejects for client/format
 *   failures; they are reported as `{ content: "Error: …", isError: true }`.
 */
export function createYoutubeTranscriptTool(deps: YoutubeTranscriptToolDeps): ToolContract {
  const client = deps.client;
  const cap = deps.outputCap ?? OUTPUT_CAP;
  const writeFile =
    deps.writeFile ??
    ((path: string, content: string): void => {
      mkdirSync(FULL_OUTPUT_DIR, { recursive: true });
      writeFileSync(path, content, "utf-8");
    });
  // The video id comes from the service response and is interpolated into a
  // filesystem path, so only ids made of URL-safe base64 characters are used
  // as file names. Anything else (e.g. containing "/" or "..") is not saved.
  const safeVideoId = /^[A-Za-z0-9_-]+$/;

  return {
    name: "youtube_transcript",
    description: DESCRIPTION,
    parameters: {
      type: "object",
      properties: {
        url: {
          type: "string",
          description:
            "YouTube video URL (e.g. https://www.youtube.com/watch?v=... or https://youtu.be/...)",
        },
      },
      required: ["url"],
    },
    concurrencySafe: true,

    async execute(args: unknown, ctx: ToolExecuteContext): Promise<ToolResult> {
      const validated = validateUrl(args);
      if (typeof validated !== "string") {
        return { content: validated.error, isError: true };
      }
      const url = validated;

      const span = ctx.log.span("youtube_transcript.execute", { url });
      try {
        const data: TranscriptResponse = await client.getTranscript(url, ctx.signal);

        let output: string;
        let videoId: string | undefined;
        // Check the single-literal discriminants ("completed"/"failed") first,
        // so the final else narrows to QueuedResponse — whose `status` is itself
        // a `"queued" | "processing"` union TS cannot negatively narrow.
        if (data.status === "completed") {
          output = formatCompleted(url, data);
          videoId = data.video_id;
        } else if (data.status === "failed") {
          output = formatFailed(data);
        } else {
          output = formatQueued(url, data, Date.now);
        }

        // Remember the path only if the write succeeded, so the truncated
        // result never refers the caller to a file that does not exist.
        let savedPath: string | undefined;
        if (videoId !== undefined && safeVideoId.test(videoId)) {
          const filePath = `${FULL_OUTPUT_DIR}/${videoId}.txt`;
          try {
            writeFile(filePath, output);
            savedPath = filePath;
          } catch {
            // Saving the full output is best-effort: on failure, fall through
            // and return the (possibly truncated) output without a file pointer.
          }
        }

        const content =
          savedPath !== undefined && output.length > cap
            ? truncateOutput(output, cap, savedPath)
            : truncateOutput(output, cap);

        // End the span only after all work in the try block is done, so it is
        // closed exactly once on every path (success here, failure in catch).
        span.end();
        return { content };
      } catch (err: unknown) {
        span.end({ err });
        return {
          content: `Error: ${err instanceof Error ? err.message : String(err)}`,
          isError: true,
        };
      }
    },
  };
}
|