summaryrefslogtreecommitdiffhomepage
path: root/packages/tool-web-search/src/tool.ts
blob: 751278dc00d851205c07712b9c67d0c04385c309 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
/**
 * web_search tool factory — the imperative shell that binds the pure
 * validate/format functions to the injected FirecrawlClient edge.
 *
 * Mirrors the tool-shell pattern: factory + injected dep + pure helpers +
 * a `ToolResult` returned per call. Errors surface as `{ isError: true }`
 * rather than thrown, so the model can react to the message.
 */

import type { ToolContract, ToolExecuteContext, ToolResult } from "@dispatch/kernel";
import type { FirecrawlClient } from "./client.js";
import {
	formatCrawlResults,
	formatMapResults,
	formatScrapeResult,
	formatSearchResults,
	truncateOutput,
} from "./format.js";
import type { ValidatedArgs } from "./validate.js";
import { validateArgs } from "./validate.js";

/**
 * Default output cap handed to `truncateOutput` when the tool is created
 * without an explicit `outputCap`. The unit is whatever `truncateOutput`
 * measures (presumably characters — confirm in format.ts).
 */
const OUTPUT_CAP = 50_000;

/** Dependencies injected into `createWebSearchTool`. */
export interface WebSearchToolDeps {
	/** Firecrawl client used for every search, scrape, crawl, and map call. */
	readonly client: FirecrawlClient;
	/** Optional override for the output cap; `OUTPUT_CAP` is used when omitted. */
	readonly outputCap?: number;
}

/**
 * Route validated arguments to the Firecrawl client method that matches
 * `validated.mode`, then render the response with that mode's formatter.
 *
 * @param validated - Validated tool arguments; `mode` selects the branch.
 * @param client - Firecrawl client that performs the request.
 * @param signal - Abort signal forwarded to every client call.
 * @returns The formatted output for the selected mode (no truncation is applied here).
 */
async function runMode(
	validated: ValidatedArgs,
	client: FirecrawlClient,
	signal: AbortSignal,
): Promise<string> {
	switch (validated.mode) {
		case "search":
			return formatSearchResults(
				await client.search(
					{
						query: validated.query,
						limit: validated.limit,
						// Optional keys are spread in only when set, so unset options
						// never appear on the request object.
						...(validated.scrape
							? { scrapeOptions: { formats: ["markdown"], onlyMainContent: true } }
							: {}),
						...(validated.lang === undefined ? {} : { lang: validated.lang }),
						...(validated.country === undefined ? {} : { country: validated.country }),
					},
					signal,
				),
			);
		case "scrape":
			return formatScrapeResult(
				await client.scrape({ url: validated.url, formats: [validated.format] }, signal),
			);
		case "crawl":
			return formatCrawlResults(
				await client.crawl(
					{ url: validated.url, limit: validated.limit, formats: [validated.format] },
					signal,
				),
			);
		case "map":
			return formatMapResults(await client.map(validated.url, signal));
	}
}

/**
 * Create the `web_search` tool. `concurrencySafe: true` — web search is
 * idempotent and safe to run alongside other tools. The `network` capability
 * is declared on the extension manifest (not the tool contract).
 *
 * Validation failures and errors raised while running the request are
 * returned as `{ isError: true }` results instead of being thrown, so the
 * model can react to the message.
 *
 * @param deps - The Firecrawl client and an optional output cap override
 *   (falls back to `OUTPUT_CAP`).
 * @returns The tool contract for `web_search`.
 */
export function createWebSearchTool(deps: WebSearchToolDeps): ToolContract {
	const client = deps.client;
	const cap = deps.outputCap ?? OUTPUT_CAP;

	return {
		name: "web_search",
		description:
			"Access the web via a self-hosted Firecrawl instance. Supports search, " +
			"single-page scrape, site crawling, and sitemap discovery.",
		parameters: {
			type: "object",
			properties: {
				query: { type: "string", description: "The search query (search mode)." },
				url: { type: "string", description: "A URL to scrape, crawl, or map." },
				mode: {
					type: "string",
					enum: ["search", "scrape", "crawl", "map"],
					description:
						"Operation mode. 'search' (default when query present), 'scrape' " +
						"(default when url present), 'crawl' (recursively scrape pages from a site), " +
						"'map' (discover URLs on a site).",
				},
				limit: {
					type: "number",
					description: "Max results. Search: default 7, max 10. Crawl: default 3, max 10.",
				},
				scrape: {
					type: "boolean",
					description: "When searching, also scrape full markdown content of each result page.",
				},
				lang: {
					type: "string",
					description: 'Language code to filter search results (e.g. "en", "ja").',
				},
				country: {
					type: "string",
					description: 'Country code to filter search results (e.g. "us", "jp").',
				},
				format: {
					type: "string",
					enum: ["markdown", "text", "html"],
					description: "Format for scrape/crawl output (default: markdown).",
				},
			},
		},
		concurrencySafe: true,
		async execute(args: unknown, ctx: ToolExecuteContext): Promise<ToolResult> {
			const validated = validateArgs(args);
			if ("error" in validated) {
				// Invalid arguments are reported to the model before any span is opened.
				return { content: validated.error, isError: true };
			}
			const span = ctx.log.span("web_search.execute", { mode: validated.mode });
			try {
				const output = await runMode(validated, client, ctx.signal);
				// Truncate before closing the span: if truncation throws, the catch
				// below must see a span that is still open, so it is never ended
				// twice and never recorded as a success for a failed call.
				const content = truncateOutput(output, cap);
				span.end();
				return { content };
			} catch (err: unknown) {
				span.end({ err });
				return {
					content: `Error: ${err instanceof Error ? err.message : String(err)}`,
					isError: true,
				};
			}
		},
	};
}