summary | refs | log | tree | commit | diff | homepage
path: root/packages/core/src/tools/web-search.ts
blob: 7f061a53db9e15b8603a043d5b4244dbc545a70d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import { z } from "zod";
import type { ToolDefinition } from "../types/index.js";

/** Firecrawl search endpoint that the tool sends its POST request to. */
const FIRECRAWL_URL = "http://100.102.55.49:31329/v1/search";

/** Upper bound, in characters, on the formatted text handed back to the caller. */
const MAX_OUTPUT_CHARS = 60_000;

/** Milliseconds to wait before the in-flight request is aborted. */
const TIMEOUT_MS = 30_000;

/** Returns true when `err` is the rejection produced by aborting a request through an AbortController. */
function isAbortError(err: unknown): boolean {
	return typeof err === "object" && err !== null && (err as { name?: unknown }).name === "AbortError";
}

/**
 * Finds the first string `code` on `err` or along its `cause` chain.
 *
 * Node's built-in fetch rejects with `TypeError("fetch failed")` and attaches
 * the underlying socket error (for example code "ECONNREFUSED") as `cause`, so
 * inspecting only the top-level error never sees the code.
 */
function findErrorCode(err: unknown): string | undefined {
	let current: unknown = err;
	// The walk is bounded so a cyclic `cause` chain cannot loop forever.
	for (let depth = 0; depth < 5 && typeof current === "object" && current !== null; depth++) {
		const { code, cause } = current as { code?: unknown; cause?: unknown };
		if (typeof code === "string") return code;
		current = cause;
	}
	return undefined;
}

/** Cuts `text` to at most `maxChars` UTF-16 units and appends a truncation marker when something was removed. */
function truncateOutput(text: string, maxChars: number): string {
	if (text.length <= maxChars) return text;
	let end = maxChars;
	const lastUnit = text.charCodeAt(end - 1);
	// Do not leave a dangling high surrogate at the cut point.
	if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
	return `${text.slice(0, end)}\n\n[Output truncated]`;
}

/** Maps a failure of the request (connect, headers or body read) to the error string returned by the tool. */
function describeRequestError(err: unknown): string {
	if (isAbortError(err)) {
		return `Error: Request to Firecrawl timed out after ${TIMEOUT_MS / 1000} seconds.`;
	}
	if (findErrorCode(err) === "ECONNREFUSED") {
		return `Error: Could not connect to Firecrawl at ${new URL(FIRECRAWL_URL).origin}. Is it running?`;
	}
	if (err instanceof Error) {
		// Surface the wrapped cause, otherwise the message is often just "fetch failed".
		const cause = (err as { cause?: unknown }).cause;
		const detail = cause instanceof Error && cause.message ? ` (${cause.message})` : "";
		return `Error: ${err.message}${detail}`;
	}
	return `Error: ${String(err)}`;
}

/**
 * Renders a parsed response body as markdown sections, one per result.
 *
 * The payload is treated as untrusted: a missing `data` yields "No results
 * found.", a non-array `data` yields an error string, and entries that are not
 * objects are skipped instead of throwing.
 */
function formatSearchResults(payload: unknown): string {
	if (typeof payload !== "object" || payload === null) {
		return "Error: Unexpected Firecrawl response format";
	}
	const data = (payload as { data?: unknown }).data;
	if (data === undefined || data === null) {
		return "No results found.";
	}
	if (!Array.isArray(data)) {
		return "Error: Unexpected Firecrawl response format";
	}

	const sections: string[] = [];
	for (const entry of data as unknown[]) {
		if (typeof entry !== "object" || entry === null) continue;
		const { title, url, description, markdown } = entry as Record<string, unknown>;
		let section = `### ${typeof title === "string" ? title : "(no title)"}\n${
			typeof url === "string" ? url : ""
		}\n\n${typeof description === "string" ? description : ""}`;
		if (typeof markdown === "string" && markdown !== "") {
			section += `\n\n${markdown}`;
		}
		sections.push(section);
	}
	if (sections.length === 0) {
		return "No results found.";
	}
	return truncateOutput(sections.join("\n\n---\n\n"), MAX_OUTPUT_CHARS);
}

/**
 * Builds the `web_search` tool definition.
 *
 * The tool POSTs the query to `FIRECRAWL_URL` and returns the results as
 * markdown sections separated by `---`, truncated to `MAX_OUTPUT_CHARS`.
 * `execute` does not reject for request or response problems: timeouts,
 * connection failures, non-2xx responses and malformed bodies are all reported
 * as strings starting with "Error:".
 *
 * @returns The tool definition (name, description, zod parameter schema and executor).
 */
export function createWebSearchTool(): ToolDefinition {
	return {
		name: "web_search",
		description:
			"Search the web via a self-hosted Firecrawl instance. Returns a list of results with titles, URLs, and descriptions. Optionally scrapes the full markdown content of each result page.",
		parameters: z.object({
			query: z.string().describe("The search query"),
			limit: z
				.number()
				.optional()
				.default(7)
				.describe("Maximum number of results to return (default 7)"),
			scrape: z
				.boolean()
				.optional()
				.default(false)
				.describe("Whether to also scrape the full markdown content of each result page"),
			lang: z.string().optional().describe('Language code to filter results (e.g. "en")'),
			country: z.string().optional().describe('Country code to filter results (e.g. "us")'),
		}),
		execute: async (args: Record<string, unknown>): Promise<string> => {
			const query = args.query;
			// Reject unusable queries locally instead of spending a round trip on them.
			if (typeof query !== "string" || query.trim() === "") {
				return "Error: query must be a non-empty string.";
			}
			const limit = (args.limit as number | undefined) ?? 7;
			const scrape = (args.scrape as boolean | undefined) ?? false;
			const lang = args.lang as string | undefined;
			const country = args.country as string | undefined;

			const body: Record<string, unknown> = { query, limit };
			if (lang !== undefined) body.lang = lang;
			if (country !== undefined) body.country = country;
			if (scrape) {
				body.scrapeOptions = { formats: ["markdown"], onlyMainContent: true };
			}

			const controller = new AbortController();
			const timeout = setTimeout(() => controller.abort(), TIMEOUT_MS);

			// The timer stays armed until the body has been consumed, so a response
			// that sends headers and then stalls is aborted as well.
			try {
				const response = await fetch(FIRECRAWL_URL, {
					method: "POST",
					headers: {
						"Content-Type": "application/json",
						Accept: "application/json",
					},
					body: JSON.stringify(body),
					signal: controller.signal,
				});

				if (!response.ok) {
					const text = await response.text().catch(() => "");
					const detail = text ? `: ${truncateOutput(text, MAX_OUTPUT_CHARS)}` : "";
					return `Error: Firecrawl returned HTTP ${response.status} ${response.statusText}${detail}`;
				}

				let payload: unknown;
				try {
					payload = await response.json();
				} catch (err) {
					// An abort while reading the body is a timeout, not a parse failure.
					if (isAbortError(err)) throw err;
					return "Error: Failed to parse Firecrawl response as JSON";
				}

				return formatSearchResults(payload);
			} catch (err) {
				return describeRequestError(err);
			} finally {
				clearTimeout(timeout);
			}
		},
	};
}