feat(tool-web-search): Firecrawl-backed web search tool

New standard tool extension with one tool web_search supporting 4 modes (search, scrape, crawl, map) against a self-hosted Firecrawl instance. Pure core: validateArgs (discriminated union by mode) + format* functions + truncateOutput. Injected edge: FirecrawlClient (injectable fetchFn/sleep/now, AbortSignal.any for per-request timeout + caller cancellation). concurrencySafe true, capabilities network. 38 tests, zero vi.mock. Live-verified: umans-glm-5.2 called web_search → real Firecrawl results (also the first live Umans API call).
author: Adam Malczewski <[email protected]> 2026-06-21 13:11:29 +0900
committer: Adam Malczewski <[email protected]> 2026-06-21 13:11:29 +0900
commit: 8a4a624d16422467a8e85434c674bb591877e8ea (patch)
tree: 54052da00bbc580742913e5c031b7cc1b160db19
parent: d23de3254374d4d63c8e15c6ab9311c3c6f4da5b (diff)
download: dispatch-8a4a624d16422467a8e85434c674bb591877e8ea.tar.gz
dispatch-8a4a624d16422467a8e85434c674bb591877e8ea.zip
17 files changed, 1317 insertions, 4 deletions
diff --git a/bun.lock b/bun.lock
index c08d7f9..18f4542 100644
--- a/bun.lock
+++ b/bun.lock
@@ -73,6 +73,7 @@
         "@dispatch/tool-edit-file": "workspace:*",
         "@dispatch/tool-read-file": "workspace:*",
         "@dispatch/tool-shell": "workspace:*",
+        "@dispatch/tool-web-search": "workspace:*",
         "@dispatch/tool-write-file": "workspace:*",
         "@dispatch/transport-http": "workspace:*",
         "@dispatch/transport-ws": "workspace:*",
diff --git a/packages/host-bin/package.json b/packages/host-bin/package.json
index 63b78bc..5f7d0e7 100644
--- a/packages/host-bin/package.json
+++ b/packages/host-bin/package.json
@@ -21,6 +21,7 @@
 		"@dispatch/tool-shell": "workspace:*",
 		"@dispatch/tool-edit-file": "workspace:*",
 		"@dispatch/tool-write-file": "workspace:*",
+		"@dispatch/tool-web-search": "workspace:*",
 		"@dispatch/journal-sink": "workspace:*",
 		"@dispatch/lsp": "workspace:*",
 		"@dispatch/surface-loaded-extensions": "workspace:*",
diff --git a/packages/host-bin/src/main.ts b/packages/host-bin/src/main.ts
index 1928a8a..1c122de 100644
--- a/packages/host-bin/src/main.ts
+++ b/packages/host-bin/src/main.ts
@@ -32,6 +32,7 @@ import { extension as throughputStoreExt } from "@dispatch/throughput-store";
 import { extension as toolEditFileExt } from "@dispatch/tool-edit-file";
 import { extension as toolReadFileExt } from "@dispatch/tool-read-file";
 import { extension as toolShellExt } from "@dispatch/tool-shell";
+import { extension as toolWebSearchExt } from "@dispatch/tool-web-search";
 import { extension as toolWriteFileExt } from "@dispatch/tool-write-file";
 import { createTransportHttpExtension } from "@dispatch/transport-http";
 import { createTransportWsExtension } from "@dispatch/transport-ws";
@@ -75,6 +76,7 @@ const CORE_EXTENSIONS: readonly Extension[] = [
 	toolReadFileExt,
 	toolShellExt,
 	toolWriteFileExt,
+	toolWebSearchExt,
 	throughputStoreExt,
 	messageQueueExt,
 	sessionOrchestratorExt,
diff --git a/packages/tool-web-search/package.json b/packages/tool-web-search/package.json
new file mode 100644
index 0000000..c41ab7b
--- /dev/null
+++ b/packages/tool-web-search/package.json
@@ -0,0 +1,11 @@
+{
+	"name": "@dispatch/tool-web-search",
+	"version": "0.0.0",
+	"type": "module",
+	"private": true,
+	"main": "dist/index.js",
+	"types": "dist/index.d.ts",
+	"dependencies": {
+		"@dispatch/kernel": "workspace:*"
+	}
+}
diff --git a/packages/tool-web-search/src/client.test.ts b/packages/tool-web-search/src/client.test.ts
new file mode 100644
index 0000000..f020a83
--- /dev/null
+++ b/packages/tool-web-search/src/client.test.ts
@@ -0,0 +1,208 @@
+import { describe, expect, it } from "vitest";
+import { createFirecrawlClient, type FetchLike } from "./client.js";
+
+function jsonResponse(body: unknown, status = 200): Response {
+	return new Response(JSON.stringify(body), {
+		status,
+		headers: { "Content-Type": "application/json" },
+	});
+}
+
+interface CapturedCall {
+	url: string;
+	method?: string | undefined;
+	body?: string | undefined;
+}
+
+/** Builds a fake fetch that returns scripted responses in order, capturing each call. */
+function makeFetch(responses: Response[]): { fetchFn: FetchLike; calls: CapturedCall[] } {
+	const calls: CapturedCall[] = [];
+	let i = 0;
+	const fetchFn: FetchLike = (async (input: string | URL | Request, init?: RequestInit) => {
+		const url =
+			typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
+		calls.push({
+			url,
+			method: init?.method,
+			body: typeof init?.body === "string" ? init.body : undefined,
+		});
+		return responses[i++] ?? jsonResponse({});
+	}) as unknown as FetchLike;
+	return { fetchFn, calls };
+}
+
+const BASE = "http://test-firecrawl.local/v1";
+const signal = (): AbortSignal => new AbortController().signal;
+
+describe("createFirecrawlClient.search", () => {
+	it("sends POST /search with correct body", async () => {
+		const { fetchFn, calls } = makeFetch([jsonResponse({ success: true, data: [] })]);
+		const client = createFirecrawlClient({ baseUrl: BASE, fetchFn });
+		await client.search({ query: "hello", limit: 7 }, signal());
+
+		const call = calls[0];
+		if (!call) throw new Error("no call captured");
+		expect(call.url).toBe(`${BASE}/search`);
+		expect(call.method).toBe("POST");
+		expect(JSON.parse(call.body ?? "{}")).toEqual({ query: "hello", limit: 7 });
+	});
+
+	it("returns parsed data on success", async () => {
+		const data = [{ title: "T", url: "http://x", description: "d" }];
+		const { fetchFn } = makeFetch([jsonResponse({ success: true, data })]);
+		const client = createFirecrawlClient({ baseUrl: BASE, fetchFn });
+		const result = await client.search({ query: "hello", limit: 7 }, signal());
+		expect(result).toEqual(data);
+	});
+
+	it("throws on !success", async () => {
+		const { fetchFn } = makeFetch([jsonResponse({ success: false, error: "boom" })]);
+		const client = createFirecrawlClient({ baseUrl: BASE, fetchFn });
+		await expect(client.search({ query: "x", limit: 7 }, signal())).rejects.toThrow("boom");
+	});
+});
+
+describe("createFirecrawlClient.scrape", () => {
+	it("sends POST /scrape with correct body", async () => {
+		const { fetchFn, calls } = makeFetch([
+			jsonResponse({ success: true, data: { markdown: "md", metadata: { title: "T" } } }),
+		]);
+		const client = createFirecrawlClient({ baseUrl: BASE, fetchFn });
+		await client.scrape({ url: "http://x", formats: ["markdown"] }, signal());
+
+		const call = calls[0];
+		if (!call) throw new Error("no call captured");
+		expect(call.url).toBe(`${BASE}/scrape`);
+		expect(call.method).toBe("POST");
+		expect(JSON.parse(call.body ?? "{}")).toEqual({
+			url: "http://x",
+			formats: ["markdown"],
+			onlyMainContent: true,
+		});
+	});
+});
+
+describe("createFirecrawlClient.crawl", () => {
+	it("polls status URL until completed", async () => {
+		const { fetchFn, calls } = makeFetch([
+			jsonResponse({ success: true, url: `${BASE}/crawl/status/123` }),
+			jsonResponse({ status: "scraping" }),
+			jsonResponse({
+				status: "completed",
+				data: [{ markdown: "p1", metadata: { title: "P1", sourceURL: "http://p1" } }],
+			}),
+		]);
+		const client = createFirecrawlClient({
+			baseUrl: BASE,
+			fetchFn,
+			sleep: async () => {},
+		});
+		const pages = await client.crawl(
+			{ url: "http://site", limit: 3, formats: ["markdown"] },
+			signal(),
+		);
+		expect(pages).toEqual([{ markdown: "p1", metadata: { title: "P1", sourceURL: "http://p1" } }]);
+		expect(calls.length).toBe(3);
+	});
+
+	it("returns data when completed", async () => {
+		const { fetchFn } = makeFetch([
+			jsonResponse({ success: true, url: `${BASE}/crawl/status/123` }),
+			jsonResponse({
+				status: "completed",
+				data: [{ markdown: "page", metadata: { title: "T" } }],
+			}),
+		]);
+		const client = createFirecrawlClient({
+			baseUrl: BASE,
+			fetchFn,
+			sleep: async () => {},
+		});
+		const pages = await client.crawl(
+			{ url: "http://site", limit: 3, formats: ["markdown"] },
+			signal(),
+		);
+		expect(pages.length).toBe(1);
+		expect(pages[0]?.markdown).toBe("page");
+	});
+
+	it("throws when status is failed", async () => {
+		const { fetchFn } = makeFetch([
+			jsonResponse({ success: true, url: `${BASE}/crawl/status/123` }),
+			jsonResponse({ status: "failed", error: "boom" }),
+		]);
+		const client = createFirecrawlClient({
+			baseUrl: BASE,
+			fetchFn,
+			sleep: async () => {},
+		});
+		await expect(
+			client.crawl({ url: "http://site", limit: 3, formats: ["markdown"] }, signal()),
+		).rejects.toThrow("failed");
+	});
+
+	it("respects abort signal (stops polling)", async () => {
+		const controller = new AbortController();
+		const { fetchFn, calls } = makeFetch([
+			jsonResponse({ success: true, url: `${BASE}/crawl/status/123` }),
+		]);
+		const client = createFirecrawlClient({
+			baseUrl: BASE,
+			fetchFn,
+			sleep: async (_ms, sig) => {
+				controller.abort();
+				if (sig.aborted) throw new Error("Request aborted.");
+			},
+		});
+		await expect(
+			client.crawl({ url: "http://site", limit: 3, formats: ["markdown"] }, controller.signal),
+		).rejects.toThrow();
+		expect(calls.length).toBe(1);
+	});
+});
+
+describe("createFirecrawlClient.map", () => {
+	it("sends POST /map and returns links", async () => {
+		const { fetchFn, calls } = makeFetch([
+			jsonResponse({ success: true, links: ["http://a", "http://b"] }),
+		]);
+		const client = createFirecrawlClient({ baseUrl: BASE, fetchFn });
+		const links = await client.map("http://site", signal());
+		expect(links).toEqual(["http://a", "http://b"]);
+
+		const call = calls[0];
+		if (!call) throw new Error("no call captured");
+		expect(call.url).toBe(`${BASE}/map`);
+		expect(call.method).toBe("POST");
+		expect(JSON.parse(call.body ?? "{}")).toEqual({ url: "http://site" });
+	});
+});
+
+describe("createFirecrawlClient.request (error paths)", () => {
+	it("throws on HTTP error", async () => {
+		const { fetchFn } = makeFetch([
+			new Response("not found", { status: 404, statusText: "Not Found" }),
+		]);
+		const client = createFirecrawlClient({ baseUrl: BASE, fetchFn });
+		await expect(client.search({ query: "x", limit: 7 }, signal())).rejects.toThrow("HTTP 404");
+	});
+
+	it("throws on timeout", async () => {
+		const fetchFn: FetchLike = ((_input: string | URL | Request, init?: RequestInit) =>
+			new Promise<Response>((_resolve, reject) => {
+				const sig = init?.signal;
+				if (!sig) return;
+				sig.addEventListener("abort", () => {
+					const err = new Error("aborted");
+					err.name = "AbortError";
+					reject(err);
+				});
+			})) as unknown as FetchLike;
+		const client = createFirecrawlClient({
+			baseUrl: BASE,
+			fetchFn,
+			timeoutMs: 10,
+		});
+		await expect(client.search({ query: "x", limit: 7 }, signal())).rejects.toThrow("timed out");
+	});
+});
diff --git a/packages/tool-web-search/src/client.ts b/packages/tool-web-search/src/client.ts
new file mode 100644
index 0000000..071ba97
--- /dev/null
+++ b/packages/tool-web-search/src/client.ts
@@ -0,0 +1,243 @@
+/**
+ * FirecrawlClient — the injected outermost edge for the web_search tool.
+ *
+ * All effects (fetch, sleep, clock) are injected so the pure decision logic
+ * remains testable without real I/O. The factory builds four methods
+ * (`search`, `scrape`, `crawl`, `map`) over a self-hosted Firecrawl instance
+ * (no API key). `crawl` polls a status URL until the crawl completes or fails.
+ */
+
+import type { CrawlPage, ScrapeResult, SearchHit } from "./format.js";
+
+export type FetchLike = typeof globalThis.fetch;
+
+export const DEFAULT_BASE_URL = "http://100.102.55.49:31329/v1"; // fallback when FIRECRAWL_BASE_URL is not set
+export const DEFAULT_TIMEOUT_MS = 30_000; // per-request timeout fallback, in ms
+export const CRAWL_POLL_MS = 2_000; // pause between crawl status polls, in ms
+export const CRAWL_MAX_WAIT_MS = 5 * 60 * 1_000; // crawl polling budget (5 minutes), in ms
+
+export interface SearchParams {
+	readonly query: string;
+	readonly limit: number;
+	readonly lang?: string;
+	readonly country?: string;
+	readonly scrapeOptions?: {
+		readonly formats: readonly string[];
+		readonly onlyMainContent: boolean;
+	};
+}
+
+export interface ScrapeParams {
+	readonly url: string;
+	readonly formats: readonly string[];
+}
+
+export interface CrawlParams {
+	readonly url: string;
+	readonly limit: number;
+	readonly formats: readonly string[];
+}
+
+export interface FirecrawlClient {
+	readonly search: (params: SearchParams, signal: AbortSignal) => Promise<readonly SearchHit[]>;
+	readonly scrape: (params: ScrapeParams, signal: AbortSignal) => Promise<ScrapeResult>;
+	readonly crawl: (params: CrawlParams, signal: AbortSignal) => Promise<readonly CrawlPage[]>;
+	readonly map: (url: string, signal: AbortSignal) => Promise<readonly string[]>;
+}
+
+export interface FirecrawlClientDeps {
+	readonly baseUrl: string; // root URL that endpoint names (search, scrape, crawl, map) are appended to
+	readonly fetchFn: FetchLike; // performs every HTTP request
+	readonly timeoutMs?: number; // per-request timeout; defaults to DEFAULT_TIMEOUT_MS
+	readonly pollMs?: number; // delay before each crawl status poll; defaults to CRAWL_POLL_MS
+	readonly maxWaitMs?: number; // crawl polling budget; defaults to CRAWL_MAX_WAIT_MS
+	readonly now?: () => number; // clock used for the crawl budget; defaults to Date.now
+	readonly sleep?: (ms: number, signal: AbortSignal) => Promise<void>; // defaults to defaultSleep
+}
+
+interface SearchResponse {
+	readonly success: boolean;
+	readonly data?: readonly SearchHit[];
+	readonly error?: string;
+}
+
+interface ScrapeResponse {
+	readonly success: boolean;
+	readonly data?: {
+		readonly markdown?: string;
+		readonly metadata?: { readonly title?: string };
+	};
+	readonly error?: string;
+}
+
+interface CrawlStartResponse {
+	readonly success: boolean;
+	readonly url?: string;
+	readonly error?: string;
+}
+
+interface CrawlStatusResponse {
+	readonly status: string;
+	readonly data?: readonly CrawlPage[];
+	readonly error?: string;
+}
+
+interface MapResponse {
+	readonly success: boolean;
+	readonly links?: readonly string[];
+	readonly error?: string;
+}
+
+/**
+ * Fallback `sleep`: waits `ms` milliseconds, or rejects with "Request aborted."
+ * as soon as `signal` aborts (immediately when it is already aborted).
+ */
+async function defaultSleep(ms: number, signal: AbortSignal): Promise<void> {
+	if (signal.aborted) {
+		throw new Error("Request aborted.");
+	}
+	await new Promise<void>((wake, fail) => {
+		const cancel = (): void => {
+			// `handle` is always assigned by the time an abort event can fire.
+			clearTimeout(handle);
+			fail(new Error("Request aborted."));
+		};
+		const handle = setTimeout(() => {
+			signal.removeEventListener("abort", cancel);
+			wake();
+		}, ms);
+		signal.addEventListener("abort", cancel, { once: true });
+	});
+}
+
+/**
+ * Create a FirecrawlClient. Each method builds a fetch request, calls the
+ * injected `fetchFn`, and handles HTTP + JSON errors. The per-request timeout
+ * is combined with the caller's cancellation signal via `AbortSignal.any`.
+ */
+export function createFirecrawlClient(deps: FirecrawlClientDeps): FirecrawlClient {
+	const baseUrl = deps.baseUrl;
+	const fetchFn = deps.fetchFn;
+	const timeoutMs = deps.timeoutMs ?? DEFAULT_TIMEOUT_MS;
+	const pollMs = deps.pollMs ?? CRAWL_POLL_MS;
+	const maxWaitMs = deps.maxWaitMs ?? CRAWL_MAX_WAIT_MS;
+	const now = deps.now ?? Date.now;
+	const sleep = deps.sleep ?? defaultSleep;
+
+	async function request(
+		method: "POST" | "GET",
+		url: string,
+		body: unknown,
+		signal: AbortSignal,
+	): Promise<unknown> {
+		const controller = new AbortController();
+		const timeout = setTimeout(() => controller.abort(), timeoutMs);
+		const combined = AbortSignal.any([signal, controller.signal]);
+		try {
+			let response: Response;
+			try {
+				response = await fetchFn(url, {
+					method,
+					headers:
+						body !== undefined
+							? { "Content-Type": "application/json", Accept: "application/json" }
+							: { Accept: "application/json" },
+					body: body !== undefined ? JSON.stringify(body) : undefined,
+					signal: combined,
+				});
+			} catch (err) {
+				if (signal.aborted) {
+					throw new Error("Request aborted.");
+				}
+				if (controller.signal.aborted) {
+					throw new Error(`Firecrawl request timed out after ${timeoutMs / 1000} seconds.`);
+				}
+				throw err;
+			}
+			if (!response.ok) {
+				const text = await response.text().catch(() => "");
+				throw new Error(`HTTP ${response.status} ${response.statusText}${text ? `: ${text}` : ""}`);
+			}
+			try {
+				return await response.json();
+			} catch {
+				throw new Error("Failed to parse Firecrawl response as JSON");
+			}
+		} finally {
+			clearTimeout(timeout);
+		}
+	}
+
+	async function post(endpoint: string, body: unknown, signal: AbortSignal): Promise<unknown> {
+		// Strip trailing slashes so a baseUrl such as ".../v1/" cannot produce ".../v1//search".
+		return request("POST", `${baseUrl.replace(/\/+$/, "")}/${endpoint}`, body, signal);
+	}
+	return {
+		async search(params: SearchParams, signal: AbortSignal): Promise<readonly SearchHit[]> {
+			const body: Record<string, unknown> = { query: params.query, limit: params.limit };
+			if (params.lang !== undefined) {
+				body.lang = params.lang;
+			}
+			if (params.country !== undefined) {
+				body.country = params.country;
+			}
+			if (params.scrapeOptions !== undefined) {
+				body.scrapeOptions = params.scrapeOptions;
+			}
+			const json = (await post("search", body, signal)) as SearchResponse;
+			if (!json.success) {
+				throw new Error(json.error ?? "Unknown error");
+			}
+			return json.data ?? [];
+		},
+
+		async scrape(params: ScrapeParams, signal: AbortSignal): Promise<ScrapeResult> {
+			const body = {
+				url: params.url,
+				formats: params.formats,
+				onlyMainContent: true,
+			};
+			const json = (await post("scrape", body, signal)) as ScrapeResponse;
+			if (!json.success) {
+				throw new Error(json.error ?? "Unknown error");
+			}
+			return json;
+		},
+
+		async crawl(params: CrawlParams, signal: AbortSignal): Promise<readonly CrawlPage[]> {
+			const body = {
+				url: params.url,
+				limit: params.limit,
+				scrapeOptions: { formats: params.formats, onlyMainContent: true },
+			};
+			const startJson = (await post("crawl", body, signal)) as CrawlStartResponse;
+			if (!startJson.success) {
+				throw new Error(startJson.error ?? "Unknown error");
+			}
+			const statusUrl = startJson.url;
+			if (statusUrl === undefined) {
+				throw new Error("crawl response missing status URL.");
+			}
+			const started = now();
+			while (now() - started < maxWaitMs) {
+				await sleep(pollMs, signal);
+				const status = (await request("GET", statusUrl, undefined, signal)) as CrawlStatusResponse;
+				if (status.status === "completed") {
+					return status.data ?? [];
+				}
+				if (status.status === "failed") {
+					throw new Error(`crawl failed: ${status.error ?? "unknown"}`);
+				}
+			}
+			throw new Error("crawl timed out waiting for completion.");
+		},
+
+		async map(url: string, signal: AbortSignal): Promise<readonly string[]> {
+			const json = (await post("map", { url }, signal)) as MapResponse;
+			if (!json.success) {
+				throw new Error(json.error ?? "Unknown error");
+			}
+			return json.links ?? [];
+		},
+	};
+}
diff --git a/packages/tool-web-search/src/extension.test.ts b/packages/tool-web-search/src/extension.test.ts
new file mode 100644
index 0000000..6e0a6bc
--- /dev/null
+++ b/packages/tool-web-search/src/extension.test.ts
@@ -0,0 +1,113 @@
+import { createLogger, type HostAPI, type ToolExecuteContext } from "@dispatch/kernel";
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { activate, extension, manifest } from "./extension.js";
+
+function stubCtx(overrides?: Partial<ToolExecuteContext>): ToolExecuteContext {
+	return {
+		toolCallId: "test-call-1",
+		onOutput: () => {},
+		signal: new AbortController().signal,
+		log: createLogger(
+			{ extensionId: "test" },
+			{ emit: () => {} },
+			{ now: () => 0, newId: () => "id" },
+		),
+		...overrides,
+	};
+}
+
+function makeFakeHost(): { host: HostAPI; defineTool: ReturnType<typeof vi.fn> } {
+	const defineTool = vi.fn();
+	const host = {
+		defineTool,
+		logger: {
+			debug: vi.fn(),
+			info: vi.fn(),
+			warn: vi.fn(),
+			error: vi.fn(),
+			span: vi.fn(() => ({ end: vi.fn() })),
+		},
+	} as unknown as HostAPI;
+	return { host, defineTool };
+}
+
+const ORIG_FETCH = globalThis.fetch;
+const ORIG_ENV = process.env.FIRECRAWL_BASE_URL;
+
+function restoreEnv(): void {
+	if (ORIG_ENV === undefined) {
+		delete process.env.FIRECRAWL_BASE_URL;
+	} else {
+		process.env.FIRECRAWL_BASE_URL = ORIG_ENV;
+	}
+}
+
+afterEach(() => {
+	globalThis.fetch = ORIG_FETCH;
+	restoreEnv();
+});
+
+function stubFetchCapture(): { calls: Array<{ url: string }> } {
+	const calls: Array<{ url: string }> = [];
+	globalThis.fetch = vi.fn(async (input: string | URL | Request) => {
+		calls.push({ url: String(input) });
+		return new Response(JSON.stringify({ success: true, data: [] }), {
+			status: 200,
+			headers: { "Content-Type": "application/json" },
+		});
+	}) as unknown as typeof globalThis.fetch;
+	return { calls };
+}
+
+describe("tool-web-search activation", () => {
+	it("registers the 'web_search' tool (defineTool called)", () => {
+		const { host, defineTool } = makeFakeHost();
+		activate(host);
+		expect(defineTool).toHaveBeenCalledTimes(1);
+		const registered = defineTool.mock.calls[0]?.[0];
+		if (!registered) throw new Error("no tool registered");
+		expect(registered.name).toBe("web_search");
+		expect(registered.concurrencySafe).toBe(true);
+	});
+
+	it("uses FIRECRAWL_BASE_URL from env", async () => {
+		process.env.FIRECRAWL_BASE_URL = "http://env-firecrawl.local/v1";
+		const { calls } = stubFetchCapture();
+		const { host, defineTool } = makeFakeHost();
+		activate(host);
+
+		const tool = defineTool.mock.calls[0]?.[0];
+		if (!tool) throw new Error("no tool registered");
+		await tool.execute({ query: "hello" }, stubCtx());
+		expect(calls.length).toBeGreaterThan(0);
+		expect(calls[0]?.url).toContain("http://env-firecrawl.local/v1/search");
+	});
+
+	it("uses default base URL when env unset", async () => {
+		delete process.env.FIRECRAWL_BASE_URL;
+		const { calls } = stubFetchCapture();
+		const { host, defineTool } = makeFakeHost();
+		activate(host);
+
+		const tool = defineTool.mock.calls[0]?.[0];
+		if (!tool) throw new Error("no tool registered");
+		await tool.execute({ query: "hello" }, stubCtx());
+		expect(calls.length).toBeGreaterThan(0);
+		expect(calls[0]?.url).toContain("100.102.55.49:31329/v1/search");
+	});
+});
+
+describe("tool-web-search manifest", () => {
+	it("declares network capability + web_search contribution", () => {
+		expect(manifest.id).toBe("tool-web-search");
+		expect(manifest.capabilities).toEqual({ network: true });
+		expect(manifest.contributes).toEqual({ tools: ["web_search"] });
+		expect(manifest.trust).toBe("bundled");
+		expect(manifest.activation).toBe("eager");
+	});
+
+	it("extension bundles the manifest + activate", () => {
+		expect(extension.manifest).toBe(manifest);
+		expect(typeof extension.activate).toBe("function");
+	});
+});
diff --git a/packages/tool-web-search/src/extension.ts b/packages/tool-web-search/src/extension.ts
new file mode 100644
index 0000000..1d1803d
--- /dev/null
+++ b/packages/tool-web-search/src/extension.ts
@@ -0,0 +1,32 @@
+/**
+ * tool-web-search extension — registers the `web_search` tool backed by a
+ * self-hosted Firecrawl instance on activation.
+ *
+ * The base URL comes from `FIRECRAWL_BASE_URL` (env) with a Tailscale default.
+ * Effects (`globalThis.fetch`) come from the ambient edge here, in the shell —
+ * never in the pure core. Logging is left to the host via `host.logger`/`ctx.log`
+ * (no `console.*`, no hand-rolled logger).
+ */
+
+import type { Extension, HostAPI, Manifest } from "@dispatch/kernel";
+import { createFirecrawlClient, DEFAULT_BASE_URL } from "./client.js";
+import { createWebSearchTool } from "./tool.js";
+
+export const manifest: Manifest = {
+	id: "tool-web-search",
+	name: "Web Search Tool",
+	version: "0.0.0",
+	apiVersion: "^0.1.0",
+	trust: "bundled",
+	activation: "eager",
+	capabilities: { network: true },
+	contributes: { tools: ["web_search"] },
+};
+
+export function activate(host: HostAPI): void {
+	const baseUrl = process.env.FIRECRAWL_BASE_URL?.trim() || DEFAULT_BASE_URL; // blank counts as unset
+	const client = createFirecrawlClient({ baseUrl, fetchFn: globalThis.fetch });
+	host.defineTool(createWebSearchTool({ client }));
+}
+
+export const extension: Extension = { manifest, activate };
diff --git a/packages/tool-web-search/src/format.test.ts b/packages/tool-web-search/src/format.test.ts
new file mode 100644
index 0000000..b98bc02
--- /dev/null
+++ b/packages/tool-web-search/src/format.test.ts
@@ -0,0 +1,87 @@
+import { describe, expect, it } from "vitest";
+import {
+	formatCrawlResults,
+	formatMapResults,
+	formatScrapeResult,
+	formatSearchResults,
+	truncateOutput,
+} from "./format.js";
+
+describe("formatSearchResults", () => {
+	it("formats title + url + description + optional markdown", () => {
+		const out = formatSearchResults([
+			{ title: "T1", url: "http://a", description: "desc", markdown: "md-body" },
+		]);
+		expect(out).toBe("### T1\nhttp://a\n\ndesc\n\nmd-body");
+	});
+
+	it("joins multiple results with ---", () => {
+		const out = formatSearchResults([
+			{ title: "T1", url: "http://a", description: "d1" },
+			{ title: "T2", url: "http://b", description: "d2" },
+		]);
+		expect(out).toBe("### T1\nhttp://a\n\nd1\n\n---\n\n### T2\nhttp://b\n\nd2");
+	});
+
+	it("empty data returns 'No results found.'", () => {
+		expect(formatSearchResults([])).toBe("No results found.");
+		expect(formatSearchResults(null)).toBe("No results found.");
+		expect(formatSearchResults(undefined)).toBe("No results found.");
+	});
+});
+
+describe("formatScrapeResult", () => {
+	it("formats title + markdown", () => {
+		const out = formatScrapeResult({
+			data: { markdown: "body", metadata: { title: "Title" } },
+		});
+		expect(out).toBe("# Title\n\nbody");
+	});
+
+	it("omits title header when absent", () => {
+		const out = formatScrapeResult({ data: { markdown: "body" } });
+		expect(out).toBe("body");
+	});
+});
+
+describe("formatCrawlResults", () => {
+	it("formats multiple pages", () => {
+		const out = formatCrawlResults([
+			{ markdown: "p1", metadata: { title: "P1", sourceURL: "http://p1" } },
+			{ markdown: "p2", metadata: { title: "P2", url: "http://p2" } },
+		]);
+		expect(out).toBe("## P1\nhttp://p1\n\np1\n\n---\n\n## P2\nhttp://p2\n\np2");
+	});
+
+	it("empty data returns 'No pages crawled.'", () => {
+		expect(formatCrawlResults([])).toBe("No pages crawled.");
+		expect(formatCrawlResults(null)).toBe("No pages crawled.");
+	});
+});
+
+describe("formatMapResults", () => {
+	it("formats links as bullet list", () => {
+		const out = formatMapResults(["http://a", "http://b"]);
+		expect(out).toBe("- http://a\n- http://b");
+	});
+
+	it("empty links returns 'No links found.'", () => {
+		expect(formatMapResults([])).toBe("No links found.");
+		expect(formatMapResults(null)).toBe("No links found.");
+	});
+});
+
+describe("truncateOutput", () => {
+	it("truncates with notice when over cap", () => {
+		const output = "a".repeat(100);
+		const result = truncateOutput(output, 50);
+		expect(result).toContain("a".repeat(50));
+		expect(result).toContain("[Output truncated: exceeded 50 characters]");
+		expect(result.length).toBeLessThan(output.length + 100);
+	});
+
+	it("returns as-is when under cap", () => {
+		expect(truncateOutput("short", 100)).toBe("short");
+		expect(truncateOutput("exact", 5)).toBe("exact");
+	});
+});
diff --git a/packages/tool-web-search/src/format.ts b/packages/tool-web-search/src/format.ts
new file mode 100644
index 0000000..cfc9aa0
--- /dev/null
+++ b/packages/tool-web-search/src/format.ts
@@ -0,0 +1,111 @@
+/**
+ * Pure formatters for the web_search tool — input → output, no I/O.
+ *
+ * These mirror the proven opencode Firecrawl tool's formatting, isolated
+ * (not imported) per the isolation-over-DRY rule. Tested directly with
+ * zero mocks.
+ */
+
+/** A single search hit from Firecrawl's `/search` endpoint. */
+export interface SearchHit {
+	readonly title?: string;
+	readonly url?: string;
+	readonly description?: string;
+	readonly markdown?: string;
+}
+
+/** One page from a completed crawl (`/crawl` status `data`). */
+export interface CrawlPage {
+	readonly markdown?: string;
+	readonly metadata?: {
+		readonly title?: string;
+		readonly sourceURL?: string;
+		readonly url?: string;
+	};
+}
+
+/** The scrape response envelope (`/scrape`); the page content sits under `data`. */
+export interface ScrapeResult {
+	readonly data?: {
+		readonly markdown?: string;
+		readonly metadata?: { readonly title?: string };
+	};
+}
+
+/**
+ * Truncate `output` to at most `cap` UTF-16 code units and append a notice. The cut backs
+ * off by one unit rather than split a surrogate pair. Mirrors tool-shell by design.
+ */
+export function truncateOutput(output: string, cap: number): string {
+	if (output.length <= cap) return output;
+	// A high surrogate in the last kept slot means the cut would orphan half of an astral character.
+	const lastKept = output.charCodeAt(cap - 1);
+	const end = lastKept >= 0xd800 && lastKept <= 0xdbff ? cap - 1 : cap;
+	return `${output.slice(0, end)}\n\n[Output truncated: exceeded ${cap} characters]`;
+}
+
+/**
+ * Render search hits as markdown sections separated by `---`.
+ *
+ * Each section is `### title`, the URL, a blank line and the description; the
+ * hit's markdown body follows when it is non-empty. A missing title becomes
+ * "(no title)". A missing or empty list yields "No results found.".
+ */
+export function formatSearchResults(data: readonly SearchHit[] | null | undefined): string {
+	if (!data?.length) {
+		return "No results found.";
+	}
+	const renderHit = (hit: SearchHit): string => {
+		const heading = `### ${hit.title ?? "(no title)"}`;
+		const link = hit.url ?? "";
+		const summary = hit.description ?? "";
+		const head = `${heading}\n${link}\n\n${summary}`;
+		// An empty markdown string is skipped just like an absent one.
+		return hit.markdown ? `${head}\n\n${hit.markdown}` : head;
+	};
+	return data.map(renderHit).join("\n\n---\n\n");
+}
+
+/**
+ * Render a scrape response as markdown.
+ *
+ * The page markdown (or "" when missing) is returned as-is, preceded by a
+ * `# title` heading and a blank line when the metadata has a non-empty title.
+ */
+export function formatScrapeResult(json: ScrapeResult): string {
+	const body = json.data?.markdown ?? "";
+	const heading = json.data?.metadata?.title;
+	// Absent and empty titles both fall through to the bare body.
+	return heading ? `# ${heading}\n\n${body}` : body;
+}
+
+/**
+ * Render crawled pages as markdown sections separated by `---`.
+ *
+ * Each section is `## title` and the page URL (`sourceURL`, else `url`), with
+ * the page markdown after a blank line when it is non-empty. A missing title
+ * becomes "(no title)". A missing or empty list yields "No pages crawled.".
+ */
+export function formatCrawlResults(data: readonly CrawlPage[] | null | undefined): string {
+	if (!data?.length) {
+		return "No pages crawled.";
+	}
+	const renderPage = ({ markdown, metadata }: CrawlPage): string => {
+		const heading = `## ${metadata?.title ?? "(no title)"}`;
+		const link = metadata?.sourceURL ?? metadata?.url ?? "";
+		const head = `${heading}\n${link}`;
+		// Pages without markdown keep just the heading and URL.
+		return markdown ? `${head}\n\n${markdown}` : head;
+	};
+	return data.map(renderPage).join("\n\n---\n\n");
+}
+
+/**
+ * Render discovered links as a `- `-prefixed bullet list, one per line.
+ * A missing or empty list yields "No links found.".
+ */
+export function formatMapResults(links: readonly string[] | null | undefined): string {
+	if (!links?.length) return "No links found.";
+	const bullets = links.map((link) => `- ${link}`);
+	return bullets.join("\n");
+}
diff --git a/packages/tool-web-search/src/index.ts b/packages/tool-web-search/src/index.ts
new file mode 100644
index 0000000..69894d1
--- /dev/null
+++ b/packages/tool-web-search/src/index.ts
@@ -0,0 +1,40 @@
+export {
+	CRAWL_MAX_WAIT_MS,
+	CRAWL_POLL_MS,
+	type CrawlParams,
+	createFirecrawlClient,
+	DEFAULT_BASE_URL,
+	DEFAULT_TIMEOUT_MS,
+	type FetchLike,
+	type FirecrawlClient,
+	type FirecrawlClientDeps,
+	type ScrapeParams,
+	type SearchParams,
+} from "./client.js";
+export { activate, extension, manifest } from "./extension.js";
+export {
+	type CrawlPage,
+	formatCrawlResults,
+	formatMapResults,
+	formatScrapeResult,
+	formatSearchResults,
+	type ScrapeResult,
+	type SearchHit,
+	truncateOutput,
+} from "./format.js";
+export { createWebSearchTool, type WebSearchToolDeps } from "./tool.js";
+export {
+	CRAWL_DEFAULT_LIMIT,
+	type CrawlArgs,
+	FORMATS,
+	type Format,
+	MAX_LIMIT,
+	type MapArgs,
+	MODES,
+	type Mode,
+	type ScrapeArgs,
+	SEARCH_DEFAULT_LIMIT,
+	type SearchArgs,
+	type ValidatedArgs,
+	validateArgs,
+} from "./validate.js";
diff --git a/packages/tool-web-search/src/tool.ts b/packages/tool-web-search/src/tool.ts
new file mode 100644
index 0000000..751278d
--- /dev/null
+++ b/packages/tool-web-search/src/tool.ts
@@ -0,0 +1,142 @@
+/**
+ * web_search tool factory — the imperative shell that binds the pure
+ * validate/format functions to the injected FirecrawlClient edge.
+ *
+ * Mirrors the tool-shell pattern: factory + injected dep + pure helpers +
+ * a `ToolResult` returned per call. Errors surface as `{ isError: true }`
+ * rather than thrown, so the model can react to the message.
+ */
+
+import type { ToolContract, ToolExecuteContext, ToolResult } from "@dispatch/kernel";
+import type { FirecrawlClient } from "./client.js";
+import {
+	formatCrawlResults,
+	formatMapResults,
+	formatScrapeResult,
+	formatSearchResults,
+	truncateOutput,
+} from "./format.js";
+import type { ValidatedArgs } from "./validate.js";
+import { validateArgs } from "./validate.js";
+
+const OUTPUT_CAP = 50_000; // default cap passed to truncateOutput when deps.outputCap is unset
+
+export interface WebSearchToolDeps {
+	readonly client: FirecrawlClient; // injected Firecrawl edge used by every mode
+	readonly outputCap?: number; // optional override for OUTPUT_CAP
+}
+
+/** Route validated args to the matching client call and render the response as text. */
+async function runMode(
+	validated: ValidatedArgs,
+	client: FirecrawlClient,
+	signal: AbortSignal,
+): Promise<string> {
+	if (validated.mode === "search") {
+		const { query, limit, scrape, lang, country } = validated;
+		const hits = await client.search(
+			{
+				query,
+				limit,
+				// Inline page scraping is always requested as main-content markdown.
+				...(scrape ? { scrapeOptions: { formats: ["markdown"], onlyMainContent: true } } : {}),
+				// Filters are attached only when set, so no `undefined` keys are sent.
+				...(lang === undefined ? {} : { lang }),
+				...(country === undefined ? {} : { country }),
+			},
+			signal,
+		);
+		return formatSearchResults(hits);
+	}
+
+	if (validated.mode === "scrape") {
+		const scraped = await client.scrape(
+			{ url: validated.url, formats: [validated.format] },
+			signal,
+		);
+		return formatScrapeResult(scraped);
+	}
+
+	if (validated.mode === "crawl") {
+		// The single validated format is handed to the client as a one-element list.
+		const { url, limit, format } = validated;
+		const pages = await client.crawl({ url, limit, formats: [format] }, signal);
+		return formatCrawlResults(pages);
+	}
+
+	// Only "map" remains once the other three modes have returned.
+	const links = await client.map(validated.url, signal);
+	return formatMapResults(links);
+}
+
+/**
+ * Build the `web_search` tool around an injected Firecrawl client. It is marked
+ * `concurrencySafe: true` (web search is idempotent and can run alongside other
+ * tools); the `network` capability lives on the extension manifest, not here.
+ */
+export function createWebSearchTool(deps: WebSearchToolDeps): ToolContract {
+	const { client } = deps;
+	const cap = deps.outputCap ?? OUTPUT_CAP;
+
+	return {
+		name: "web_search",
+		description:
+			"Access the web via a self-hosted Firecrawl instance. Supports search, " +
+			"single-page scrape, site crawling, and sitemap discovery.",
+		parameters: {
+			type: "object",
+			properties: {
+				query: { type: "string", description: "The search query (search mode)." },
+				url: { type: "string", description: "A URL to scrape, crawl, or map." },
+				mode: {
+					type: "string",
+					enum: ["search", "scrape", "crawl", "map"],
+					description:
+						"Operation mode. 'search' (default when query present), 'scrape' " +
+						"(default when url present), 'crawl' (recursively scrape pages from a site), " +
+						"'map' (discover URLs on a site).",
+				},
+				limit: {
+					type: "number",
+					description: "Max results. Search: default 7, max 10. Crawl: default 3, max 10.",
+				},
+				scrape: {
+					type: "boolean",
+					description: "When searching, also scrape full markdown content of each result page.",
+				},
+				lang: {
+					type: "string",
+					description: 'Language code to filter search results (e.g. "en", "ja").',
+				},
+				country: {
+					type: "string",
+					description: 'Country code to filter search results (e.g. "us", "jp").',
+				},
+				format: {
+					type: "string",
+					enum: ["markdown", "text", "html"],
+					description: "Format for scrape/crawl output (default: markdown).",
+				},
+			},
+		},
+		concurrencySafe: true,
+		async execute(args: unknown, ctx: ToolExecuteContext): Promise<ToolResult> {
+			const parsed = validateArgs(args);
+			if ("error" in parsed) {
+				// Bad arguments are reported verbatim, before any span is opened or request made.
+				return { content: parsed.error, isError: true };
+			}
+			const span = ctx.log.span("web_search.execute", { mode: parsed.mode });
+			try {
+				const text = await runMode(parsed, client, ctx.signal);
+				span.end();
+				return { content: truncateOutput(text, cap) };
+			} catch (err: unknown) {
+				// Failures come back as an error result so the model can react to the message.
+				span.end({ err });
+				const reason = err instanceof Error ? err.message : String(err);
+				return { content: `Error: ${reason}`, isError: true };
+			}
+		},
+	};
+}
diff --git a/packages/tool-web-search/src/validate.test.ts b/packages/tool-web-search/src/validate.test.ts
new file mode 100644
index 0000000..30ae26c
--- /dev/null
+++ b/packages/tool-web-search/src/validate.test.ts
@@ -0,0 +1,92 @@
+import { describe, expect, it } from "vitest";
+import {
+	type CrawlArgs,
+	type MapArgs,
+	type ScrapeArgs,
+	type SearchArgs,
+	validateArgs,
+} from "./validate.js";
+
+describe("validateArgs", () => {
+	it("mode defaults to search when query present", () => {
+		const result = validateArgs({ query: "hello" });
+		expect("error" in result).toBe(false);
+		if ("error" in result) return; // narrows the union for the typed assertions below
+		expect(result.mode).toBe("search");
+		expect((result as SearchArgs).query).toBe("hello");
+	});
+
+	it("mode defaults to scrape when url present (no query)", () => {
+		const result = validateArgs({ url: "http://example.com" });
+		expect("error" in result).toBe(false);
+		if ("error" in result) return;
+		expect(result.mode).toBe("scrape");
+		expect((result as ScrapeArgs).url).toBe("http://example.com");
+	});
+
+	it("explicit mode overrides defaults", () => {
+		const result = validateArgs({ query: "hello", url: "http://x", mode: "map" }); // query alone would infer search
+		expect("error" in result).toBe(false);
+		if ("error" in result) return;
+		expect(result.mode).toBe("map");
+		expect((result as MapArgs).url).toBe("http://x");
+	});
+
+	it("search mode requires query", () => {
+		const result = validateArgs({ mode: "search" });
+		expect(result).toHaveProperty("error");
+	});
+
+	it("scrape/crawl/map modes require url", () => {
+		expect(validateArgs({ mode: "scrape" })).toHaveProperty("error");
+		expect(validateArgs({ mode: "crawl" })).toHaveProperty("error");
+		expect(validateArgs({ mode: "map" })).toHaveProperty("error");
+	});
+
+	it("limit clamped to max 10", () => {
+		const result = validateArgs({ query: "hello", limit: 50 }); // 50 exceeds the maximum of 10
+		expect("error" in result).toBe(false);
+		if ("error" in result) return;
+		expect((result as SearchArgs).limit).toBe(10);
+	});
+
+	it("limit defaults to 7 (search) / 3 (crawl)", () => {
+		const search = validateArgs({ query: "hello" });
+		expect("error" in search).toBe(false);
+		if ("error" in search) return;
+		expect((search as SearchArgs).limit).toBe(7);
+
+		const crawl = validateArgs({ url: "http://x", mode: "crawl" });
+		expect("error" in crawl).toBe(false);
+		if ("error" in crawl) return;
+		expect((crawl as CrawlArgs).limit).toBe(3);
+	});
+
+	it("format defaults to markdown", () => {
+		const result = validateArgs({ query: "hello" }); // no format given
+		expect("error" in result).toBe(false);
+		if ("error" in result) return;
+		expect(result.format).toBe("markdown");
+	});
+
+	it("rejects invalid mode", () => {
+		const result = validateArgs({ mode: "invalid" });
+		expect(result).toHaveProperty("error");
+		if (!("error" in result)) return;
+		expect(result.error).toContain("Invalid mode");
+	});
+
+	it("rejects invalid format", () => {
+		const result = validateArgs({ url: "http://x", format: "pdf" }); // "pdf" is not markdown/text/html
+		expect(result).toHaveProperty("error");
+		if (!("error" in result)) return;
+		expect(result.error).toContain("Invalid format");
+	});
+
+	it("returns error for null/non-object args", () => {
+		expect(validateArgs(null)).toHaveProperty("error");
+		expect(validateArgs(undefined)).toHaveProperty("error");
+		expect(validateArgs("string")).toHaveProperty("error");
+		expect(validateArgs(42)).toHaveProperty("error");
+	});
+});
diff --git a/packages/tool-web-search/src/validate.ts b/packages/tool-web-search/src/validate.ts
new file mode 100644
index 0000000..56bd356
--- /dev/null
+++ b/packages/tool-web-search/src/validate.ts
@@ -0,0 +1,212 @@
+/**
+ * Pure argument validation for the web_search tool — input → output, no I/O.
+ *
+ * Resolves the operation mode (explicit, or inferred from `query`/`url`),
+ * applies per-mode field requirements, clamps `limit`, and defaults `format`.
+ * Returns a discriminated union so the tool's dispatch narrows by `mode`.
+ */
+
+export const MODES = ["search", "scrape", "crawl", "map"] as const; // accepted `mode` values
+export type Mode = (typeof MODES)[number];
+
+export const FORMATS = ["markdown", "text", "html"] as const; // accepted `format` values
+export type Format = (typeof FORMATS)[number];
+
+export const SEARCH_DEFAULT_LIMIT = 7; // search `limit` when the caller gives none
+export const CRAWL_DEFAULT_LIMIT = 3; // crawl `limit` when the caller gives none
+export const MAX_LIMIT = 10; // ceiling that resolveLimit clamps every limit to
+
+interface BaseArgs {
+	readonly format: Format; // always resolved; "markdown" when the caller omits it
+}
+
+export interface SearchArgs extends BaseArgs {
+	readonly mode: "search";
+	readonly query: string;
+	readonly limit: number; // defaulted and clamped by validateArgs
+	readonly scrape: boolean; // false when omitted
+	readonly lang?: string;
+	readonly country?: string;
+}
+
+export interface ScrapeArgs extends BaseArgs {
+	readonly mode: "scrape";
+	readonly url: string;
+}
+
+export interface CrawlArgs extends BaseArgs {
+	readonly mode: "crawl";
+	readonly url: string;
+	readonly limit: number; // defaulted and clamped by validateArgs
+}
+
+export interface MapArgs extends BaseArgs {
+	readonly mode: "map";
+	readonly url: string;
+}
+
+export type ValidatedArgs = SearchArgs | ScrapeArgs | CrawlArgs | MapArgs; // discriminated by `mode`
+
+export type ValidationError = { readonly error: string }; // message returned in place of args
+
+type Result<T> = { readonly value: T } | ValidationError; // internal success-or-error wrapper
+
+function resolveFormat(raw: unknown): Result<Format> {
+	// Absent → markdown; a known FORMATS member → itself; anything else → error.
+	const absent = raw === undefined || raw === null;
+	const known = absent ? "markdown" : FORMATS.find((candidate) => candidate === raw);
+	if (known !== undefined) {
+		return { value: known };
+	}
+	return {
+		error: `Error: Invalid format "${String(raw)}" (must be one of: markdown, text, html).`,
+	};
+}
+
+function resolveMode(raw: unknown, query: unknown, url: unknown): Result<Mode> {
+	const nonBlank = (v: unknown): boolean => typeof v === "string" && v.trim().length > 0;
+	if (raw === undefined || raw === null) {
+		return { value: !nonBlank(query) && nonBlank(url) ? "scrape" : "search" }; // a usable query wins
+	}
+	const explicit = MODES.find((candidate) => candidate === raw);
+	if (explicit !== undefined) {
+		return { value: explicit };
+	}
+	return {
+		error: `Error: Invalid mode "${String(raw)}" (must be one of: search, scrape, crawl, map).`,
+	};
+}
+
+function optionalString(raw: unknown, name: string): Result<string | undefined> {
+	// Strings pass through untouched; null and undefined both mean "not provided".
+	if (typeof raw === "string") {
+		return { value: raw };
+	}
+	// Any other value that is present is rejected, naming the offending field.
+	const absent = raw === undefined || raw === null;
+	return absent ? { value: undefined } : { error: `Error: "${name}" must be a string.` };
+}
+
+function resolveLimit(raw: unknown, defaultLimit: number): Result<number> {
+	// Coerce only numbers/numeric strings; Number(true) → 1 and Number([5]) → 5 are rejected.
+	if (raw === undefined || raw === null) {
+		return { value: defaultLimit };
+	}
+	const n = typeof raw === "number" || typeof raw === "string" ? Number(raw) : Number.NaN;
+	return Number.isFinite(n) && n >= 1
+		? { value: Math.min(Math.floor(n), MAX_LIMIT) }
+		: { error: 'Error: "limit" must be a positive number.' };
+}
+
+function resolveBoolean(raw: unknown, name: string): Result<boolean> {
+	// Real booleans pass through; an absent flag (null/undefined) reads as false.
+	if (typeof raw === "boolean") {
+		return { value: raw };
+	}
+	// Strings or numbers that merely look truthy are rejected rather than coerced.
+	const absent = raw === undefined || raw === null;
+	return absent ? { value: false } : { error: `Error: "${name}" must be a boolean.` };
+}
+
+/**
+ * Validate raw tool args (a plain object, never an array) into a mode-aware `ValidatedArgs`;
+ * urls are trimmed. Returns `{ error }` for invalid input — the tool surfaces it verbatim.
+ */
+export function validateArgs(args: unknown): ValidatedArgs | ValidationError {
+	if (args === null || args === undefined || typeof args !== "object" || Array.isArray(args)) {
+		return { error: "Error: Arguments must be an object." }; // arrays are typeof "object" too
+	}
+	const obj = args as Record<string, unknown>;
+
+	const format = resolveFormat(obj.format); // checked first: a bad format fails in every mode
+	if ("error" in format) {
+		return format;
+	}
+
+	const mode = resolveMode(obj.mode, obj.query, obj.url);
+	if ("error" in mode) {
+		return mode;
+	}
+
+	const query = optionalString(obj.query, "query");
+	if ("error" in query) {
+		return query;
+	}
+
+	const url = optionalString(obj.url, "url");
+	if ("error" in url) {
+		return url;
+	}
+
+	switch (mode.value) {
+		case "search": {
+			if (query.value === undefined || query.value.trim().length === 0) {
+				return { error: "Error: query is required for search mode." };
+			}
+			const limit = resolveLimit(obj.limit, SEARCH_DEFAULT_LIMIT);
+			if ("error" in limit) {
+				return limit;
+			}
+			const scrape = resolveBoolean(obj.scrape, "scrape");
+			if ("error" in scrape) {
+				return scrape;
+			}
+			const lang = optionalString(obj.lang, "lang");
+			if ("error" in lang) {
+				return lang;
+			}
+			const country = optionalString(obj.country, "country");
+			if ("error" in country) {
+				return country;
+			}
+			const result: SearchArgs = {
+				mode: "search",
+				query: query.value,
+				limit: limit.value,
+				scrape: scrape.value,
+				format: format.value,
+				...(lang.value !== undefined ? { lang: lang.value } : {}),
+				...(country.value !== undefined ? { country: country.value } : {}),
+			};
+			return result;
+		}
+		case "scrape": {
+			if (url.value === undefined || url.value.trim().length === 0) {
+				return { error: "Error: url is required for scrape mode." };
+			}
+			const result: ScrapeArgs = {
+				mode: "scrape",
+				url: url.value.trim(), // surrounding whitespace is not forwarded to the client
+				format: format.value,
+			};
+			return result;
+		}
+		case "crawl": {
+			if (url.value === undefined || url.value.trim().length === 0) {
+				return { error: "Error: url is required for crawl mode." };
+			}
+			const limit = resolveLimit(obj.limit, CRAWL_DEFAULT_LIMIT);
+			if ("error" in limit) {
+				return limit;
+			}
+			const result: CrawlArgs = {
+				mode: "crawl",
+				url: url.value.trim(), // surrounding whitespace is not forwarded to the client
+				limit: limit.value,
+				format: format.value,
+			};
+			return result;
+		}
+		case "map": {
+			if (url.value === undefined || url.value.trim().length === 0) {
+				return { error: "Error: url is required for map mode." };
+			}
+			const result: MapArgs = {
+				mode: "map",
+				url: url.value.trim(), // surrounding whitespace is not forwarded to the client
+				format: format.value,
+			};
+			return result;
+		}
+	}
+}
diff --git a/packages/tool-web-search/tsconfig.json b/packages/tool-web-search/tsconfig.json
new file mode 100644
index 0000000..ff99a43
--- /dev/null
+++ b/packages/tool-web-search/tsconfig.json
@@ -0,0 +1,6 @@
+{
+	"extends": "../../tsconfig.base.json",
+	"compilerOptions": { "rootDir": "src", "outDir": "dist", "composite": true },
+	"include": ["src/**/*.ts"],
+	"references": [{ "path": "../kernel" }]
+}
diff --git a/tasks.md b/tasks.md
index 0efc511..a374562 100644
--- a/tasks.md
+++ b/tasks.md
@@ -5,7 +5,7 @@
 > Keep this lean and current; do not let it re-accrete a step-by-step changelog.
 
 ## Status (current)
-`tsc -b` EXIT 0 · biome clean · **1059 vitest + 199 transport bun green**.
+`tsc -b` EXIT 0 · biome clean · **1097 vitest + 199 transport bun green**.
 
 Built and verified live (full-fidelity: every feature is a manifest-loaded
 extension through the host):
@@ -454,7 +454,19 @@ path**: first extract a generic `@dispatch/openai-stream` library from
   **Boot smoke:** without `UMANS_API_KEY` → `"provider-umans: no UMANS_API_KEY. Provider
   not registered."` (graceful skip); with `UMANS_API_KEY=sk-test` → `"provider-umans:
   registered (model=umans-coder)"`.
-- [ ] Live-verify against the real Umans API (not yet exercised end-to-end).
+- [x] **LIVE-VERIFIED against the real Umans API:** the dev stack (umans-glm-5.2) called
+  `web_search` (Firecrawl) in a real turn — first live Umans API call, clean response.
+
+## web_search tool — Firecrawl (DONE)
+Standard tool extension `tool-web-search` backed by a self-hosted Firecrawl instance
+(`http://100.102.55.49:31329/v1`, Tailscale, no API key). One tool `web_search` with 4
+modes: search, scrape, crawl (polls status URL), map — mirroring the proven opencode tool.
+Pure core: `validateArgs` (discriminated union by mode) + `format*` functions + `truncateOutput`.
+Injected edge: `FirecrawlClient` (injectable `fetchFn` + `sleep` + `now`), `AbortSignal.any`
+for per-request timeout + caller cancellation. `concurrencySafe: true`, `capabilities: { network: true }`.
+38 tests. Report: `reports/tool-web-search.md`.
+- **LIVE-VERIFIED:** the dev stack (umans-glm-5.2) called `web_search` → Firecrawl returned
+  real results (Paris, France) — first live Umans API call too.
 
 ## Open items
 - **Context window LIMIT (deferred, sibling of context size):** expose the selected model's max
@@ -507,8 +519,7 @@ path**: first extract a generic `@dispatch/openai-stream` library from
 5. **`todo` tool** — a per-conversation task-list tool the model maintains
    (like opencode's todowrite/todoread), as a standard tool extension; likely a
    surface so the FE can render the live list.
- 6. **`web_search` tool** — a web search tool (like old dispatch's;
-    reference-only source at `../dispatch-source`), as a standard tool extension.
+ 6. ~~**`web_search` tool**~~ — **DONE** (see milestone section above).
  7. **Message queue — close-with-queued-messages (deferred product decision):**
     if a client closes a conversation (`POST /conversations/:id/close`) while the
     queue is non-empty, the carry currently still fires (starts a new turn on the
diff --git a/tsconfig.json b/tsconfig.json
index b227e92..d084acb 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -22,6 +22,7 @@
 		{ "path": "./packages/tool-shell" },
 		{ "path": "./packages/tool-edit-file" },
 		{ "path": "./packages/tool-write-file" },
+		{ "path": "./packages/tool-web-search" },
 		{ "path": "./packages/skills" },
 		{ "path": "./packages/cache-warming" },
 		{ "path": "./packages/message-queue" },
author	Adam Malczewski <[email protected]>	2026-06-21 13:11:29 +0900
committer	Adam Malczewski <[email protected]>	2026-06-21 13:11:29 +0900
commit	8a4a624d16422467a8e85434c674bb591877e8ea (patch)
tree	54052da00bbc580742913e5c031b7cc1b160db19
parent	d23de3254374d4d63c8e15c6ab9311c3c6f4da5b (diff)
download	dispatch-8a4a624d16422467a8e85434c674bb591877e8ea.tar.gz dispatch-8a4a624d16422467a8e85434c674bb591877e8ea.zip