summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAdam Malczewski <[email protected]>2026-06-21 13:11:29 +0900
committerAdam Malczewski <[email protected]>2026-06-21 13:11:29 +0900
commit8a4a624d16422467a8e85434c674bb591877e8ea (patch)
tree54052da00bbc580742913e5c031b7cc1b160db19
parentd23de3254374d4d63c8e15c6ab9311c3c6f4da5b (diff)
downloaddispatch-8a4a624d16422467a8e85434c674bb591877e8ea.tar.gz
dispatch-8a4a624d16422467a8e85434c674bb591877e8ea.zip
feat(tool-web-search): Firecrawl-backed web search tool
New standard tool extension with one tool web_search supporting 4 modes (search, scrape, crawl, map) against a self-hosted Firecrawl instance. Pure core: validateArgs (discriminated union by mode) + format* functions + truncateOutput. Injected edge: FirecrawlClient (injectable fetchFn/sleep/now, AbortSignal.any for per-request timeout + caller cancellation). concurrencySafe true, capabilities network. 38 tests, zero vi.mock. Live-verified: umans-glm-5.2 called web_search → real Firecrawl results (also the first live Umans API call).
-rw-r--r--bun.lock1
-rw-r--r--packages/host-bin/package.json1
-rw-r--r--packages/host-bin/src/main.ts2
-rw-r--r--packages/tool-web-search/package.json11
-rw-r--r--packages/tool-web-search/src/client.test.ts208
-rw-r--r--packages/tool-web-search/src/client.ts243
-rw-r--r--packages/tool-web-search/src/extension.test.ts113
-rw-r--r--packages/tool-web-search/src/extension.ts32
-rw-r--r--packages/tool-web-search/src/format.test.ts87
-rw-r--r--packages/tool-web-search/src/format.ts111
-rw-r--r--packages/tool-web-search/src/index.ts40
-rw-r--r--packages/tool-web-search/src/tool.ts142
-rw-r--r--packages/tool-web-search/src/validate.test.ts92
-rw-r--r--packages/tool-web-search/src/validate.ts212
-rw-r--r--packages/tool-web-search/tsconfig.json6
-rw-r--r--tasks.md19
-rw-r--r--tsconfig.json1
17 files changed, 1317 insertions, 4 deletions
diff --git a/bun.lock b/bun.lock
index c08d7f9..18f4542 100644
--- a/bun.lock
+++ b/bun.lock
@@ -73,6 +73,7 @@
"@dispatch/tool-edit-file": "workspace:*",
"@dispatch/tool-read-file": "workspace:*",
"@dispatch/tool-shell": "workspace:*",
+ "@dispatch/tool-web-search": "workspace:*",
"@dispatch/tool-write-file": "workspace:*",
"@dispatch/transport-http": "workspace:*",
"@dispatch/transport-ws": "workspace:*",
diff --git a/packages/host-bin/package.json b/packages/host-bin/package.json
index 63b78bc..5f7d0e7 100644
--- a/packages/host-bin/package.json
+++ b/packages/host-bin/package.json
@@ -21,6 +21,7 @@
"@dispatch/tool-shell": "workspace:*",
"@dispatch/tool-edit-file": "workspace:*",
"@dispatch/tool-write-file": "workspace:*",
+ "@dispatch/tool-web-search": "workspace:*",
"@dispatch/journal-sink": "workspace:*",
"@dispatch/lsp": "workspace:*",
"@dispatch/surface-loaded-extensions": "workspace:*",
diff --git a/packages/host-bin/src/main.ts b/packages/host-bin/src/main.ts
index 1928a8a..1c122de 100644
--- a/packages/host-bin/src/main.ts
+++ b/packages/host-bin/src/main.ts
@@ -32,6 +32,7 @@ import { extension as throughputStoreExt } from "@dispatch/throughput-store";
import { extension as toolEditFileExt } from "@dispatch/tool-edit-file";
import { extension as toolReadFileExt } from "@dispatch/tool-read-file";
import { extension as toolShellExt } from "@dispatch/tool-shell";
+import { extension as toolWebSearchExt } from "@dispatch/tool-web-search";
import { extension as toolWriteFileExt } from "@dispatch/tool-write-file";
import { createTransportHttpExtension } from "@dispatch/transport-http";
import { createTransportWsExtension } from "@dispatch/transport-ws";
@@ -75,6 +76,7 @@ const CORE_EXTENSIONS: readonly Extension[] = [
toolReadFileExt,
toolShellExt,
toolWriteFileExt,
+ toolWebSearchExt,
throughputStoreExt,
messageQueueExt,
sessionOrchestratorExt,
diff --git a/packages/tool-web-search/package.json b/packages/tool-web-search/package.json
new file mode 100644
index 0000000..c41ab7b
--- /dev/null
+++ b/packages/tool-web-search/package.json
@@ -0,0 +1,11 @@
+{
+ "name": "@dispatch/tool-web-search",
+ "version": "0.0.0",
+ "type": "module",
+ "private": true,
+ "main": "dist/index.js",
+ "types": "dist/index.d.ts",
+ "dependencies": {
+ "@dispatch/kernel": "workspace:*"
+ }
+}
diff --git a/packages/tool-web-search/src/client.test.ts b/packages/tool-web-search/src/client.test.ts
new file mode 100644
index 0000000..f020a83
--- /dev/null
+++ b/packages/tool-web-search/src/client.test.ts
@@ -0,0 +1,208 @@
+import { describe, expect, it } from "vitest";
+import { createFirecrawlClient, type FetchLike } from "./client.js";
+
+/** Build a JSON `Response` carrying `body`, with the given HTTP status (200 by default). */
+function jsonResponse(body: unknown, status = 200): Response {
+  const payload = JSON.stringify(body);
+  const init: ResponseInit = { status, headers: { "Content-Type": "application/json" } };
+  return new Response(payload, init);
+}
+
+interface CapturedCall {
+ url: string;
+ method?: string | undefined;
+ body?: string | undefined;
+}
+
+/**
+ * Fake fetch for tests: replays `responses` in order (falling back to an empty
+ * JSON 200 once they run out) and records the URL, method and string body of
+ * every call in `calls`.
+ */
+function makeFetch(responses: Response[]): { fetchFn: FetchLike; calls: CapturedCall[] } {
+  const calls: CapturedCall[] = [];
+  let cursor = 0;
+  const toUrl = (input: string | URL | Request): string => {
+    if (typeof input === "string") return input;
+    return input instanceof URL ? input.toString() : input.url;
+  };
+  const fake = async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
+    const rawBody = init?.body;
+    calls.push({
+      url: toUrl(input),
+      method: init?.method,
+      body: typeof rawBody === "string" ? rawBody : undefined,
+    });
+    const scripted = responses[cursor];
+    cursor += 1;
+    return scripted ?? jsonResponse({});
+  };
+  return { fetchFn: fake as unknown as FetchLike, calls };
+}
+
+const BASE = "http://test-firecrawl.local/v1";
+const signal = (): AbortSignal => new AbortController().signal;
+
+describe("createFirecrawlClient.search", () => {
+ it("sends POST /search with correct body", async () => {
+ const { fetchFn, calls } = makeFetch([jsonResponse({ success: true, data: [] })]);
+ const client = createFirecrawlClient({ baseUrl: BASE, fetchFn });
+ await client.search({ query: "hello", limit: 7 }, signal());
+
+ const call = calls[0];
+ if (!call) throw new Error("no call captured");
+ expect(call.url).toBe(`${BASE}/search`);
+ expect(call.method).toBe("POST");
+ expect(JSON.parse(call.body ?? "{}")).toEqual({ query: "hello", limit: 7 });
+ });
+
+ it("returns parsed data on success", async () => {
+ const data = [{ title: "T", url: "http://x", description: "d" }];
+ const { fetchFn } = makeFetch([jsonResponse({ success: true, data })]);
+ const client = createFirecrawlClient({ baseUrl: BASE, fetchFn });
+ const result = await client.search({ query: "hello", limit: 7 }, signal());
+ expect(result).toEqual(data);
+ });
+
+ it("throws on !success", async () => {
+ const { fetchFn } = makeFetch([jsonResponse({ success: false, error: "boom" })]);
+ const client = createFirecrawlClient({ baseUrl: BASE, fetchFn });
+ await expect(client.search({ query: "x", limit: 7 }, signal())).rejects.toThrow("boom");
+ });
+});
+
+describe("createFirecrawlClient.scrape", () => {
+ it("sends POST /scrape with correct body", async () => {
+ const { fetchFn, calls } = makeFetch([
+ jsonResponse({ success: true, data: { markdown: "md", metadata: { title: "T" } } }),
+ ]);
+ const client = createFirecrawlClient({ baseUrl: BASE, fetchFn });
+ await client.scrape({ url: "http://x", formats: ["markdown"] }, signal());
+
+ const call = calls[0];
+ if (!call) throw new Error("no call captured");
+ expect(call.url).toBe(`${BASE}/scrape`);
+ expect(call.method).toBe("POST");
+ expect(JSON.parse(call.body ?? "{}")).toEqual({
+ url: "http://x",
+ formats: ["markdown"],
+ onlyMainContent: true,
+ });
+ });
+});
+
+describe("createFirecrawlClient.crawl", () => {
+ it("polls status URL until completed", async () => {
+ const { fetchFn, calls } = makeFetch([
+ jsonResponse({ success: true, url: `${BASE}/crawl/status/123` }),
+ jsonResponse({ status: "scraping" }),
+ jsonResponse({
+ status: "completed",
+ data: [{ markdown: "p1", metadata: { title: "P1", sourceURL: "http://p1" } }],
+ }),
+ ]);
+ const client = createFirecrawlClient({
+ baseUrl: BASE,
+ fetchFn,
+ sleep: async () => {},
+ });
+ const pages = await client.crawl(
+ { url: "http://site", limit: 3, formats: ["markdown"] },
+ signal(),
+ );
+ expect(pages).toEqual([{ markdown: "p1", metadata: { title: "P1", sourceURL: "http://p1" } }]);
+ expect(calls.length).toBe(3);
+ });
+
+ it("returns data when completed", async () => {
+ const { fetchFn } = makeFetch([
+ jsonResponse({ success: true, url: `${BASE}/crawl/status/123` }),
+ jsonResponse({
+ status: "completed",
+ data: [{ markdown: "page", metadata: { title: "T" } }],
+ }),
+ ]);
+ const client = createFirecrawlClient({
+ baseUrl: BASE,
+ fetchFn,
+ sleep: async () => {},
+ });
+ const pages = await client.crawl(
+ { url: "http://site", limit: 3, formats: ["markdown"] },
+ signal(),
+ );
+ expect(pages.length).toBe(1);
+ expect(pages[0]?.markdown).toBe("page");
+ });
+
+ it("throws when status is failed", async () => {
+ const { fetchFn } = makeFetch([
+ jsonResponse({ success: true, url: `${BASE}/crawl/status/123` }),
+ jsonResponse({ status: "failed", error: "boom" }),
+ ]);
+ const client = createFirecrawlClient({
+ baseUrl: BASE,
+ fetchFn,
+ sleep: async () => {},
+ });
+ await expect(
+ client.crawl({ url: "http://site", limit: 3, formats: ["markdown"] }, signal()),
+ ).rejects.toThrow("failed");
+ });
+
+ it("respects abort signal (stops polling)", async () => {
+ const controller = new AbortController();
+ const { fetchFn, calls } = makeFetch([
+ jsonResponse({ success: true, url: `${BASE}/crawl/status/123` }),
+ ]);
+ const client = createFirecrawlClient({
+ baseUrl: BASE,
+ fetchFn,
+ sleep: async (_ms, sig) => {
+ controller.abort();
+ if (sig.aborted) throw new Error("Request aborted.");
+ },
+ });
+ await expect(
+ client.crawl({ url: "http://site", limit: 3, formats: ["markdown"] }, controller.signal),
+ ).rejects.toThrow();
+ expect(calls.length).toBe(1);
+ });
+});
+
+describe("createFirecrawlClient.map", () => {
+ it("sends POST /map and returns links", async () => {
+ const { fetchFn, calls } = makeFetch([
+ jsonResponse({ success: true, links: ["http://a", "http://b"] }),
+ ]);
+ const client = createFirecrawlClient({ baseUrl: BASE, fetchFn });
+ const links = await client.map("http://site", signal());
+ expect(links).toEqual(["http://a", "http://b"]);
+
+ const call = calls[0];
+ if (!call) throw new Error("no call captured");
+ expect(call.url).toBe(`${BASE}/map`);
+ expect(call.method).toBe("POST");
+ expect(JSON.parse(call.body ?? "{}")).toEqual({ url: "http://site" });
+ });
+});
+
+describe("createFirecrawlClient.request (error paths)", () => {
+ it("throws on HTTP error", async () => {
+ const { fetchFn } = makeFetch([
+ new Response("not found", { status: 404, statusText: "Not Found" }),
+ ]);
+ const client = createFirecrawlClient({ baseUrl: BASE, fetchFn });
+ await expect(client.search({ query: "x", limit: 7 }, signal())).rejects.toThrow("HTTP 404");
+ });
+
+ it("throws on timeout", async () => {
+ const fetchFn: FetchLike = ((_input: string | URL | Request, init?: RequestInit) =>
+ new Promise<Response>((_resolve, reject) => {
+ const sig = init?.signal;
+ if (!sig) return;
+ sig.addEventListener("abort", () => {
+ const err = new Error("aborted");
+ err.name = "AbortError";
+ reject(err);
+ });
+ })) as unknown as FetchLike;
+ const client = createFirecrawlClient({
+ baseUrl: BASE,
+ fetchFn,
+ timeoutMs: 10,
+ });
+ await expect(client.search({ query: "x", limit: 7 }, signal())).rejects.toThrow("timed out");
+ });
+});
diff --git a/packages/tool-web-search/src/client.ts b/packages/tool-web-search/src/client.ts
new file mode 100644
index 0000000..071ba97
--- /dev/null
+++ b/packages/tool-web-search/src/client.ts
@@ -0,0 +1,243 @@
+/**
+ * FirecrawlClient — the injected outermost edge for the web_search tool.
+ *
+ * All effects (fetch, sleep, clock) are injected so the pure decision logic
+ * remains testable without real I/O. The factory builds four methods
+ * (`search`, `scrape`, `crawl`, `map`) over a self-hosted Firecrawl instance
+ * (no API key). `crawl` polls a status URL until the crawl completes or fails.
+ */
+
+import type { CrawlPage, ScrapeResult, SearchHit } from "./format.js";
+
+export type FetchLike = typeof globalThis.fetch;
+
+export const DEFAULT_BASE_URL = "http://100.102.55.49:31329/v1";
+export const DEFAULT_TIMEOUT_MS = 30_000;
+export const CRAWL_POLL_MS = 2_000;
+export const CRAWL_MAX_WAIT_MS = 5 * 60 * 1_000;
+
+export interface SearchParams {
+ readonly query: string;
+ readonly limit: number;
+ readonly lang?: string;
+ readonly country?: string;
+ readonly scrapeOptions?: {
+ readonly formats: readonly string[];
+ readonly onlyMainContent: boolean;
+ };
+}
+
+export interface ScrapeParams {
+ readonly url: string;
+ readonly formats: readonly string[];
+}
+
+export interface CrawlParams {
+ readonly url: string;
+ readonly limit: number;
+ readonly formats: readonly string[];
+}
+
+export interface FirecrawlClient {
+ readonly search: (params: SearchParams, signal: AbortSignal) => Promise<readonly SearchHit[]>;
+ readonly scrape: (params: ScrapeParams, signal: AbortSignal) => Promise<ScrapeResult>;
+ readonly crawl: (params: CrawlParams, signal: AbortSignal) => Promise<readonly CrawlPage[]>;
+ readonly map: (url: string, signal: AbortSignal) => Promise<readonly string[]>;
+}
+
+export interface FirecrawlClientDeps {
+ readonly baseUrl: string;
+ readonly fetchFn: FetchLike;
+ readonly timeoutMs?: number;
+ readonly pollMs?: number;
+ readonly maxWaitMs?: number;
+ readonly now?: () => number;
+ readonly sleep?: (ms: number, signal: AbortSignal) => Promise<void>;
+}
+
+interface SearchResponse {
+ readonly success: boolean;
+ readonly data?: readonly SearchHit[];
+ readonly error?: string;
+}
+
+interface ScrapeResponse {
+ readonly success: boolean;
+ readonly data?: {
+ readonly markdown?: string;
+ readonly metadata?: { readonly title?: string };
+ };
+ readonly error?: string;
+}
+
+interface CrawlStartResponse {
+ readonly success: boolean;
+ readonly url?: string;
+ readonly error?: string;
+}
+
+interface CrawlStatusResponse {
+ readonly status: string;
+ readonly data?: readonly CrawlPage[];
+ readonly error?: string;
+}
+
+interface MapResponse {
+ readonly success: boolean;
+ readonly links?: readonly string[];
+ readonly error?: string;
+}
+
+/**
+ * Default sleep: settles after `ms` milliseconds, or rejects with
+ * "Request aborted." if `signal` is already aborted or aborts while waiting.
+ */
+async function defaultSleep(ms: number, signal: AbortSignal): Promise<void> {
+  if (signal.aborted) {
+    throw new Error("Request aborted.");
+  }
+  await new Promise<void>((resolve, reject) => {
+    const handle = setTimeout(() => {
+      // Timer won the race: detach the abort listener before resolving.
+      signal.removeEventListener("abort", cancel);
+      resolve();
+    }, ms);
+    function cancel(): void {
+      clearTimeout(handle);
+      reject(new Error("Request aborted."));
+    }
+    signal.addEventListener("abort", cancel, { once: true });
+  });
+}
+
+/**
+ * Create a FirecrawlClient on top of the injected `fetchFn`.
+ *
+ * Every call goes through `request`, which applies a per-request timeout
+ * (combined with the caller's cancellation signal via `AbortSignal.any`) and
+ * turns transport, HTTP-status and JSON failures into `Error`s. Optional deps
+ * fall back to the module-level defaults. Trailing slashes on `deps.baseUrl`
+ * are dropped so endpoint URLs never contain `//`.
+ *
+ * @param deps Base URL, fetch implementation, and optional timeout/poll/clock/sleep overrides.
+ * @returns A client exposing `search`, `scrape`, `crawl` and `map`.
+ */
+export function createFirecrawlClient(deps: FirecrawlClientDeps): FirecrawlClient {
+  // Normalize so `${baseUrl}/${endpoint}` is well-formed even for "http://host/v1/".
+  let baseUrl = deps.baseUrl;
+  while (baseUrl.endsWith("/")) {
+    baseUrl = baseUrl.slice(0, -1);
+  }
+  const fetchFn = deps.fetchFn;
+  const timeoutMs = deps.timeoutMs ?? DEFAULT_TIMEOUT_MS;
+  const pollMs = deps.pollMs ?? CRAWL_POLL_MS;
+  const maxWaitMs = deps.maxWaitMs ?? CRAWL_MAX_WAIT_MS;
+  const now = deps.now ?? Date.now;
+  const sleep = deps.sleep ?? defaultSleep;
+
+  /**
+   * Return the abort/timeout error that explains a failure, or `undefined`
+   * when neither the caller's signal nor the timeout controller has fired.
+   * Caller cancellation takes precedence over the timeout.
+   */
+  function interruption(signal: AbortSignal, controller: AbortController): Error | undefined {
+    if (signal.aborted) {
+      return new Error("Request aborted.");
+    }
+    if (controller.signal.aborted) {
+      return new Error(`Firecrawl request timed out after ${timeoutMs / 1000} seconds.`);
+    }
+    return undefined;
+  }
+
+  /**
+   * Perform one HTTP request and return the parsed JSON body.
+   *
+   * @throws Error "Request aborted." when the caller's signal fired,
+   *   a "timed out" error when the per-request timeout fired, an
+   *   `HTTP <status>` error for non-2xx responses, or a parse error when the
+   *   body is not valid JSON.
+   */
+  async function request(
+    method: "POST" | "GET",
+    url: string,
+    body: unknown,
+    signal: AbortSignal,
+  ): Promise<unknown> {
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), timeoutMs);
+    const combined = AbortSignal.any([signal, controller.signal]);
+    try {
+      let response: Response;
+      try {
+        response = await fetchFn(url, {
+          method,
+          headers:
+            body !== undefined
+              ? { "Content-Type": "application/json", Accept: "application/json" }
+              : { Accept: "application/json" },
+          body: body !== undefined ? JSON.stringify(body) : undefined,
+          signal: combined,
+        });
+      } catch (err) {
+        throw interruption(signal, controller) ?? err;
+      }
+      if (!response.ok) {
+        // Best-effort: include the error body when it can be read.
+        const text = await response.text().catch(() => "");
+        throw new Error(`HTTP ${response.status} ${response.statusText}${text ? `: ${text}` : ""}`);
+      }
+      try {
+        return await response.json();
+      } catch {
+        // The timer is still armed while the body streams, so an abort or
+        // timeout at this point must not be misreported as malformed JSON.
+        throw (
+          interruption(signal, controller) ??
+          new Error("Failed to parse Firecrawl response as JSON")
+        );
+      }
+    } finally {
+      clearTimeout(timeout);
+    }
+  }
+
+  /** POST `body` as JSON to `<baseUrl>/<endpoint>`. */
+  async function post(endpoint: string, body: unknown, signal: AbortSignal): Promise<unknown> {
+    return request("POST", `${baseUrl}/${endpoint}`, body, signal);
+  }
+
+  return {
+    /** POST /search; optional `lang`, `country` and `scrapeOptions` are sent only when provided. */
+    async search(params: SearchParams, signal: AbortSignal): Promise<readonly SearchHit[]> {
+      const body: Record<string, unknown> = { query: params.query, limit: params.limit };
+      if (params.lang !== undefined) {
+        body.lang = params.lang;
+      }
+      if (params.country !== undefined) {
+        body.country = params.country;
+      }
+      if (params.scrapeOptions !== undefined) {
+        body.scrapeOptions = params.scrapeOptions;
+      }
+      const json = (await post("search", body, signal)) as SearchResponse;
+      if (!json.success) {
+        throw new Error(json.error ?? "Unknown error");
+      }
+      return json.data ?? [];
+    },
+
+    /** POST /scrape with `onlyMainContent: true`; returns the whole response envelope. */
+    async scrape(params: ScrapeParams, signal: AbortSignal): Promise<ScrapeResult> {
+      const body = {
+        url: params.url,
+        formats: params.formats,
+        onlyMainContent: true,
+      };
+      const json = (await post("scrape", body, signal)) as ScrapeResponse;
+      if (!json.success) {
+        throw new Error(json.error ?? "Unknown error");
+      }
+      return json;
+    },
+
+    /**
+     * POST /crawl, then poll the returned status URL every `pollMs` until the
+     * status is "completed" or "failed", or until `maxWaitMs` has elapsed.
+     */
+    async crawl(params: CrawlParams, signal: AbortSignal): Promise<readonly CrawlPage[]> {
+      const body = {
+        url: params.url,
+        limit: params.limit,
+        scrapeOptions: { formats: params.formats, onlyMainContent: true },
+      };
+      const startJson = (await post("crawl", body, signal)) as CrawlStartResponse;
+      if (!startJson.success) {
+        throw new Error(startJson.error ?? "Unknown error");
+      }
+      const statusUrl = startJson.url;
+      if (statusUrl === undefined) {
+        throw new Error("crawl response missing status URL.");
+      }
+      const started = now();
+      while (now() - started < maxWaitMs) {
+        // Sleep first: the job was only just created, so an immediate poll is wasted.
+        await sleep(pollMs, signal);
+        const status = (await request("GET", statusUrl, undefined, signal)) as CrawlStatusResponse;
+        if (status.status === "completed") {
+          return status.data ?? [];
+        }
+        if (status.status === "failed") {
+          throw new Error(`crawl failed: ${status.error ?? "unknown"}`);
+        }
+      }
+      throw new Error("crawl timed out waiting for completion.");
+    },
+
+    /** POST /map; returns the discovered links (empty when the response has none). */
+    async map(url: string, signal: AbortSignal): Promise<readonly string[]> {
+      const json = (await post("map", { url }, signal)) as MapResponse;
+      if (!json.success) {
+        throw new Error(json.error ?? "Unknown error");
+      }
+      return json.links ?? [];
+    },
+  };
+}
diff --git a/packages/tool-web-search/src/extension.test.ts b/packages/tool-web-search/src/extension.test.ts
new file mode 100644
index 0000000..6e0a6bc
--- /dev/null
+++ b/packages/tool-web-search/src/extension.test.ts
@@ -0,0 +1,113 @@
+import { createLogger, type HostAPI, type ToolExecuteContext } from "@dispatch/kernel";
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { activate, extension, manifest } from "./extension.js";
+
+function stubCtx(overrides?: Partial<ToolExecuteContext>): ToolExecuteContext {
+ return {
+ toolCallId: "test-call-1",
+ onOutput: () => {},
+ signal: new AbortController().signal,
+ log: createLogger(
+ { extensionId: "test" },
+ { emit: () => {} },
+ { now: () => 0, newId: () => "id" },
+ ),
+ ...overrides,
+ };
+}
+
+function makeFakeHost(): { host: HostAPI; defineTool: ReturnType<typeof vi.fn> } {
+ const defineTool = vi.fn();
+ const host = {
+ defineTool,
+ logger: {
+ debug: vi.fn(),
+ info: vi.fn(),
+ warn: vi.fn(),
+ error: vi.fn(),
+ span: vi.fn(() => ({ end: vi.fn() })),
+ },
+ } as unknown as HostAPI;
+ return { host, defineTool };
+}
+
+const ORIG_FETCH = globalThis.fetch;
+const ORIG_ENV = process.env.FIRECRAWL_BASE_URL;
+
+/** Put `FIRECRAWL_BASE_URL` back to the value captured in `ORIG_ENV`, deleting it if that was unset. */
+function restoreEnv(): void {
+  if (ORIG_ENV !== undefined) {
+    process.env.FIRECRAWL_BASE_URL = ORIG_ENV;
+    return;
+  }
+  delete process.env.FIRECRAWL_BASE_URL;
+}
+
+afterEach(() => {
+ globalThis.fetch = ORIG_FETCH;
+ restoreEnv();
+});
+
+/**
+ * Replace `globalThis.fetch` with a stub that records each requested URL and
+ * answers with an empty successful JSON payload (`{ success: true, data: [] }`).
+ *
+ * @returns The live `calls` array the stub appends to.
+ */
+function stubFetchCapture(): { calls: Array<{ url: string }> } {
+  const calls: Array<{ url: string }> = [];
+  globalThis.fetch = vi.fn(async (input: string | URL | Request) => {
+    // A `Request` stringifies to "[object Request]", so read its `.url` instead.
+    const url =
+      typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
+    calls.push({ url });
+    return new Response(JSON.stringify({ success: true, data: [] }), {
+      status: 200,
+      headers: { "Content-Type": "application/json" },
+    });
+  }) as unknown as typeof globalThis.fetch;
+  return { calls };
+}
+
+describe("tool-web-search activation", () => {
+ it("registers the 'web_search' tool (defineTool called)", () => {
+ const { host, defineTool } = makeFakeHost();
+ activate(host);
+ expect(defineTool).toHaveBeenCalledTimes(1);
+ const registered = defineTool.mock.calls[0]?.[0];
+ if (!registered) throw new Error("no tool registered");
+ expect(registered.name).toBe("web_search");
+ expect(registered.concurrencySafe).toBe(true);
+ });
+
+ it("uses FIRECRAWL_BASE_URL from env", async () => {
+ process.env.FIRECRAWL_BASE_URL = "http://env-firecrawl.local/v1";
+ const { calls } = stubFetchCapture();
+ const { host, defineTool } = makeFakeHost();
+ activate(host);
+
+ const tool = defineTool.mock.calls[0]?.[0];
+ if (!tool) throw new Error("no tool registered");
+ await tool.execute({ query: "hello" }, stubCtx());
+ expect(calls.length).toBeGreaterThan(0);
+ expect(calls[0]?.url).toContain("http://env-firecrawl.local/v1/search");
+ });
+
+ it("uses default base URL when env unset", async () => {
+ delete process.env.FIRECRAWL_BASE_URL;
+ const { calls } = stubFetchCapture();
+ const { host, defineTool } = makeFakeHost();
+ activate(host);
+
+ const tool = defineTool.mock.calls[0]?.[0];
+ if (!tool) throw new Error("no tool registered");
+ await tool.execute({ query: "hello" }, stubCtx());
+ expect(calls.length).toBeGreaterThan(0);
+ expect(calls[0]?.url).toContain("100.102.55.49:31329/v1/search");
+ });
+});
+
+describe("tool-web-search manifest", () => {
+ it("declares network capability + web_search contribution", () => {
+ expect(manifest.id).toBe("tool-web-search");
+ expect(manifest.capabilities).toEqual({ network: true });
+ expect(manifest.contributes).toEqual({ tools: ["web_search"] });
+ expect(manifest.trust).toBe("bundled");
+ expect(manifest.activation).toBe("eager");
+ });
+
+ it("extension bundles the manifest + activate", () => {
+ expect(extension.manifest).toBe(manifest);
+ expect(typeof extension.activate).toBe("function");
+ });
+});
diff --git a/packages/tool-web-search/src/extension.ts b/packages/tool-web-search/src/extension.ts
new file mode 100644
index 0000000..1d1803d
--- /dev/null
+++ b/packages/tool-web-search/src/extension.ts
@@ -0,0 +1,32 @@
+/**
+ * tool-web-search extension — registers the `web_search` tool backed by a
+ * self-hosted Firecrawl instance on activation.
+ *
+ * The base URL comes from `FIRECRAWL_BASE_URL` (env) with a Tailscale default.
+ * Effects (`globalThis.fetch`) come from the ambient edge here, in the shell —
+ * never in the pure core. Logging is left to the host via `host.logger`/`ctx.log`
+ * (no `console.*`, no hand-rolled logger).
+ */
+
+import type { Extension, HostAPI, Manifest } from "@dispatch/kernel";
+import { createFirecrawlClient, DEFAULT_BASE_URL } from "./client.js";
+import { createWebSearchTool } from "./tool.js";
+
+export const manifest: Manifest = {
+ id: "tool-web-search",
+ name: "Web Search Tool",
+ version: "0.0.0",
+ apiVersion: "^0.1.0",
+ trust: "bundled",
+ activation: "eager",
+ capabilities: { network: true },
+ contributes: { tools: ["web_search"] },
+};
+
+/**
+ * Register the `web_search` tool with the host.
+ *
+ * The Firecrawl base URL is read from `FIRECRAWL_BASE_URL`. An unset, empty or
+ * whitespace-only value falls back to `DEFAULT_BASE_URL`, so a blank variable
+ * cannot produce relative request URLs such as `/search`.
+ *
+ * @param host Host API used to define the tool.
+ */
+export function activate(host: HostAPI): void {
+  const configured = process.env.FIRECRAWL_BASE_URL?.trim();
+  const baseUrl = configured ? configured : DEFAULT_BASE_URL;
+  const client = createFirecrawlClient({ baseUrl, fetchFn: globalThis.fetch });
+  host.defineTool(createWebSearchTool({ client }));
+}
+
+export const extension: Extension = { manifest, activate };
diff --git a/packages/tool-web-search/src/format.test.ts b/packages/tool-web-search/src/format.test.ts
new file mode 100644
index 0000000..b98bc02
--- /dev/null
+++ b/packages/tool-web-search/src/format.test.ts
@@ -0,0 +1,87 @@
+import { describe, expect, it } from "vitest";
+import {
+ formatCrawlResults,
+ formatMapResults,
+ formatScrapeResult,
+ formatSearchResults,
+ truncateOutput,
+} from "./format.js";
+
+describe("formatSearchResults", () => {
+ it("formats title + url + description + optional markdown", () => {
+ const out = formatSearchResults([
+ { title: "T1", url: "http://a", description: "desc", markdown: "md-body" },
+ ]);
+ expect(out).toBe("### T1\nhttp://a\n\ndesc\n\nmd-body");
+ });
+
+ it("joins multiple results with ---", () => {
+ const out = formatSearchResults([
+ { title: "T1", url: "http://a", description: "d1" },
+ { title: "T2", url: "http://b", description: "d2" },
+ ]);
+ expect(out).toBe("### T1\nhttp://a\n\nd1\n\n---\n\n### T2\nhttp://b\n\nd2");
+ });
+
+ it("empty data returns 'No results found.'", () => {
+ expect(formatSearchResults([])).toBe("No results found.");
+ expect(formatSearchResults(null)).toBe("No results found.");
+ expect(formatSearchResults(undefined)).toBe("No results found.");
+ });
+});
+
+describe("formatScrapeResult", () => {
+ it("formats title + markdown", () => {
+ const out = formatScrapeResult({
+ data: { markdown: "body", metadata: { title: "Title" } },
+ });
+ expect(out).toBe("# Title\n\nbody");
+ });
+
+ it("omits title header when absent", () => {
+ const out = formatScrapeResult({ data: { markdown: "body" } });
+ expect(out).toBe("body");
+ });
+});
+
+describe("formatCrawlResults", () => {
+ it("formats multiple pages", () => {
+ const out = formatCrawlResults([
+ { markdown: "p1", metadata: { title: "P1", sourceURL: "http://p1" } },
+ { markdown: "p2", metadata: { title: "P2", url: "http://p2" } },
+ ]);
+ expect(out).toBe("## P1\nhttp://p1\n\np1\n\n---\n\n## P2\nhttp://p2\n\np2");
+ });
+
+ it("empty data returns 'No pages crawled.'", () => {
+ expect(formatCrawlResults([])).toBe("No pages crawled.");
+ expect(formatCrawlResults(null)).toBe("No pages crawled.");
+ });
+});
+
+describe("formatMapResults", () => {
+ it("formats links as bullet list", () => {
+ const out = formatMapResults(["http://a", "http://b"]);
+ expect(out).toBe("- http://a\n- http://b");
+ });
+
+ it("empty links returns 'No links found.'", () => {
+ expect(formatMapResults([])).toBe("No links found.");
+ expect(formatMapResults(null)).toBe("No links found.");
+ });
+});
+
+describe("truncateOutput", () => {
+ it("truncates with notice when over cap", () => {
+ const output = "a".repeat(100);
+ const result = truncateOutput(output, 50);
+ expect(result).toContain("a".repeat(50));
+ expect(result).toContain("[Output truncated: exceeded 50 characters]");
+ expect(result.length).toBeLessThan(output.length + 100);
+ });
+
+ it("returns as-is when under cap", () => {
+ expect(truncateOutput("short", 100)).toBe("short");
+ expect(truncateOutput("exact", 5)).toBe("exact");
+ });
+});
diff --git a/packages/tool-web-search/src/format.ts b/packages/tool-web-search/src/format.ts
new file mode 100644
index 0000000..cfc9aa0
--- /dev/null
+++ b/packages/tool-web-search/src/format.ts
@@ -0,0 +1,111 @@
+/**
+ * Pure formatters for the web_search tool — input → output, no I/O.
+ *
+ * These mirror the proven opencode Firecrawl tool's formatting, isolated
+ * (not imported) per the isolation-over-DRY rule. Tested directly with
+ * zero mocks.
+ */
+
+/** A single search hit from Firecrawl's `/search` endpoint. */
+export interface SearchHit {
+ readonly title?: string;
+ readonly url?: string;
+ readonly description?: string;
+ readonly markdown?: string;
+}
+
+/** One page from a completed crawl (`/crawl` status `data`). */
+export interface CrawlPage {
+ readonly markdown?: string;
+ readonly metadata?: {
+ readonly title?: string;
+ readonly sourceURL?: string;
+ readonly url?: string;
+ };
+}
+
+/** The scrape response payload (`/scrape` `data`). */
+export interface ScrapeResult {
+ readonly data?: {
+ readonly markdown?: string;
+ readonly metadata?: { readonly title?: string };
+ };
+}
+
+/**
+ * Truncate `output` to at most `cap` UTF-16 code units and append a notice.
+ *
+ * Strings no longer than `cap` are returned unchanged. When the cut would
+ * fall between the two halves of a surrogate pair, the dangling high
+ * surrogate is dropped as well, so the result never ends in a broken
+ * character.
+ *
+ * @param output Text to cap.
+ * @param cap Maximum number of characters kept from `output`.
+ * @returns `output`, or its capped prefix followed by the truncation notice.
+ */
+export function truncateOutput(output: string, cap: number): string {
+  if (output.length <= cap) {
+    return output;
+  }
+  let end = cap;
+  const lastKept = output.charCodeAt(end - 1);
+  const firstDropped = output.charCodeAt(end);
+  const splitsPair =
+    lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
+  if (splitsPair) {
+    end -= 1;
+  }
+  return `${output.slice(0, end)}\n\n[Output truncated: exceeded ${cap} characters]`;
+}
+
+/**
+ * Render search hits as `### title\nurl\n\ndescription`, appending the hit's
+ * markdown when present, with sections separated by `---`. A missing title
+ * becomes "(no title)"; a missing url or description becomes an empty string.
+ * Null, undefined or empty input yields `"No results found."`.
+ */
+export function formatSearchResults(data: readonly SearchHit[] | null | undefined): string {
+  if (!data || data.length === 0) {
+    return "No results found.";
+  }
+  const sections = data.map((hit) => {
+    const header = `### ${hit.title ?? "(no title)"}\n${hit.url ?? ""}\n\n${hit.description ?? ""}`;
+    return hit.markdown ? `${header}\n\n${hit.markdown}` : header;
+  });
+  return sections.join("\n\n---\n\n");
+}
+
+/**
+ * Render a scrape response as its markdown, prefixed with a `# title` header
+ * when the metadata carries a non-empty title. Missing markdown renders as an
+ * empty string.
+ */
+export function formatScrapeResult(json: ScrapeResult): string {
+  const payload = json.data;
+  const body = payload?.markdown ?? "";
+  const heading = payload?.metadata?.title;
+  return heading ? `# ${heading}\n\n${body}` : body;
+}
+
+/**
+ * Render crawled pages as `## title\nurl` (plus the page markdown when
+ * present), separated by `---`. The URL comes from `metadata.sourceURL`,
+ * falling back to `metadata.url`; a missing title becomes "(no title)".
+ * Null, undefined or empty input yields `"No pages crawled."`.
+ */
+export function formatCrawlResults(data: readonly CrawlPage[] | null | undefined): string {
+  if (!data || data.length === 0) {
+    return "No pages crawled.";
+  }
+  const sections = data.map(({ markdown, metadata }) => {
+    const heading = `## ${metadata?.title ?? "(no title)"}\n${metadata?.sourceURL ?? metadata?.url ?? ""}`;
+    return markdown ? `${heading}\n\n${markdown}` : heading;
+  });
+  return sections.join("\n\n---\n\n");
+}
+
+/**
+ * Render discovered links as a `- link` bullet list, one per line.
+ * Null, undefined or empty input yields `"No links found."`.
+ */
+export function formatMapResults(links: readonly string[] | null | undefined): string {
+  if (!links || links.length === 0) {
+    return "No links found.";
+  }
+  const bullets: string[] = [];
+  for (const link of links) {
+    bullets.push(`- ${link}`);
+  }
+  return bullets.join("\n");
+}
diff --git a/packages/tool-web-search/src/index.ts b/packages/tool-web-search/src/index.ts
new file mode 100644
index 0000000..69894d1
--- /dev/null
+++ b/packages/tool-web-search/src/index.ts
@@ -0,0 +1,40 @@
+export {
+ CRAWL_MAX_WAIT_MS,
+ CRAWL_POLL_MS,
+ type CrawlParams,
+ createFirecrawlClient,
+ DEFAULT_BASE_URL,
+ DEFAULT_TIMEOUT_MS,
+ type FetchLike,
+ type FirecrawlClient,
+ type FirecrawlClientDeps,
+ type ScrapeParams,
+ type SearchParams,
+} from "./client.js";
+export { activate, extension, manifest } from "./extension.js";
+export {
+ type CrawlPage,
+ formatCrawlResults,
+ formatMapResults,
+ formatScrapeResult,
+ formatSearchResults,
+ type ScrapeResult,
+ type SearchHit,
+ truncateOutput,
+} from "./format.js";
+export { createWebSearchTool, type WebSearchToolDeps } from "./tool.js";
+export {
+ CRAWL_DEFAULT_LIMIT,
+ type CrawlArgs,
+ FORMATS,
+ type Format,
+ MAX_LIMIT,
+ type MapArgs,
+ MODES,
+ type Mode,
+ type ScrapeArgs,
+ SEARCH_DEFAULT_LIMIT,
+ type SearchArgs,
+ type ValidatedArgs,
+ validateArgs,
+} from "./validate.js";
diff --git a/packages/tool-web-search/src/tool.ts b/packages/tool-web-search/src/tool.ts
new file mode 100644
index 0000000..751278d
--- /dev/null
+++ b/packages/tool-web-search/src/tool.ts
@@ -0,0 +1,142 @@
+/**
+ * web_search tool factory — the imperative shell that binds the pure
+ * validate/format functions to the injected FirecrawlClient edge.
+ *
+ * Mirrors the tool-shell pattern: factory + injected dep + pure helpers +
+ * a `ToolResult` returned per call. Errors surface as `{ isError: true }`
+ * rather than thrown, so the model can react to the message.
+ */
+
+import type { ToolContract, ToolExecuteContext, ToolResult } from "@dispatch/kernel";
+import type { FirecrawlClient } from "./client.js";
+import {
+ formatCrawlResults,
+ formatMapResults,
+ formatScrapeResult,
+ formatSearchResults,
+ truncateOutput,
+} from "./format.js";
+import type { ValidatedArgs } from "./validate.js";
+import { validateArgs } from "./validate.js";
+
+/** Default cap handed to `truncateOutput` when `outputCap` is not supplied. */
+const OUTPUT_CAP = 50_000;
+
+/** Dependencies injected into `createWebSearchTool`. */
+export interface WebSearchToolDeps {
+ /** Firecrawl client used for every search/scrape/crawl/map request. */
+ readonly client: FirecrawlClient;
+ /** Optional override for the output cap; falls back to `OUTPUT_CAP`. */
+ readonly outputCap?: number;
+}
+
+/**
+ * Execute one validated request: call the client method that matches
+ * `validated.mode` and render its result with the corresponding formatter.
+ * The caller's `signal` is forwarded to every client call.
+ */
+async function runMode(
+  validated: ValidatedArgs,
+  client: FirecrawlClient,
+  signal: AbortSignal,
+): Promise<string> {
+  switch (validated.mode) {
+    case "search": {
+      const { query, limit, scrape, lang, country } = validated;
+      return formatSearchResults(
+        await client.search(
+          {
+            query,
+            limit,
+            // Full-page markdown is requested only when the caller opted in via `scrape`.
+            ...(scrape ? { scrapeOptions: { formats: ["markdown"], onlyMainContent: true } } : {}),
+            // Optional filters are left out entirely rather than sent as `undefined`.
+            ...(lang !== undefined ? { lang } : {}),
+            ...(country !== undefined ? { country } : {}),
+          },
+          signal,
+        ),
+      );
+    }
+    case "scrape": {
+      const { url, format } = validated;
+      return formatScrapeResult(await client.scrape({ url, formats: [format] }, signal));
+    }
+    case "crawl": {
+      const { url, limit, format } = validated;
+      return formatCrawlResults(await client.crawl({ url, limit, formats: [format] }, signal));
+    }
+    case "map": {
+      return formatMapResults(await client.map(validated.url, signal));
+    }
+  }
+}
+
+/**
+ * Build the `web_search` tool contract around an injected Firecrawl client.
+ *
+ * The tool is marked `concurrencySafe: true` — web search is idempotent and
+ * may run alongside other tools. The `network` capability is declared on the
+ * extension manifest, not on this contract.
+ *
+ * `execute` never throws for validation or client failures: both are returned
+ * as `{ isError: true }` results so the model can react to the message.
+ */
+export function createWebSearchTool(deps: WebSearchToolDeps): ToolContract {
+  const { client } = deps;
+  const maxOutput = deps.outputCap ?? OUTPUT_CAP;
+
+  const toolDescription =
+    "Access the web via a self-hosted Firecrawl instance. Supports search, " +
+    "single-page scrape, site crawling, and sitemap discovery.";
+  const modeDescription =
+    "Operation mode. 'search' (default when query present), 'scrape' " +
+    "(default when url present), 'crawl' (recursively scrape pages from a site), " +
+    "'map' (discover URLs on a site).";
+
+  return {
+    name: "web_search",
+    description: toolDescription,
+    parameters: {
+      type: "object",
+      properties: {
+        query: { type: "string", description: "The search query (search mode)." },
+        url: { type: "string", description: "A URL to scrape, crawl, or map." },
+        mode: {
+          type: "string",
+          enum: ["search", "scrape", "crawl", "map"],
+          description: modeDescription,
+        },
+        limit: {
+          type: "number",
+          description: "Max results. Search: default 7, max 10. Crawl: default 3, max 10.",
+        },
+        scrape: {
+          type: "boolean",
+          description: "When searching, also scrape full markdown content of each result page.",
+        },
+        lang: {
+          type: "string",
+          description: 'Language code to filter search results (e.g. "en", "ja").',
+        },
+        country: {
+          type: "string",
+          description: 'Country code to filter search results (e.g. "us", "jp").',
+        },
+        format: {
+          type: "string",
+          enum: ["markdown", "text", "html"],
+          description: "Format for scrape/crawl output (default: markdown).",
+        },
+      },
+    },
+    concurrencySafe: true,
+    async execute(args: unknown, ctx: ToolExecuteContext): Promise<ToolResult> {
+      const parsed = validateArgs(args);
+      if ("error" in parsed) {
+        // Validation failures are reported verbatim; no span is opened for them.
+        return { content: parsed.error, isError: true };
+      }
+      const trace = ctx.log.span("web_search.execute", { mode: parsed.mode });
+      try {
+        const rendered = await runMode(parsed, client, ctx.signal);
+        trace.end();
+        // Truncation stays inside the try so any failure here is also reported as a result.
+        return { content: truncateOutput(rendered, maxOutput) };
+      } catch (err: unknown) {
+        trace.end({ err });
+        const reason = err instanceof Error ? err.message : String(err);
+        return { content: `Error: ${reason}`, isError: true };
+      }
+    },
+  };
+}
diff --git a/packages/tool-web-search/src/validate.test.ts b/packages/tool-web-search/src/validate.test.ts
new file mode 100644
index 0000000..30ae26c
--- /dev/null
+++ b/packages/tool-web-search/src/validate.test.ts
@@ -0,0 +1,92 @@
+import { describe, expect, it } from "vitest";
+import {
+ type CrawlArgs,
+ type MapArgs,
+ type ScrapeArgs,
+ type SearchArgs,
+ validateArgs,
+} from "./validate.js";
+
+describe("validateArgs", () => {
+  /** Validate `raw`, assert it was accepted, and hand back the narrowed args. */
+  function accepted(raw: unknown) {
+    const outcome = validateArgs(raw);
+    expect("error" in outcome).toBe(false);
+    if ("error" in outcome) {
+      // Unreachable after the expectation above; kept only to narrow the type.
+      throw new Error(outcome.error);
+    }
+    return outcome;
+  }
+
+  /** Validate `raw`, assert it was rejected, and hand back the error text. */
+  function rejected(raw: unknown): string {
+    const outcome = validateArgs(raw);
+    expect(outcome).toHaveProperty("error");
+    if (!("error" in outcome)) {
+      // Unreachable after the expectation above; kept only to narrow the type.
+      throw new Error("expected a validation error");
+    }
+    return outcome.error;
+  }
+
+  it("mode defaults to search when query present", () => {
+    const args = accepted({ query: "hello" });
+    expect(args.mode).toBe("search");
+    expect((args as SearchArgs).query).toBe("hello");
+  });
+
+  it("mode defaults to scrape when url present (no query)", () => {
+    const args = accepted({ url: "http://example.com" });
+    expect(args.mode).toBe("scrape");
+    expect((args as ScrapeArgs).url).toBe("http://example.com");
+  });
+
+  it("explicit mode overrides defaults", () => {
+    const args = accepted({ query: "hello", url: "http://x", mode: "map" });
+    expect(args.mode).toBe("map");
+    expect((args as MapArgs).url).toBe("http://x");
+  });
+
+  it("search mode requires query", () => {
+    rejected({ mode: "search" });
+  });
+
+  it("scrape/crawl/map modes require url", () => {
+    for (const mode of ["scrape", "crawl", "map"]) {
+      expect(validateArgs({ mode })).toHaveProperty("error");
+    }
+  });
+
+  it("limit clamped to max 10", () => {
+    const args = accepted({ query: "hello", limit: 50 });
+    expect((args as SearchArgs).limit).toBe(10);
+  });
+
+  it("limit defaults to 7 (search) / 3 (crawl)", () => {
+    const searchArgs = accepted({ query: "hello" });
+    expect((searchArgs as SearchArgs).limit).toBe(7);
+
+    const crawlArgs = accepted({ url: "http://x", mode: "crawl" });
+    expect((crawlArgs as CrawlArgs).limit).toBe(3);
+  });
+
+  it("format defaults to markdown", () => {
+    expect(accepted({ query: "hello" }).format).toBe("markdown");
+  });
+
+  it("rejects invalid mode", () => {
+    expect(rejected({ mode: "invalid" })).toContain("Invalid mode");
+  });
+
+  it("rejects invalid format", () => {
+    expect(rejected({ url: "http://x", format: "pdf" })).toContain("Invalid format");
+  });
+
+  it("returns error for null/non-object args", () => {
+    for (const bad of [null, undefined, "string", 42]) {
+      expect(validateArgs(bad)).toHaveProperty("error");
+    }
+  });
+});
diff --git a/packages/tool-web-search/src/validate.ts b/packages/tool-web-search/src/validate.ts
new file mode 100644
index 0000000..56bd356
--- /dev/null
+++ b/packages/tool-web-search/src/validate.ts
@@ -0,0 +1,212 @@
+/**
+ * Pure argument validation for the web_search tool — input → output, no I/O.
+ *
+ * Resolves the operation mode (explicit, or inferred from `query`/`url`),
+ * applies per-mode field requirements, clamps `limit`, and defaults `format`.
+ * Returns a discriminated union so the tool's dispatch narrows by `mode`.
+ */
+
+/** Operation modes accepted for the `mode` argument. */
+export const MODES = ["search", "scrape", "crawl", "map"] as const;
+/** Union of the literal mode names in `MODES`. */
+export type Mode = (typeof MODES)[number];
+
+/** Output formats accepted for the `format` argument. */
+export const FORMATS = ["markdown", "text", "html"] as const;
+/** Union of the literal format names in `FORMATS`. */
+export type Format = (typeof FORMATS)[number];
+
+/** `limit` used in search mode when the caller omits it. */
+export const SEARCH_DEFAULT_LIMIT = 7;
+/** `limit` used in crawl mode when the caller omits it. */
+export const CRAWL_DEFAULT_LIMIT = 3;
+/** Upper bound that any supplied `limit` is clamped to. */
+export const MAX_LIMIT = 10;
+
+/** Fields shared by every mode's validated args. */
+interface BaseArgs {
+ /** Resolved output format; `markdown` when the caller gave none. */
+ readonly format: Format;
+}
+
+/** Validated args for `search` mode. */
+export interface SearchArgs extends BaseArgs {
+ readonly mode: "search";
+ /** Required, non-blank search query. */
+ readonly query: string;
+ /** Result limit, defaulted and clamped to `MAX_LIMIT`. */
+ readonly limit: number;
+ /** Whether to also scrape each result page; `false` when omitted. */
+ readonly scrape: boolean;
+ /** Optional language filter; absent when the caller gave none. */
+ readonly lang?: string;
+ /** Optional country filter; absent when the caller gave none. */
+ readonly country?: string;
+}
+
+/** Validated args for `scrape` mode. */
+export interface ScrapeArgs extends BaseArgs {
+ readonly mode: "scrape";
+ /** Required, non-blank URL. */
+ readonly url: string;
+}
+
+/** Validated args for `crawl` mode. */
+export interface CrawlArgs extends BaseArgs {
+ readonly mode: "crawl";
+ /** Required, non-blank URL. */
+ readonly url: string;
+ /** Page limit, defaulted and clamped to `MAX_LIMIT`. */
+ readonly limit: number;
+}
+
+/** Validated args for `map` mode. */
+export interface MapArgs extends BaseArgs {
+ readonly mode: "map";
+ /** Required, non-blank URL. */
+ readonly url: string;
+}
+
+/** Discriminated union of validated args, tagged by `mode`. */
+export type ValidatedArgs = SearchArgs | ScrapeArgs | CrawlArgs | MapArgs;
+
+/** Failure shape: a ready-to-display error message. */
+export type ValidationError = { readonly error: string };
+
+/** Internal success-or-error wrapper returned by the per-field resolvers. */
+type Result<T> = { readonly value: T } | ValidationError;
+
+/**
+ * Resolve the `format` argument: absent (`undefined`/`null`) means
+ * `"markdown"`, a known format name is passed through, anything else is an error.
+ */
+function resolveFormat(raw: unknown): Result<Format> {
+  const isAbsent = raw === undefined || raw === null;
+  if (isAbsent) {
+    return { value: "markdown" };
+  }
+  // Strict equality only ever matches when `raw` is one of the known format strings.
+  const known = FORMATS.find((candidate) => candidate === raw);
+  if (known !== undefined) {
+    return { value: known };
+  }
+  const shown = String(raw);
+  return {
+    error: `Error: Invalid format "${shown}" (must be one of: markdown, text, html).`,
+  };
+}
+
+/**
+ * Resolve the operation mode. An explicit, known `mode` wins; an unknown one
+ * is an error. When `mode` is absent it is inferred: a non-blank `query`
+ * selects `search`, otherwise a non-blank `url` selects `scrape`, otherwise
+ * `search`.
+ */
+function resolveMode(raw: unknown, query: unknown, url: unknown): Result<Mode> {
+  // Strict equality only ever matches when `raw` is one of the known mode strings.
+  const explicit = MODES.find((candidate) => candidate === raw);
+  if (explicit !== undefined) {
+    return { value: explicit };
+  }
+  if (raw !== undefined && raw !== null) {
+    return {
+      error: `Error: Invalid mode "${String(raw)}" (must be one of: search, scrape, crawl, map).`,
+    };
+  }
+  const isNonBlank = (candidate: unknown): boolean =>
+    typeof candidate === "string" && candidate.trim().length > 0;
+  if (isNonBlank(query)) {
+    return { value: "search" };
+  }
+  return { value: isNonBlank(url) ? "scrape" : "search" };
+}
+
+/**
+ * Read an optional string argument. Strings pass through unchanged,
+ * `undefined`/`null` resolve to `undefined`, and any other type is an error
+ * that names the offending argument.
+ */
+function optionalString(raw: unknown, name: string): Result<string | undefined> {
+  if (typeof raw === "string") {
+    return { value: raw };
+  }
+  const isAbsent = raw === undefined || raw === null;
+  return isAbsent ? { value: undefined } : { error: `Error: "${name}" must be a string.` };
+}
+
+/**
+ * Resolve the `limit` argument.
+ *
+ * - `undefined`/`null` → `defaultLimit`.
+ * - A number, or a non-blank numeric string, that is finite and at least 1 →
+ *   floored and clamped to `MAX_LIMIT`.
+ * - Anything else → an error.
+ *
+ * Only numbers and strings are converted. Passing every type through
+ * `Number()` would silently accept booleans (`true` → 1) and single-element
+ * arrays (`[5]` → 5), which are never a meaningful limit.
+ */
+function resolveLimit(raw: unknown, defaultLimit: number): Result<number> {
+  if (raw === undefined || raw === null) {
+    return { value: defaultLimit };
+  }
+  let n = Number.NaN;
+  if (typeof raw === "number") {
+    n = raw;
+  } else if (typeof raw === "string" && raw.trim().length > 0) {
+    // Numeric strings stay accepted for backward compatibility.
+    n = Number(raw);
+  }
+  if (!Number.isFinite(n) || n < 1) {
+    return { error: 'Error: "limit" must be a positive number.' };
+  }
+  return { value: Math.min(Math.floor(n), MAX_LIMIT) };
+}
+
+/**
+ * Read an optional boolean argument. Booleans pass through unchanged,
+ * `undefined`/`null` resolve to `false`, and any other type is an error that
+ * names the offending argument.
+ */
+function resolveBoolean(raw: unknown, name: string): Result<boolean> {
+  if (typeof raw === "boolean") {
+    return { value: raw };
+  }
+  const isAbsent = raw === undefined || raw === null;
+  return isAbsent ? { value: false } : { error: `Error: "${name}" must be a boolean.` };
+}
+
+/**
+ * Require a non-blank URL for a URL-driven mode and return it trimmed.
+ * The error text names the mode so it can be surfaced verbatim.
+ */
+function requireUrl(url: string | undefined, mode: Exclude<Mode, "search">): Result<string> {
+  const trimmed = url?.trim() ?? "";
+  if (trimmed.length === 0) {
+    return { error: `Error: url is required for ${mode} mode.` };
+  }
+  return { value: trimmed };
+}
+
+/**
+ * Validate raw tool args and resolve a typed, mode-aware `ValidatedArgs`.
+ * Returns `{ error }` for invalid input — the tool surfaces it verbatim.
+ *
+ * Checks run in a fixed order (format, mode, query, url, then the per-mode
+ * fields) and the first failure is returned. Arrays are rejected together with
+ * other non-object input. `query` and `url` are returned with surrounding
+ * whitespace removed, so the value sent onward is the same one that passed the
+ * blank check.
+ */
+export function validateArgs(args: unknown): ValidatedArgs | ValidationError {
+  // `typeof [] === "object"`, so arrays need their own check.
+  if (args === null || typeof args !== "object" || Array.isArray(args)) {
+    return { error: "Error: Arguments must be an object." };
+  }
+  const obj = args as Record<string, unknown>;
+
+  const format = resolveFormat(obj.format);
+  if ("error" in format) {
+    return format;
+  }
+
+  const mode = resolveMode(obj.mode, obj.query, obj.url);
+  if ("error" in mode) {
+    return mode;
+  }
+
+  const query = optionalString(obj.query, "query");
+  if ("error" in query) {
+    return query;
+  }
+
+  const url = optionalString(obj.url, "url");
+  if ("error" in url) {
+    return url;
+  }
+
+  switch (mode.value) {
+    case "search": {
+      const trimmedQuery = query.value?.trim() ?? "";
+      if (trimmedQuery.length === 0) {
+        return { error: "Error: query is required for search mode." };
+      }
+      const limit = resolveLimit(obj.limit, SEARCH_DEFAULT_LIMIT);
+      if ("error" in limit) {
+        return limit;
+      }
+      const scrape = resolveBoolean(obj.scrape, "scrape");
+      if ("error" in scrape) {
+        return scrape;
+      }
+      const lang = optionalString(obj.lang, "lang");
+      if ("error" in lang) {
+        return lang;
+      }
+      const country = optionalString(obj.country, "country");
+      if ("error" in country) {
+        return country;
+      }
+      const result: SearchArgs = {
+        mode: "search",
+        query: trimmedQuery,
+        limit: limit.value,
+        scrape: scrape.value,
+        format: format.value,
+        // Optional filters are omitted rather than set to `undefined`.
+        ...(lang.value !== undefined ? { lang: lang.value } : {}),
+        ...(country.value !== undefined ? { country: country.value } : {}),
+      };
+      return result;
+    }
+    case "scrape": {
+      const target = requireUrl(url.value, "scrape");
+      if ("error" in target) {
+        return target;
+      }
+      const result: ScrapeArgs = {
+        mode: "scrape",
+        url: target.value,
+        format: format.value,
+      };
+      return result;
+    }
+    case "crawl": {
+      const target = requireUrl(url.value, "crawl");
+      if ("error" in target) {
+        return target;
+      }
+      const limit = resolveLimit(obj.limit, CRAWL_DEFAULT_LIMIT);
+      if ("error" in limit) {
+        return limit;
+      }
+      const result: CrawlArgs = {
+        mode: "crawl",
+        url: target.value,
+        limit: limit.value,
+        format: format.value,
+      };
+      return result;
+    }
+    case "map": {
+      const target = requireUrl(url.value, "map");
+      if ("error" in target) {
+        return target;
+      }
+      const result: MapArgs = {
+        mode: "map",
+        url: target.value,
+        format: format.value,
+      };
+      return result;
+    }
+  }
+}
diff --git a/packages/tool-web-search/tsconfig.json b/packages/tool-web-search/tsconfig.json
new file mode 100644
index 0000000..ff99a43
--- /dev/null
+++ b/packages/tool-web-search/tsconfig.json
@@ -0,0 +1,6 @@
+{
+ "extends": "../../tsconfig.base.json",
+ "compilerOptions": { "rootDir": "src", "outDir": "dist", "composite": true },
+ "include": ["src/**/*.ts"],
+ "references": [{ "path": "../kernel" }]
+}
diff --git a/tasks.md b/tasks.md
index 0efc511..a374562 100644
--- a/tasks.md
+++ b/tasks.md
@@ -5,7 +5,7 @@
> Keep this lean and current; do not let it re-accrete a step-by-step changelog.
## Status (current)
-`tsc -b` EXIT 0 · biome clean · **1059 vitest + 199 transport bun green**.
+`tsc -b` EXIT 0 · biome clean · **1097 vitest + 199 transport bun green**.
Built and verified live (full-fidelity: every feature is a manifest-loaded
extension through the host):
@@ -454,7 +454,19 @@ path**: first extract a generic `@dispatch/openai-stream` library from
**Boot smoke:** without `UMANS_API_KEY` → `"provider-umans: no UMANS_API_KEY. Provider
not registered."` (graceful skip); with `UMANS_API_KEY=sk-test` → `"provider-umans:
registered (model=umans-coder)"`.
-- [ ] Live-verify against the real Umans API (not yet exercised end-to-end).
+- [x] **LIVE-VERIFIED against the real Umans API:** the dev stack (umans-glm-5.2) called
+ `web_search` (Firecrawl) in a real turn — first live Umans API call, clean response.
+
+## web_search tool — Firecrawl (DONE)
+Standard tool extension `tool-web-search` backed by a self-hosted Firecrawl instance
+(`http://100.102.55.49:31329/v1`, Tailscale, no API key). One tool `web_search` with 4
+modes: search, scrape, crawl (polls status URL), map — mirroring the proven opencode tool.
+Pure core: `validateArgs` (discriminated union by mode) + `format*` functions + `truncateOutput`.
+Injected edge: `FirecrawlClient` (injectable `fetchFn` + `sleep` + `now`), `AbortSignal.any`
+for per-request timeout + caller cancellation. `concurrencySafe: true`, `capabilities: { network: true }`.
+38 tests. Report: `reports/tool-web-search.md`.
+- **LIVE-VERIFIED:** the dev stack (umans-glm-5.2) called `web_search` → Firecrawl returned
+ real results (Paris, France) — first live Umans API call too.
## Open items
- **Context window LIMIT (deferred, sibling of context size):** expose the selected model's max
@@ -507,8 +519,7 @@ path**: first extract a generic `@dispatch/openai-stream` library from
5. **`todo` tool** — a per-conversation task-list tool the model maintains
(like opencode's todowrite/todoread), as a standard tool extension; likely a
surface so the FE can render the live list.
- 6. **`web_search` tool** — a web search tool (like old dispatch's;
- reference-only source at `../dispatch-source`), as a standard tool extension.
+ 6. ~~**`web_search` tool**~~ — **DONE** (see milestone section above).
7. **Message queue — close-with-queued-messages (deferred product decision):**
if a client closes a conversation (`POST /conversations/:id/close`) while the
queue is non-empty, the carry currently still fires (starts a new turn on the
diff --git a/tsconfig.json b/tsconfig.json
index b227e92..d084acb 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -22,6 +22,7 @@
{ "path": "./packages/tool-shell" },
{ "path": "./packages/tool-edit-file" },
{ "path": "./packages/tool-write-file" },
+ { "path": "./packages/tool-web-search" },
{ "path": "./packages/skills" },
{ "path": "./packages/cache-warming" },
{ "path": "./packages/message-queue" },