diff options
| author | Adam Malczewski <[email protected]> | 2026-06-21 13:11:29 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-21 13:11:29 +0900 |
| commit | 8a4a624d16422467a8e85434c674bb591877e8ea (patch) | |
| tree | 54052da00bbc580742913e5c031b7cc1b160db19 | |
| parent | d23de3254374d4d63c8e15c6ab9311c3c6f4da5b (diff) | |
| download | dispatch-8a4a624d16422467a8e85434c674bb591877e8ea.tar.gz dispatch-8a4a624d16422467a8e85434c674bb591877e8ea.zip | |
feat(tool-web-search): Firecrawl-backed web search tool
Adds a new standard tool extension exposing a single tool, web_search, that
supports four modes (search, scrape, crawl, map) against a self-hosted Firecrawl instance.
Pure core: validateArgs (a discriminated union keyed by mode), the format* functions,
and truncateOutput. Injected edge: FirecrawlClient (injectable fetchFn/sleep/now;
AbortSignal.any combines the per-request timeout with caller cancellation).
The tool is registered with concurrencySafe: true, and the extension manifest
declares the network capability. 38 tests, zero vi.mock.
Live-verified: umans-glm-5.2 called web_search → real Firecrawl results (also
the first live Umans API call).
| -rw-r--r-- | bun.lock | 1 | ||||
| -rw-r--r-- | packages/host-bin/package.json | 1 | ||||
| -rw-r--r-- | packages/host-bin/src/main.ts | 2 | ||||
| -rw-r--r-- | packages/tool-web-search/package.json | 11 | ||||
| -rw-r--r-- | packages/tool-web-search/src/client.test.ts | 208 | ||||
| -rw-r--r-- | packages/tool-web-search/src/client.ts | 243 | ||||
| -rw-r--r-- | packages/tool-web-search/src/extension.test.ts | 113 | ||||
| -rw-r--r-- | packages/tool-web-search/src/extension.ts | 32 | ||||
| -rw-r--r-- | packages/tool-web-search/src/format.test.ts | 87 | ||||
| -rw-r--r-- | packages/tool-web-search/src/format.ts | 111 | ||||
| -rw-r--r-- | packages/tool-web-search/src/index.ts | 40 | ||||
| -rw-r--r-- | packages/tool-web-search/src/tool.ts | 142 | ||||
| -rw-r--r-- | packages/tool-web-search/src/validate.test.ts | 92 | ||||
| -rw-r--r-- | packages/tool-web-search/src/validate.ts | 212 | ||||
| -rw-r--r-- | packages/tool-web-search/tsconfig.json | 6 | ||||
| -rw-r--r-- | tasks.md | 19 | ||||
| -rw-r--r-- | tsconfig.json | 1 |
17 files changed, 1317 insertions, 4 deletions
@@ -73,6 +73,7 @@ "@dispatch/tool-edit-file": "workspace:*", "@dispatch/tool-read-file": "workspace:*", "@dispatch/tool-shell": "workspace:*", + "@dispatch/tool-web-search": "workspace:*", "@dispatch/tool-write-file": "workspace:*", "@dispatch/transport-http": "workspace:*", "@dispatch/transport-ws": "workspace:*", diff --git a/packages/host-bin/package.json b/packages/host-bin/package.json index 63b78bc..5f7d0e7 100644 --- a/packages/host-bin/package.json +++ b/packages/host-bin/package.json @@ -21,6 +21,7 @@ "@dispatch/tool-shell": "workspace:*", "@dispatch/tool-edit-file": "workspace:*", "@dispatch/tool-write-file": "workspace:*", + "@dispatch/tool-web-search": "workspace:*", "@dispatch/journal-sink": "workspace:*", "@dispatch/lsp": "workspace:*", "@dispatch/surface-loaded-extensions": "workspace:*", diff --git a/packages/host-bin/src/main.ts b/packages/host-bin/src/main.ts index 1928a8a..1c122de 100644 --- a/packages/host-bin/src/main.ts +++ b/packages/host-bin/src/main.ts @@ -32,6 +32,7 @@ import { extension as throughputStoreExt } from "@dispatch/throughput-store"; import { extension as toolEditFileExt } from "@dispatch/tool-edit-file"; import { extension as toolReadFileExt } from "@dispatch/tool-read-file"; import { extension as toolShellExt } from "@dispatch/tool-shell"; +import { extension as toolWebSearchExt } from "@dispatch/tool-web-search"; import { extension as toolWriteFileExt } from "@dispatch/tool-write-file"; import { createTransportHttpExtension } from "@dispatch/transport-http"; import { createTransportWsExtension } from "@dispatch/transport-ws"; @@ -75,6 +76,7 @@ const CORE_EXTENSIONS: readonly Extension[] = [ toolReadFileExt, toolShellExt, toolWriteFileExt, + toolWebSearchExt, throughputStoreExt, messageQueueExt, sessionOrchestratorExt, diff --git a/packages/tool-web-search/package.json b/packages/tool-web-search/package.json new file mode 100644 index 0000000..c41ab7b --- /dev/null +++ b/packages/tool-web-search/package.json @@ -0,0 +1,11 @@ 
+{ + "name": "@dispatch/tool-web-search", + "version": "0.0.0", + "type": "module", + "private": true, + "main": "dist/index.js", + "types": "dist/index.d.ts", + "dependencies": { + "@dispatch/kernel": "workspace:*" + } +} diff --git a/packages/tool-web-search/src/client.test.ts b/packages/tool-web-search/src/client.test.ts new file mode 100644 index 0000000..f020a83 --- /dev/null +++ b/packages/tool-web-search/src/client.test.ts @@ -0,0 +1,208 @@ +import { describe, expect, it } from "vitest"; +import { createFirecrawlClient, type FetchLike } from "./client.js"; + +function jsonResponse(body: unknown, status = 200): Response { + return new Response(JSON.stringify(body), { + status, + headers: { "Content-Type": "application/json" }, + }); +} + +interface CapturedCall { + url: string; + method?: string | undefined; + body?: string | undefined; +} + +/** Builds a fake fetch that returns scripted responses in order, capturing each call. */ +function makeFetch(responses: Response[]): { fetchFn: FetchLike; calls: CapturedCall[] } { + const calls: CapturedCall[] = []; + let i = 0; + const fetchFn: FetchLike = (async (input: string | URL | Request, init?: RequestInit) => { + const url = + typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url; + calls.push({ + url, + method: init?.method, + body: typeof init?.body === "string" ? init.body : undefined, + }); + return responses[i++] ?? 
jsonResponse({}); + }) as unknown as FetchLike; + return { fetchFn, calls }; +} + +const BASE = "http://test-firecrawl.local/v1"; +const signal = (): AbortSignal => new AbortController().signal; + +describe("createFirecrawlClient.search", () => { + it("sends POST /search with correct body", async () => { + const { fetchFn, calls } = makeFetch([jsonResponse({ success: true, data: [] })]); + const client = createFirecrawlClient({ baseUrl: BASE, fetchFn }); + await client.search({ query: "hello", limit: 7 }, signal()); + + const call = calls[0]; + if (!call) throw new Error("no call captured"); + expect(call.url).toBe(`${BASE}/search`); + expect(call.method).toBe("POST"); + expect(JSON.parse(call.body ?? "{}")).toEqual({ query: "hello", limit: 7 }); + }); + + it("returns parsed data on success", async () => { + const data = [{ title: "T", url: "http://x", description: "d" }]; + const { fetchFn } = makeFetch([jsonResponse({ success: true, data })]); + const client = createFirecrawlClient({ baseUrl: BASE, fetchFn }); + const result = await client.search({ query: "hello", limit: 7 }, signal()); + expect(result).toEqual(data); + }); + + it("throws on !success", async () => { + const { fetchFn } = makeFetch([jsonResponse({ success: false, error: "boom" })]); + const client = createFirecrawlClient({ baseUrl: BASE, fetchFn }); + await expect(client.search({ query: "x", limit: 7 }, signal())).rejects.toThrow("boom"); + }); +}); + +describe("createFirecrawlClient.scrape", () => { + it("sends POST /scrape with correct body", async () => { + const { fetchFn, calls } = makeFetch([ + jsonResponse({ success: true, data: { markdown: "md", metadata: { title: "T" } } }), + ]); + const client = createFirecrawlClient({ baseUrl: BASE, fetchFn }); + await client.scrape({ url: "http://x", formats: ["markdown"] }, signal()); + + const call = calls[0]; + if (!call) throw new Error("no call captured"); + expect(call.url).toBe(`${BASE}/scrape`); + expect(call.method).toBe("POST"); + 
expect(JSON.parse(call.body ?? "{}")).toEqual({ + url: "http://x", + formats: ["markdown"], + onlyMainContent: true, + }); + }); +}); + +describe("createFirecrawlClient.crawl", () => { + it("polls status URL until completed", async () => { + const { fetchFn, calls } = makeFetch([ + jsonResponse({ success: true, url: `${BASE}/crawl/status/123` }), + jsonResponse({ status: "scraping" }), + jsonResponse({ + status: "completed", + data: [{ markdown: "p1", metadata: { title: "P1", sourceURL: "http://p1" } }], + }), + ]); + const client = createFirecrawlClient({ + baseUrl: BASE, + fetchFn, + sleep: async () => {}, + }); + const pages = await client.crawl( + { url: "http://site", limit: 3, formats: ["markdown"] }, + signal(), + ); + expect(pages).toEqual([{ markdown: "p1", metadata: { title: "P1", sourceURL: "http://p1" } }]); + expect(calls.length).toBe(3); + }); + + it("returns data when completed", async () => { + const { fetchFn } = makeFetch([ + jsonResponse({ success: true, url: `${BASE}/crawl/status/123` }), + jsonResponse({ + status: "completed", + data: [{ markdown: "page", metadata: { title: "T" } }], + }), + ]); + const client = createFirecrawlClient({ + baseUrl: BASE, + fetchFn, + sleep: async () => {}, + }); + const pages = await client.crawl( + { url: "http://site", limit: 3, formats: ["markdown"] }, + signal(), + ); + expect(pages.length).toBe(1); + expect(pages[0]?.markdown).toBe("page"); + }); + + it("throws when status is failed", async () => { + const { fetchFn } = makeFetch([ + jsonResponse({ success: true, url: `${BASE}/crawl/status/123` }), + jsonResponse({ status: "failed", error: "boom" }), + ]); + const client = createFirecrawlClient({ + baseUrl: BASE, + fetchFn, + sleep: async () => {}, + }); + await expect( + client.crawl({ url: "http://site", limit: 3, formats: ["markdown"] }, signal()), + ).rejects.toThrow("failed"); + }); + + it("respects abort signal (stops polling)", async () => { + const controller = new AbortController(); + const { 
fetchFn, calls } = makeFetch([ + jsonResponse({ success: true, url: `${BASE}/crawl/status/123` }), + ]); + const client = createFirecrawlClient({ + baseUrl: BASE, + fetchFn, + sleep: async (_ms, sig) => { + controller.abort(); + if (sig.aborted) throw new Error("Request aborted."); + }, + }); + await expect( + client.crawl({ url: "http://site", limit: 3, formats: ["markdown"] }, controller.signal), + ).rejects.toThrow(); + expect(calls.length).toBe(1); + }); +}); + +describe("createFirecrawlClient.map", () => { + it("sends POST /map and returns links", async () => { + const { fetchFn, calls } = makeFetch([ + jsonResponse({ success: true, links: ["http://a", "http://b"] }), + ]); + const client = createFirecrawlClient({ baseUrl: BASE, fetchFn }); + const links = await client.map("http://site", signal()); + expect(links).toEqual(["http://a", "http://b"]); + + const call = calls[0]; + if (!call) throw new Error("no call captured"); + expect(call.url).toBe(`${BASE}/map`); + expect(call.method).toBe("POST"); + expect(JSON.parse(call.body ?? 
"{}")).toEqual({ url: "http://site" }); + }); +}); + +describe("createFirecrawlClient.request (error paths)", () => { + it("throws on HTTP error", async () => { + const { fetchFn } = makeFetch([ + new Response("not found", { status: 404, statusText: "Not Found" }), + ]); + const client = createFirecrawlClient({ baseUrl: BASE, fetchFn }); + await expect(client.search({ query: "x", limit: 7 }, signal())).rejects.toThrow("HTTP 404"); + }); + + it("throws on timeout", async () => { + const fetchFn: FetchLike = ((_input: string | URL | Request, init?: RequestInit) => + new Promise<Response>((_resolve, reject) => { + const sig = init?.signal; + if (!sig) return; + sig.addEventListener("abort", () => { + const err = new Error("aborted"); + err.name = "AbortError"; + reject(err); + }); + })) as unknown as FetchLike; + const client = createFirecrawlClient({ + baseUrl: BASE, + fetchFn, + timeoutMs: 10, + }); + await expect(client.search({ query: "x", limit: 7 }, signal())).rejects.toThrow("timed out"); + }); +}); diff --git a/packages/tool-web-search/src/client.ts b/packages/tool-web-search/src/client.ts new file mode 100644 index 0000000..071ba97 --- /dev/null +++ b/packages/tool-web-search/src/client.ts @@ -0,0 +1,243 @@ +/** + * FirecrawlClient — the injected outermost edge for the web_search tool. + * + * All effects (fetch, sleep, clock) are injected so the pure decision logic + * remains testable without real I/O. The factory builds four methods + * (`search`, `scrape`, `crawl`, `map`) over a self-hosted Firecrawl instance + * (no API key). `crawl` polls a status URL until the crawl completes or fails. 
+ */ + +import type { CrawlPage, ScrapeResult, SearchHit } from "./format.js"; + +export type FetchLike = typeof globalThis.fetch; + +export const DEFAULT_BASE_URL = "http://100.102.55.49:31329/v1"; +export const DEFAULT_TIMEOUT_MS = 30_000; +export const CRAWL_POLL_MS = 2_000; +export const CRAWL_MAX_WAIT_MS = 5 * 60 * 1_000; + +export interface SearchParams { + readonly query: string; + readonly limit: number; + readonly lang?: string; + readonly country?: string; + readonly scrapeOptions?: { + readonly formats: readonly string[]; + readonly onlyMainContent: boolean; + }; +} + +export interface ScrapeParams { + readonly url: string; + readonly formats: readonly string[]; +} + +export interface CrawlParams { + readonly url: string; + readonly limit: number; + readonly formats: readonly string[]; +} + +export interface FirecrawlClient { + readonly search: (params: SearchParams, signal: AbortSignal) => Promise<readonly SearchHit[]>; + readonly scrape: (params: ScrapeParams, signal: AbortSignal) => Promise<ScrapeResult>; + readonly crawl: (params: CrawlParams, signal: AbortSignal) => Promise<readonly CrawlPage[]>; + readonly map: (url: string, signal: AbortSignal) => Promise<readonly string[]>; +} + +export interface FirecrawlClientDeps { + readonly baseUrl: string; + readonly fetchFn: FetchLike; + readonly timeoutMs?: number; + readonly pollMs?: number; + readonly maxWaitMs?: number; + readonly now?: () => number; + readonly sleep?: (ms: number, signal: AbortSignal) => Promise<void>; +} + +interface SearchResponse { + readonly success: boolean; + readonly data?: readonly SearchHit[]; + readonly error?: string; +} + +interface ScrapeResponse { + readonly success: boolean; + readonly data?: { + readonly markdown?: string; + readonly metadata?: { readonly title?: string }; + }; + readonly error?: string; +} + +interface CrawlStartResponse { + readonly success: boolean; + readonly url?: string; + readonly error?: string; +} + +interface CrawlStatusResponse { + readonly 
status: string; + readonly data?: readonly CrawlPage[]; + readonly error?: string; +} + +interface MapResponse { + readonly success: boolean; + readonly links?: readonly string[]; + readonly error?: string; +} + +/** Default sleep: resolve after `ms`, reject on abort. */ +async function defaultSleep(ms: number, signal: AbortSignal): Promise<void> { + return new Promise<void>((resolve, reject) => { + if (signal.aborted) { + reject(new Error("Request aborted.")); + return; + } + let timer: ReturnType<typeof setTimeout> | undefined; + const onAbort = (): void => { + if (timer !== undefined) { + clearTimeout(timer); + } + reject(new Error("Request aborted.")); + }; + timer = setTimeout(() => { + signal.removeEventListener("abort", onAbort); + resolve(); + }, ms); + signal.addEventListener("abort", onAbort, { once: true }); + }); +} + +/** + * Create a FirecrawlClient. Each method builds a fetch request, calls the + * injected `fetchFn`, and handles HTTP + JSON errors. The per-request timeout + * is combined with the caller's cancellation signal via `AbortSignal.any`. + */ +export function createFirecrawlClient(deps: FirecrawlClientDeps): FirecrawlClient { + const baseUrl = deps.baseUrl; + const fetchFn = deps.fetchFn; + const timeoutMs = deps.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const pollMs = deps.pollMs ?? CRAWL_POLL_MS; + const maxWaitMs = deps.maxWaitMs ?? CRAWL_MAX_WAIT_MS; + const now = deps.now ?? Date.now; + const sleep = deps.sleep ?? defaultSleep; + + async function request( + method: "POST" | "GET", + url: string, + body: unknown, + signal: AbortSignal, + ): Promise<unknown> { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), timeoutMs); + const combined = AbortSignal.any([signal, controller.signal]); + try { + let response: Response; + try { + response = await fetchFn(url, { + method, + headers: + body !== undefined + ? 
{ "Content-Type": "application/json", Accept: "application/json" } + : { Accept: "application/json" }, + body: body !== undefined ? JSON.stringify(body) : undefined, + signal: combined, + }); + } catch (err) { + if (signal.aborted) { + throw new Error("Request aborted."); + } + if (controller.signal.aborted) { + throw new Error(`Firecrawl request timed out after ${timeoutMs / 1000} seconds.`); + } + throw err; + } + if (!response.ok) { + const text = await response.text().catch(() => ""); + throw new Error(`HTTP ${response.status} ${response.statusText}${text ? `: ${text}` : ""}`); + } + try { + return await response.json(); + } catch { + throw new Error("Failed to parse Firecrawl response as JSON"); + } + } finally { + clearTimeout(timeout); + } + } + + async function post(endpoint: string, body: unknown, signal: AbortSignal): Promise<unknown> { + return request("POST", `${baseUrl}/${endpoint}`, body, signal); + } + + return { + async search(params: SearchParams, signal: AbortSignal): Promise<readonly SearchHit[]> { + const body: Record<string, unknown> = { query: params.query, limit: params.limit }; + if (params.lang !== undefined) { + body.lang = params.lang; + } + if (params.country !== undefined) { + body.country = params.country; + } + if (params.scrapeOptions !== undefined) { + body.scrapeOptions = params.scrapeOptions; + } + const json = (await post("search", body, signal)) as SearchResponse; + if (!json.success) { + throw new Error(json.error ?? "Unknown error"); + } + return json.data ?? []; + }, + + async scrape(params: ScrapeParams, signal: AbortSignal): Promise<ScrapeResult> { + const body = { + url: params.url, + formats: params.formats, + onlyMainContent: true, + }; + const json = (await post("scrape", body, signal)) as ScrapeResponse; + if (!json.success) { + throw new Error(json.error ?? 
"Unknown error"); + } + return json; + }, + + async crawl(params: CrawlParams, signal: AbortSignal): Promise<readonly CrawlPage[]> { + const body = { + url: params.url, + limit: params.limit, + scrapeOptions: { formats: params.formats, onlyMainContent: true }, + }; + const startJson = (await post("crawl", body, signal)) as CrawlStartResponse; + if (!startJson.success) { + throw new Error(startJson.error ?? "Unknown error"); + } + const statusUrl = startJson.url; + if (statusUrl === undefined) { + throw new Error("crawl response missing status URL."); + } + const started = now(); + while (now() - started < maxWaitMs) { + await sleep(pollMs, signal); + const status = (await request("GET", statusUrl, undefined, signal)) as CrawlStatusResponse; + if (status.status === "completed") { + return status.data ?? []; + } + if (status.status === "failed") { + throw new Error(`crawl failed: ${status.error ?? "unknown"}`); + } + } + throw new Error("crawl timed out waiting for completion."); + }, + + async map(url: string, signal: AbortSignal): Promise<readonly string[]> { + const json = (await post("map", { url }, signal)) as MapResponse; + if (!json.success) { + throw new Error(json.error ?? "Unknown error"); + } + return json.links ?? 
[]; + }, + }; +} diff --git a/packages/tool-web-search/src/extension.test.ts b/packages/tool-web-search/src/extension.test.ts new file mode 100644 index 0000000..6e0a6bc --- /dev/null +++ b/packages/tool-web-search/src/extension.test.ts @@ -0,0 +1,113 @@ +import { createLogger, type HostAPI, type ToolExecuteContext } from "@dispatch/kernel"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { activate, extension, manifest } from "./extension.js"; + +function stubCtx(overrides?: Partial<ToolExecuteContext>): ToolExecuteContext { + return { + toolCallId: "test-call-1", + onOutput: () => {}, + signal: new AbortController().signal, + log: createLogger( + { extensionId: "test" }, + { emit: () => {} }, + { now: () => 0, newId: () => "id" }, + ), + ...overrides, + }; +} + +function makeFakeHost(): { host: HostAPI; defineTool: ReturnType<typeof vi.fn> } { + const defineTool = vi.fn(); + const host = { + defineTool, + logger: { + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + span: vi.fn(() => ({ end: vi.fn() })), + }, + } as unknown as HostAPI; + return { host, defineTool }; +} + +const ORIG_FETCH = globalThis.fetch; +const ORIG_ENV = process.env.FIRECRAWL_BASE_URL; + +function restoreEnv(): void { + if (ORIG_ENV === undefined) { + delete process.env.FIRECRAWL_BASE_URL; + } else { + process.env.FIRECRAWL_BASE_URL = ORIG_ENV; + } +} + +afterEach(() => { + globalThis.fetch = ORIG_FETCH; + restoreEnv(); +}); + +function stubFetchCapture(): { calls: Array<{ url: string }> } { + const calls: Array<{ url: string }> = []; + globalThis.fetch = vi.fn(async (input: string | URL | Request) => { + calls.push({ url: String(input) }); + return new Response(JSON.stringify({ success: true, data: [] }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + }) as unknown as typeof globalThis.fetch; + return { calls }; +} + +describe("tool-web-search activation", () => { + it("registers the 'web_search' tool (defineTool 
called)", () => { + const { host, defineTool } = makeFakeHost(); + activate(host); + expect(defineTool).toHaveBeenCalledTimes(1); + const registered = defineTool.mock.calls[0]?.[0]; + if (!registered) throw new Error("no tool registered"); + expect(registered.name).toBe("web_search"); + expect(registered.concurrencySafe).toBe(true); + }); + + it("uses FIRECRAWL_BASE_URL from env", async () => { + process.env.FIRECRAWL_BASE_URL = "http://env-firecrawl.local/v1"; + const { calls } = stubFetchCapture(); + const { host, defineTool } = makeFakeHost(); + activate(host); + + const tool = defineTool.mock.calls[0]?.[0]; + if (!tool) throw new Error("no tool registered"); + await tool.execute({ query: "hello" }, stubCtx()); + expect(calls.length).toBeGreaterThan(0); + expect(calls[0]?.url).toContain("http://env-firecrawl.local/v1/search"); + }); + + it("uses default base URL when env unset", async () => { + delete process.env.FIRECRAWL_BASE_URL; + const { calls } = stubFetchCapture(); + const { host, defineTool } = makeFakeHost(); + activate(host); + + const tool = defineTool.mock.calls[0]?.[0]; + if (!tool) throw new Error("no tool registered"); + await tool.execute({ query: "hello" }, stubCtx()); + expect(calls.length).toBeGreaterThan(0); + expect(calls[0]?.url).toContain("100.102.55.49:31329/v1/search"); + }); +}); + +describe("tool-web-search manifest", () => { + it("declares network capability + web_search contribution", () => { + expect(manifest.id).toBe("tool-web-search"); + expect(manifest.capabilities).toEqual({ network: true }); + expect(manifest.contributes).toEqual({ tools: ["web_search"] }); + expect(manifest.trust).toBe("bundled"); + expect(manifest.activation).toBe("eager"); + }); + + it("extension bundles the manifest + activate", () => { + expect(extension.manifest).toBe(manifest); + expect(typeof extension.activate).toBe("function"); + }); +}); diff --git a/packages/tool-web-search/src/extension.ts b/packages/tool-web-search/src/extension.ts new file mode 
100644 index 0000000..1d1803d --- /dev/null +++ b/packages/tool-web-search/src/extension.ts @@ -0,0 +1,32 @@ +/** + * tool-web-search extension — registers the `web_search` tool backed by a + * self-hosted Firecrawl instance on activation. + * + * The base URL comes from `FIRECRAWL_BASE_URL` (env) with a Tailscale default. + * Effects (`globalThis.fetch`) come from the ambient edge here, in the shell — + * never in the pure core. Logging is left to the host via `host.logger`/`ctx.log` + * (no `console.*`, no hand-rolled logger). + */ + +import type { Extension, HostAPI, Manifest } from "@dispatch/kernel"; +import { createFirecrawlClient, DEFAULT_BASE_URL } from "./client.js"; +import { createWebSearchTool } from "./tool.js"; + +export const manifest: Manifest = { + id: "tool-web-search", + name: "Web Search Tool", + version: "0.0.0", + apiVersion: "^0.1.0", + trust: "bundled", + activation: "eager", + capabilities: { network: true }, + contributes: { tools: ["web_search"] }, +}; + +export function activate(host: HostAPI): void { + const baseUrl = process.env.FIRECRAWL_BASE_URL ?? 
DEFAULT_BASE_URL; + const client = createFirecrawlClient({ baseUrl, fetchFn: globalThis.fetch }); + host.defineTool(createWebSearchTool({ client })); +} + +export const extension: Extension = { manifest, activate }; diff --git a/packages/tool-web-search/src/format.test.ts b/packages/tool-web-search/src/format.test.ts new file mode 100644 index 0000000..b98bc02 --- /dev/null +++ b/packages/tool-web-search/src/format.test.ts @@ -0,0 +1,87 @@ +import { describe, expect, it } from "vitest"; +import { + formatCrawlResults, + formatMapResults, + formatScrapeResult, + formatSearchResults, + truncateOutput, +} from "./format.js"; + +describe("formatSearchResults", () => { + it("formats title + url + description + optional markdown", () => { + const out = formatSearchResults([ + { title: "T1", url: "http://a", description: "desc", markdown: "md-body" }, + ]); + expect(out).toBe("### T1\nhttp://a\n\ndesc\n\nmd-body"); + }); + + it("joins multiple results with ---", () => { + const out = formatSearchResults([ + { title: "T1", url: "http://a", description: "d1" }, + { title: "T2", url: "http://b", description: "d2" }, + ]); + expect(out).toBe("### T1\nhttp://a\n\nd1\n\n---\n\n### T2\nhttp://b\n\nd2"); + }); + + it("empty data returns 'No results found.'", () => { + expect(formatSearchResults([])).toBe("No results found."); + expect(formatSearchResults(null)).toBe("No results found."); + expect(formatSearchResults(undefined)).toBe("No results found."); + }); +}); + +describe("formatScrapeResult", () => { + it("formats title + markdown", () => { + const out = formatScrapeResult({ + data: { markdown: "body", metadata: { title: "Title" } }, + }); + expect(out).toBe("# Title\n\nbody"); + }); + + it("omits title header when absent", () => { + const out = formatScrapeResult({ data: { markdown: "body" } }); + expect(out).toBe("body"); + }); +}); + +describe("formatCrawlResults", () => { + it("formats multiple pages", () => { + const out = formatCrawlResults([ + { markdown: "p1", 
metadata: { title: "P1", sourceURL: "http://p1" } }, + { markdown: "p2", metadata: { title: "P2", url: "http://p2" } }, + ]); + expect(out).toBe("## P1\nhttp://p1\n\np1\n\n---\n\n## P2\nhttp://p2\n\np2"); + }); + + it("empty data returns 'No pages crawled.'", () => { + expect(formatCrawlResults([])).toBe("No pages crawled."); + expect(formatCrawlResults(null)).toBe("No pages crawled."); + }); +}); + +describe("formatMapResults", () => { + it("formats links as bullet list", () => { + const out = formatMapResults(["http://a", "http://b"]); + expect(out).toBe("- http://a\n- http://b"); + }); + + it("empty links returns 'No links found.'", () => { + expect(formatMapResults([])).toBe("No links found."); + expect(formatMapResults(null)).toBe("No links found."); + }); +}); + +describe("truncateOutput", () => { + it("truncates with notice when over cap", () => { + const output = "a".repeat(100); + const result = truncateOutput(output, 50); + expect(result).toContain("a".repeat(50)); + expect(result).toContain("[Output truncated: exceeded 50 characters]"); + expect(result.length).toBeLessThan(output.length + 100); + }); + + it("returns as-is when under cap", () => { + expect(truncateOutput("short", 100)).toBe("short"); + expect(truncateOutput("exact", 5)).toBe("exact"); + }); +}); diff --git a/packages/tool-web-search/src/format.ts b/packages/tool-web-search/src/format.ts new file mode 100644 index 0000000..cfc9aa0 --- /dev/null +++ b/packages/tool-web-search/src/format.ts @@ -0,0 +1,111 @@ +/** + * Pure formatters for the web_search tool — input → output, no I/O. + * + * These mirror the proven opencode Firecrawl tool's formatting, isolated + * (not imported) per the isolation-over-DRY rule. Tested directly with + * zero mocks. + */ + +/** A single search hit from Firecrawl's `/search` endpoint. 
*/ +export interface SearchHit { + readonly title?: string; + readonly url?: string; + readonly description?: string; + readonly markdown?: string; +} + +/** One page from a completed crawl (`/crawl` status `data`). */ +export interface CrawlPage { + readonly markdown?: string; + readonly metadata?: { + readonly title?: string; + readonly sourceURL?: string; + readonly url?: string; + }; +} + +/** The scrape response payload (`/scrape` `data`). */ +export interface ScrapeResult { + readonly data?: { + readonly markdown?: string; + readonly metadata?: { readonly title?: string }; + }; +} + +/** + * Truncate output to `cap` characters with a trailing notice, identical in + * spirit to tool-shell. Duplication across features is the intended trade. + */ +export function truncateOutput(output: string, cap: number): string { + if (output.length <= cap) { + return output; + } + const truncated = output.slice(0, cap); + return `${truncated}\n\n[Output truncated: exceeded ${cap} characters]`; +} + +/** + * Format search hits as `### title\nurl\n\ndescription` (+ optional markdown), + * joined by `---` separators. Empty → `"No results found."`. + */ +export function formatSearchResults(data: readonly SearchHit[] | null | undefined): string { + if (!data || data.length === 0) { + return "No results found."; + } + const parts: string[] = []; + for (const r of data) { + const title = r.title ?? "(no title)"; + const url = r.url ?? ""; + const description = r.description ?? ""; + let section = `### ${title}\n${url}\n\n${description}`; + if (r.markdown) { + section += `\n\n${r.markdown}`; + } + parts.push(section); + } + return parts.join("\n\n---\n\n"); +} + +/** + * Format a scrape response as `# title\n\nmarkdown`, omitting the header when + * the title is absent. + */ +export function formatScrapeResult(json: ScrapeResult): string { + const md = json.data?.markdown ?? 
""; + const title = json.data?.metadata?.title; + if (title) { + return `# ${title}\n\n${md}`; + } + return md; +} + +/** + * Format crawled pages as `## title\nurl\n\nmarkdown` each, joined by `---`. + * Empty → `"No pages crawled."`. + */ +export function formatCrawlResults(data: readonly CrawlPage[] | null | undefined): string { + if (!data || data.length === 0) { + return "No pages crawled."; + } + const parts: string[] = []; + for (const page of data) { + const title = page.metadata?.title ?? "(no title)"; + const url = page.metadata?.sourceURL ?? page.metadata?.url ?? ""; + let section = `## ${title}\n${url}`; + if (page.markdown) { + section += `\n\n${page.markdown}`; + } + parts.push(section); + } + return parts.join("\n\n---\n\n"); +} + +/** + * Format discovered links as a bullet list. Empty → `"No links found."`. + */ +export function formatMapResults(links: readonly string[] | null | undefined): string { + if (!links || links.length === 0) { + return "No links found."; + } + return links.map((l) => `- ${l}`).join("\n"); +} diff --git a/packages/tool-web-search/src/index.ts b/packages/tool-web-search/src/index.ts new file mode 100644 index 0000000..69894d1 --- /dev/null +++ b/packages/tool-web-search/src/index.ts @@ -0,0 +1,40 @@ +export { + CRAWL_MAX_WAIT_MS, + CRAWL_POLL_MS, + type CrawlParams, + createFirecrawlClient, + DEFAULT_BASE_URL, + DEFAULT_TIMEOUT_MS, + type FetchLike, + type FirecrawlClient, + type FirecrawlClientDeps, + type ScrapeParams, + type SearchParams, +} from "./client.js"; +export { activate, extension, manifest } from "./extension.js"; +export { + type CrawlPage, + formatCrawlResults, + formatMapResults, + formatScrapeResult, + formatSearchResults, + type ScrapeResult, + type SearchHit, + truncateOutput, +} from "./format.js"; +export { createWebSearchTool, type WebSearchToolDeps } from "./tool.js"; +export { + CRAWL_DEFAULT_LIMIT, + type CrawlArgs, + FORMATS, + type Format, + MAX_LIMIT, + type MapArgs, + MODES, + type Mode, + type 
/** Default output cap handed to `truncateOutput` when the caller supplies none. */
const OUTPUT_CAP = 50_000;

/** Dependencies injected into the `web_search` tool factory. */
export interface WebSearchToolDeps {
  /** Firecrawl client used for every mode. */
  readonly client: FirecrawlClient;
  /** Optional override for the output cap; falls back to `OUTPUT_CAP`. */
  readonly outputCap?: number;
}

/** Turn anything thrown by the client into the `Error: …` text shown to the model. */
function describeFailure(err: unknown): string {
  return `Error: ${err instanceof Error ? err.message : String(err)}`;
}

/**
 * Route already-validated args to the matching client call and format the
 * response as text. Rejections from the client propagate to the caller.
 */
async function runMode(
  validated: ValidatedArgs,
  client: FirecrawlClient,
  signal: AbortSignal,
): Promise<string> {
  if (validated.mode === "search") {
    const { query, limit, scrape, lang, country } = validated;
    // Optional keys are spread in only when set, so the request never
    // carries explicit `undefined` properties.
    const hits = await client.search(
      {
        query,
        limit,
        ...(scrape ? { scrapeOptions: { formats: ["markdown"], onlyMainContent: true } } : {}),
        ...(lang === undefined ? {} : { lang }),
        ...(country === undefined ? {} : { country }),
      },
      signal,
    );
    return formatSearchResults(hits);
  }

  if (validated.mode === "scrape") {
    const { url, format } = validated;
    const scraped = await client.scrape({ url, formats: [format] }, signal);
    return formatScrapeResult(scraped);
  }

  if (validated.mode === "crawl") {
    const { url, limit, format } = validated;
    const pages = await client.crawl({ url, limit, formats: [format] }, signal);
    return formatCrawlResults(pages);
  }

  // Only "map" remains once the three modes above have been handled.
  const links = await client.map(validated.url, signal);
  return formatMapResults(links);
}

/**
 * Build the `web_search` tool contract around an injected Firecrawl client.
 *
 * The tool is marked `concurrencySafe: true`. Invalid arguments and client
 * failures are both returned as `{ isError: true }` results instead of being
 * thrown, so the model can read the message and react.
 *
 * @param deps - The client plus an optional output cap.
 * @returns The tool contract.
 */
export function createWebSearchTool(deps: WebSearchToolDeps): ToolContract {
  const { client } = deps;
  const cap = deps.outputCap ?? OUTPUT_CAP;

  const execute = async (args: unknown, ctx: ToolExecuteContext): Promise<ToolResult> => {
    const validated = validateArgs(args);
    if ("error" in validated) {
      // Validation messages are already model-facing; pass them through verbatim.
      return { content: validated.error, isError: true };
    }

    const span = ctx.log.span("web_search.execute", { mode: validated.mode });
    try {
      const output = await runMode(validated, client, ctx.signal);
      span.end();
      return { content: truncateOutput(output, cap) };
    } catch (err: unknown) {
      span.end({ err });
      return { content: describeFailure(err), isError: true };
    }
  };

  return {
    name: "web_search",
    description:
      "Access the web via a self-hosted Firecrawl instance. Supports search, " +
      "single-page scrape, site crawling, and sitemap discovery.",
    parameters: {
      type: "object",
      properties: {
        query: { type: "string", description: "The search query (search mode)." },
        url: { type: "string", description: "A URL to scrape, crawl, or map." },
        mode: {
          type: "string",
          enum: ["search", "scrape", "crawl", "map"],
          description:
            "Operation mode. 'search' (default when query present), 'scrape' " +
            "(default when url present), 'crawl' (recursively scrape pages from a site), " +
            "'map' (discover URLs on a site).",
        },
        limit: {
          type: "number",
          description: "Max results. Search: default 7, max 10. Crawl: default 3, max 10.",
        },
        scrape: {
          type: "boolean",
          description: "When searching, also scrape full markdown content of each result page.",
        },
        lang: {
          type: "string",
          description: 'Language code to filter search results (e.g. "en", "ja").',
        },
        country: {
          type: "string",
          description: 'Country code to filter search results (e.g. "us", "jp").',
        },
        format: {
          type: "string",
          enum: ["markdown", "text", "html"],
          description: "Format for scrape/crawl output (default: markdown).",
        },
      },
    },
    concurrencySafe: true,
    execute,
  };
}
result = validateArgs({ query: "hello", url: "http://x", mode: "map" }); + expect("error" in result).toBe(false); + if ("error" in result) return; + expect(result.mode).toBe("map"); + expect((result as MapArgs).url).toBe("http://x"); + }); + + it("search mode requires query", () => { + const result = validateArgs({ mode: "search" }); + expect(result).toHaveProperty("error"); + }); + + it("scrape/crawl/map modes require url", () => { + expect(validateArgs({ mode: "scrape" })).toHaveProperty("error"); + expect(validateArgs({ mode: "crawl" })).toHaveProperty("error"); + expect(validateArgs({ mode: "map" })).toHaveProperty("error"); + }); + + it("limit clamped to max 10", () => { + const result = validateArgs({ query: "hello", limit: 50 }); + expect("error" in result).toBe(false); + if ("error" in result) return; + expect((result as SearchArgs).limit).toBe(10); + }); + + it("limit defaults to 7 (search) / 3 (crawl)", () => { + const search = validateArgs({ query: "hello" }); + expect("error" in search).toBe(false); + if ("error" in search) return; + expect((search as SearchArgs).limit).toBe(7); + + const crawl = validateArgs({ url: "http://x", mode: "crawl" }); + expect("error" in crawl).toBe(false); + if ("error" in crawl) return; + expect((crawl as CrawlArgs).limit).toBe(3); + }); + + it("format defaults to markdown", () => { + const result = validateArgs({ query: "hello" }); + expect("error" in result).toBe(false); + if ("error" in result) return; + expect(result.format).toBe("markdown"); + }); + + it("rejects invalid mode", () => { + const result = validateArgs({ mode: "invalid" }); + expect(result).toHaveProperty("error"); + if (!("error" in result)) return; + expect(result.error).toContain("Invalid mode"); + }); + + it("rejects invalid format", () => { + const result = validateArgs({ url: "http://x", format: "pdf" }); + expect(result).toHaveProperty("error"); + if (!("error" in result)) return; + expect(result.error).toContain("Invalid format"); + }); + + 
/**
 * Pure argument validation for the web_search tool — input → output, no I/O.
 *
 * Resolves the operation mode (explicit, or inferred from `query`/`url`),
 * applies per-mode field requirements, clamps `limit`, and defaults `format`.
 * Returns a discriminated union so the tool's dispatch narrows by `mode`.
 */

export const MODES = ["search", "scrape", "crawl", "map"] as const;
export type Mode = (typeof MODES)[number];

export const FORMATS = ["markdown", "text", "html"] as const;
export type Format = (typeof FORMATS)[number];

export const SEARCH_DEFAULT_LIMIT = 7;
export const CRAWL_DEFAULT_LIMIT = 3;
export const MAX_LIMIT = 10;

/** Fields shared by every mode. */
interface BaseArgs {
  readonly format: Format;
}

export interface SearchArgs extends BaseArgs {
  readonly mode: "search";
  readonly query: string;
  readonly limit: number;
  readonly scrape: boolean;
  readonly lang?: string;
  readonly country?: string;
}

export interface ScrapeArgs extends BaseArgs {
  readonly mode: "scrape";
  readonly url: string;
}

export interface CrawlArgs extends BaseArgs {
  readonly mode: "crawl";
  readonly url: string;
  readonly limit: number;
}

export interface MapArgs extends BaseArgs {
  readonly mode: "map";
  readonly url: string;
}

export type ValidatedArgs = SearchArgs | ScrapeArgs | CrawlArgs | MapArgs;

export type ValidationError = { readonly error: string };

/** Internal success-or-error carrier used by the per-field resolvers. */
type Result<T> = { readonly value: T } | ValidationError;

/** Resolve `format`: absent → `"markdown"`; otherwise it must be one of `FORMATS`. */
function resolveFormat(raw: unknown): Result<Format> {
  if (raw === undefined || raw === null) {
    return { value: "markdown" };
  }
  if (typeof raw === "string" && (FORMATS as readonly string[]).includes(raw)) {
    return { value: raw as Format };
  }
  return {
    // The list is derived from FORMATS so the message cannot drift from the constant.
    error: `Error: Invalid format "${String(raw)}" (must be one of: ${FORMATS.join(", ")}).`,
  };
}

/**
 * Resolve `mode`. When absent it is inferred: a non-blank `query` selects
 * `"search"`, otherwise a non-blank `url` selects `"scrape"`, otherwise
 * `"search"` (which then fails with the "query is required" error).
 */
function resolveMode(raw: unknown, query: unknown, url: unknown): Result<Mode> {
  if (raw === undefined || raw === null) {
    const hasQuery = typeof query === "string" && query.trim().length > 0;
    const hasUrl = typeof url === "string" && url.trim().length > 0;
    return { value: !hasQuery && hasUrl ? "scrape" : "search" };
  }
  if (typeof raw === "string" && (MODES as readonly string[]).includes(raw)) {
    return { value: raw as Mode };
  }
  return {
    error: `Error: Invalid mode "${String(raw)}" (must be one of: ${MODES.join(", ")}).`,
  };
}

/** Accept an absent value or a string; anything else is a type error for `name`. */
function optionalString(raw: unknown, name: string): Result<string | undefined> {
  if (raw === undefined || raw === null) {
    return { value: undefined };
  }
  if (typeof raw === "string") {
    return { value: raw };
  }
  return { error: `Error: "${name}" must be a string.` };
}

/**
 * Require a non-blank string for `field` in the given `mode` and return it
 * trimmed, so surrounding whitespace never reaches the backend.
 */
function requireNonBlank(
  raw: string | undefined,
  field: "query" | "url",
  mode: Mode,
): Result<string> {
  const trimmed = raw?.trim() ?? "";
  if (trimmed.length === 0) {
    return { error: `Error: ${field} is required for ${mode} mode.` };
  }
  return { value: trimmed };
}

/**
 * Resolve `limit`: absent → `defaultLimit`; otherwise a number (or numeric
 * string) ≥ 1, floored and clamped to `MAX_LIMIT`.
 *
 * Only numbers and non-blank strings are converted. Blanket `Number(raw)`
 * coercion would accept `true` as 1 and `[5]` as 5.
 */
function resolveLimit(raw: unknown, defaultLimit: number): Result<number> {
  if (raw === undefined || raw === null) {
    return { value: defaultLimit };
  }
  let n = Number.NaN;
  if (typeof raw === "number") {
    n = raw;
  } else if (typeof raw === "string" && raw.trim().length > 0) {
    n = Number(raw);
  }
  if (!Number.isFinite(n) || n < 1) {
    return { error: 'Error: "limit" must be a positive number.' };
  }
  return { value: Math.min(Math.floor(n), MAX_LIMIT) };
}

/** Resolve an optional boolean flag: absent → `false`; non-boolean → error. */
function resolveBoolean(raw: unknown, name: string): Result<boolean> {
  if (raw === undefined || raw === null) {
    return { value: false };
  }
  if (typeof raw === "boolean") {
    return { value: raw };
  }
  return { error: `Error: "${name}" must be a boolean.` };
}

/**
 * Validate raw tool args and resolve a typed, mode-aware `ValidatedArgs`.
 * Returns `{ error }` for invalid input — the tool surfaces it verbatim.
 *
 * Validation order is: `format`, `mode`, `query`, `url`, then the fields of
 * the resolved mode; the first failure wins. `query` and `url` are returned
 * trimmed.
 *
 * @param args - Untrusted arguments as supplied by the model.
 * @returns The validated args for the resolved mode, or `{ error }`.
 */
export function validateArgs(args: unknown): ValidatedArgs | ValidationError {
  // Arrays are `typeof "object"` too; reject them here so the caller gets the
  // accurate message rather than a misleading "query is required".
  if (args === null || typeof args !== "object" || Array.isArray(args)) {
    return { error: "Error: Arguments must be an object." };
  }
  const obj = args as Record<string, unknown>;

  const format = resolveFormat(obj.format);
  if ("error" in format) {
    return format;
  }

  const mode = resolveMode(obj.mode, obj.query, obj.url);
  if ("error" in mode) {
    return mode;
  }

  const query = optionalString(obj.query, "query");
  if ("error" in query) {
    return query;
  }

  const url = optionalString(obj.url, "url");
  if ("error" in url) {
    return url;
  }

  switch (mode.value) {
    case "search": {
      const text = requireNonBlank(query.value, "query", "search");
      if ("error" in text) {
        return text;
      }
      const limit = resolveLimit(obj.limit, SEARCH_DEFAULT_LIMIT);
      if ("error" in limit) {
        return limit;
      }
      const scrape = resolveBoolean(obj.scrape, "scrape");
      if ("error" in scrape) {
        return scrape;
      }
      const lang = optionalString(obj.lang, "lang");
      if ("error" in lang) {
        return lang;
      }
      const country = optionalString(obj.country, "country");
      if ("error" in country) {
        return country;
      }
      const result: SearchArgs = {
        mode: "search",
        query: text.value,
        limit: limit.value,
        scrape: scrape.value,
        format: format.value,
        // Optional keys are omitted entirely rather than set to undefined.
        ...(lang.value !== undefined ? { lang: lang.value } : {}),
        ...(country.value !== undefined ? { country: country.value } : {}),
      };
      return result;
    }
    case "scrape": {
      const target = requireNonBlank(url.value, "url", "scrape");
      if ("error" in target) {
        return target;
      }
      const result: ScrapeArgs = {
        mode: "scrape",
        url: target.value,
        format: format.value,
      };
      return result;
    }
    case "crawl": {
      const target = requireNonBlank(url.value, "url", "crawl");
      if ("error" in target) {
        return target;
      }
      const limit = resolveLimit(obj.limit, CRAWL_DEFAULT_LIMIT);
      if ("error" in limit) {
        return limit;
      }
      const result: CrawlArgs = {
        mode: "crawl",
        url: target.value,
        limit: limit.value,
        format: format.value,
      };
      return result;
    }
    case "map": {
      const target = requireNonBlank(url.value, "url", "map");
      if ("error" in target) {
        return target;
      }
      const result: MapArgs = {
        mode: "map",
        url: target.value,
        format: format.value,
      };
      return result;
    }
    default: {
      // Exhaustiveness guard: adding a Mode without a case fails to compile here.
      const unreachable: never = mode.value;
      return {
        error: `Error: Invalid mode "${String(unreachable)}" (must be one of: ${MODES.join(", ")}).`,
      };
    }
  }
}
+ +## web_search tool — Firecrawl (DONE) +Standard tool extension `tool-web-search` backed by a self-hosted Firecrawl instance +(`http://100.102.55.49:31329/v1`, Tailscale, no API key). One tool `web_search` with 4 +modes: search, scrape, crawl (polls status URL), map — mirroring the proven opencode tool. +Pure core: `validateArgs` (discriminated union by mode) + `format*` functions + `truncateOutput`. +Injected edge: `FirecrawlClient` (injectable `fetchFn` + `sleep` + `now`), `AbortSignal.any` +for per-request timeout + caller cancellation. `concurrencySafe: true`, `capabilities: { network: true }`. +38 tests. Report: `reports/tool-web-search.md`. +- **LIVE-VERIFIED:** the dev stack (umans-glm-5.2) called `web_search` → Firecrawl returned + real results (Paris, France) — first live Umans API call too. ## Open items - **Context window LIMIT (deferred, sibling of context size):** expose the selected model's max @@ -507,8 +519,7 @@ path**: first extract a generic `@dispatch/openai-stream` library from 5. **`todo` tool** — a per-conversation task-list tool the model maintains (like opencode's todowrite/todoread), as a standard tool extension; likely a surface so the FE can render the live list. - 6. **`web_search` tool** — a web search tool (like old dispatch's; - reference-only source at `../dispatch-source`), as a standard tool extension. + 6. ~~**`web_search` tool**~~ — **DONE** (see milestone section above). 7. 
**Message queue — close-with-queued-messages (deferred product decision):** if a client closes a conversation (`POST /conversations/:id/close`) while the queue is non-empty, the carry currently still fires (starts a new turn on the diff --git a/tsconfig.json b/tsconfig.json index b227e92..d084acb 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -22,6 +22,7 @@ { "path": "./packages/tool-shell" }, { "path": "./packages/tool-edit-file" }, { "path": "./packages/tool-write-file" }, + { "path": "./packages/tool-web-search" }, { "path": "./packages/skills" }, { "path": "./packages/cache-warming" }, { "path": "./packages/message-queue" }, |
