summary refs log tree commit diff homepage
path: root/packages/tool-web-search/src/client.test.ts
blob: f020a83c00e65c964380c75c91603a01060bc53b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
import { describe, expect, it } from "vitest";
import { createFirecrawlClient, type FetchLike } from "./client.js";

/** Wraps `body` in a JSON-encoded Response with the given HTTP status (200 unless specified). */
function jsonResponse(body: unknown, status = 200): Response {
	const payload = JSON.stringify(body);
	const headers = { "Content-Type": "application/json" };
	return new Response(payload, { status, headers });
}

/** One request observed by the fake fetch built in this file. */
interface CapturedCall {
	/** Request URL, normalised to a string. */
	url: string;
	/** HTTP method taken from the fetch init, if one was supplied. */
	method?: string | undefined;
	/** Request body; recorded only when it was passed as a string. */
	body?: string | undefined;
}

/**
 * Creates a stand-in for `fetch` that replays `responses` one per call, in order,
 * and records the URL, method and (string) body of every request it receives.
 * Once the script runs out, further calls receive an empty JSON object response.
 *
 * @param responses Scripted responses, consumed front to back.
 * @returns The fake fetch plus the live array of captured calls.
 */
function makeFetch(responses: Response[]): { fetchFn: FetchLike; calls: CapturedCall[] } {
	const calls: CapturedCall[] = [];
	let cursor = 0;

	// Normalises the three input shapes fetch accepts down to a plain URL string.
	const toUrlString = (input: string | URL | Request): string => {
		if (typeof input === "string") return input;
		if (input instanceof URL) return input.toString();
		return input.url;
	};

	const scripted = async (
		input: string | URL | Request,
		init?: RequestInit,
	): Promise<Response> => {
		const rawBody = init?.body;
		calls.push({
			url: toUrlString(input),
			method: init?.method,
			body: typeof rawBody === "string" ? rawBody : undefined,
		});
		const next = responses[cursor];
		cursor += 1;
		return next ?? jsonResponse({});
	};

	return { fetchFn: scripted as unknown as FetchLike, calls };
}

/** Base URL handed to every client under test; each test injects a fake fetch alongside it. */
const BASE = "http://test-firecrawl.local/v1";

/** Returns the signal of a brand-new AbortController, for calls that do not exercise cancellation. */
const signal = (): AbortSignal => {
	const controller = new AbortController();
	return controller.signal;
};

/** Exercises `client.search`: request shape, happy-path result, and a `success: false` reply. */
describe("createFirecrawlClient.search", () => {
	it("sends POST /search with correct body", async () => {
		const emptyResults = jsonResponse({ success: true, data: [] });
		const fake = makeFetch([emptyResults]);
		const client = createFirecrawlClient({ baseUrl: BASE, fetchFn: fake.fetchFn });

		await client.search({ query: "hello", limit: 7 }, signal());

		const [request] = fake.calls;
		if (request === undefined) throw new Error("no call captured");
		expect(request.url).toBe(`${BASE}/search`);
		expect(request.method).toBe("POST");
		// A missing body is parsed as `{}` so the mismatch shows up in the comparison below.
		const sentPayload: unknown = JSON.parse(request.body ?? "{}");
		expect(sentPayload).toEqual({ query: "hello", limit: 7 });
	});

	it("returns parsed data on success", async () => {
		const hits = [{ title: "T", url: "http://x", description: "d" }];
		const fake = makeFetch([jsonResponse({ success: true, data: hits })]);
		const client = createFirecrawlClient({ baseUrl: BASE, fetchFn: fake.fetchFn });

		const found = await client.search({ query: "hello", limit: 7 }, signal());

		expect(found).toEqual(hits);
	});

	it("throws on !success", async () => {
		const failure = jsonResponse({ success: false, error: "boom" });
		const fake = makeFetch([failure]);
		const client = createFirecrawlClient({ baseUrl: BASE, fetchFn: fake.fetchFn });

		const pending = client.search({ query: "x", limit: 7 }, signal());

		await expect(pending).rejects.toThrow("boom");
	});
});

/** Exercises `client.scrape`: verifies the outgoing request's URL, method and JSON body. */
describe("createFirecrawlClient.scrape", () => {
	it("sends POST /scrape with correct body", async () => {
		const scraped = jsonResponse({
			success: true,
			data: { markdown: "md", metadata: { title: "T" } },
		});
		const fake = makeFetch([scraped]);
		const client = createFirecrawlClient({ baseUrl: BASE, fetchFn: fake.fetchFn });

		await client.scrape({ url: "http://x", formats: ["markdown"] }, signal());

		const [request] = fake.calls;
		if (request === undefined) throw new Error("no call captured");
		expect(request.url).toBe(`${BASE}/scrape`);
		expect(request.method).toBe("POST");
		// The expected body carries `onlyMainContent: true` even though the caller did not pass it.
		const sentPayload: unknown = JSON.parse(request.body ?? "{}");
		expect(sentPayload).toEqual({
			url: "http://x",
			formats: ["markdown"],
			onlyMainContent: true,
		});
	});
});

/** Exercises `client.crawl`: status polling, result delivery, failure and cancellation. */
describe("createFirecrawlClient.crawl", () => {
	const statusUrl = `${BASE}/crawl/status/123`;

	/** Builds a new kickoff response (pointing at the status URL) for each test. */
	const kickoff = (): Response => jsonResponse({ success: true, url: statusUrl });

	/** Sleep stub that resolves immediately so polling adds no real delay. */
	const instantSleep = async (): Promise<void> => {};

	/** Client wired to the given fake fetch and the no-op sleep. */
	const clientWith = (fetchFn: FetchLike) =>
		createFirecrawlClient({ baseUrl: BASE, fetchFn, sleep: instantSleep });

	it("polls status URL until completed", async () => {
		const finishedPage = { markdown: "p1", metadata: { title: "P1", sourceURL: "http://p1" } };
		// Script: kickoff, one in-progress status, then the completed status with its data.
		const fake = makeFetch([
			kickoff(),
			jsonResponse({ status: "scraping" }),
			jsonResponse({ status: "completed", data: [finishedPage] }),
		]);
		const client = clientWith(fake.fetchFn);

		const pages = await client.crawl(
			{ url: "http://site", limit: 3, formats: ["markdown"] },
			signal(),
		);

		expect(pages).toEqual([finishedPage]);
		const requestCount = fake.calls.length;
		expect(requestCount).toBe(3);
	});

	it("returns data when completed", async () => {
		const completed = jsonResponse({
			status: "completed",
			data: [{ markdown: "page", metadata: { title: "T" } }],
		});
		const fake = makeFetch([kickoff(), completed]);
		const client = clientWith(fake.fetchFn);

		const pages = await client.crawl(
			{ url: "http://site", limit: 3, formats: ["markdown"] },
			signal(),
		);

		expect(pages.length).toBe(1);
		const firstMarkdown = pages[0]?.markdown;
		expect(firstMarkdown).toBe("page");
	});

	it("throws when status is failed", async () => {
		const failed = jsonResponse({ status: "failed", error: "boom" });
		const fake = makeFetch([kickoff(), failed]);
		const client = clientWith(fake.fetchFn);

		const pending = client.crawl(
			{ url: "http://site", limit: 3, formats: ["markdown"] },
			signal(),
		);

		await expect(pending).rejects.toThrow("failed");
	});

	it("respects abort signal (stops polling)", async () => {
		const aborter = new AbortController();
		// Only the kickoff is scripted; the request count below shows that nothing else was fetched.
		const fake = makeFetch([kickoff()]);
		const client = createFirecrawlClient({
			baseUrl: BASE,
			fetchFn: fake.fetchFn,
			// Abort from inside the sleep stub, then fail if the signal it was given is aborted.
			sleep: async (_ms, sig) => {
				aborter.abort();
				if (sig.aborted) throw new Error("Request aborted.");
			},
		});

		const pending = client.crawl(
			{ url: "http://site", limit: 3, formats: ["markdown"] },
			aborter.signal,
		);

		await expect(pending).rejects.toThrow();
		const requestCount = fake.calls.length;
		expect(requestCount).toBe(1);
	});
});

/** Exercises `client.map`: checks the returned links and the request that produced them. */
describe("createFirecrawlClient.map", () => {
	it("sends POST /map and returns links", async () => {
		const discovered = ["http://a", "http://b"];
		const fake = makeFetch([jsonResponse({ success: true, links: discovered })]);
		const client = createFirecrawlClient({ baseUrl: BASE, fetchFn: fake.fetchFn });

		const links = await client.map("http://site", signal());

		expect(links).toEqual(["http://a", "http://b"]);

		const [request] = fake.calls;
		if (request === undefined) throw new Error("no call captured");
		expect(request.url).toBe(`${BASE}/map`);
		expect(request.method).toBe("POST");
		const sentPayload: unknown = JSON.parse(request.body ?? "{}");
		expect(sentPayload).toEqual({ url: "http://site" });
	});
});

/** Covers request-level failures seen through `client.search`: an HTTP error status and a timeout. */
describe("createFirecrawlClient.request (error paths)", () => {
	it("throws on HTTP error", async () => {
		const notFound = new Response("not found", { status: 404, statusText: "Not Found" });
		const fake = makeFetch([notFound]);
		const client = createFirecrawlClient({ baseUrl: BASE, fetchFn: fake.fetchFn });

		const pending = client.search({ query: "x", limit: 7 }, signal());

		await expect(pending).rejects.toThrow("HTTP 404");
	});

	it("throws on timeout", async () => {
		// Never resolves; it rejects with an AbortError only when the signal in `init` fires.
		// Without a signal the promise simply stays pending.
		const hangingFetch = ((_input: string | URL | Request, init?: RequestInit) =>
			new Promise<Response>((_resolve, reject) => {
				const abortSignal = init?.signal;
				if (abortSignal) {
					abortSignal.addEventListener("abort", () => {
						const abortError = new Error("aborted");
						abortError.name = "AbortError";
						reject(abortError);
					});
				}
			})) as unknown as FetchLike;
		const client = createFirecrawlClient({
			baseUrl: BASE,
			fetchFn: hangingFetch,
			timeoutMs: 10,
		});

		const pending = client.search({ query: "x", limit: 7 }, signal());

		await expect(pending).rejects.toThrow("timed out");
	});
});