import { describe, expect, it, vi } from "vitest";
import { z } from "zod";
import type { AgentConfig, AgentEvent } from "../../src/types/index.js";

// Mock bun:sqlite to avoid Bun-only import in vitest/Node
vi.mock("../../src/db/index.js", () => ({
  getDatabase: vi.fn(() => ({})),
}));

// Mock the credentials module that depends on the DB
vi.mock("../../src/credentials/claude.js", () => ({
  buildBillingHeaderValue: vi.fn(() => ""),
  SYSTEM_IDENTITY: "You are a test agent.",
}));

// Mock the ai module's streamText
vi.mock("ai", async () => {
  const actual = await import("ai");
  return {
    ...actual,
    streamText: vi.fn(),
  };
});

// Mock the provider
vi.mock("@ai-sdk/openai-compatible", () => ({
  createOpenAICompatible: vi.fn(() => (_model: string) => ({
    type: "language-model",
    modelId: _model,
  })),
}));

// Imported dynamically, after the vi.mock registrations above.
const { Agent, anthropicThinkingProviderOptions } = await import("../../src/agent/agent.js");
const { streamText } = await import("ai");

/**
 * Builds an `AgentConfig` for tests.
 *
 * @param overrides - Fields to replace; spread last, so they win over the defaults.
 * @returns The default test config merged with `overrides`.
 */
function makeConfig(overrides: Partial<AgentConfig> = {}): AgentConfig {
  return {
    model: "test-model",
    apiKey: "test-key",
    baseURL: "https://example.com/v1",
    systemPrompt: "You are a helpful assistant.",
    tools: [],
    workingDirectory: "/tmp",
    ...overrides,
  };
}

/**
 * Async generator that yields the supplied stream events one by one, in order.
 *
 * @param events - Stream parts to replay.
 */
async function* makeFullStream(
  events: Array<{ type: string; [key: string]: unknown }>,
): AsyncGenerator<{ type: string; [key: string]: unknown }> {
  for (const event of events) {
    yield event;
  }
}

/**
 * Wraps `events` in an object exposing only `fullStream`, cast to the return
 * type of `streamText` so it can be handed to `mockReturnValue*`.
 *
 * NOTE: `fullStream` is a single generator instance, so a result object can
 * only be iterated once; a second consumer of the same object sees no events.
 *
 * @param events - Stream parts the fake `fullStream` will yield.
 */
function makeMockStreamResult(events: Array<{ type: string; [key: string]: unknown }>) {
  return {
    fullStream: makeFullStream(events),
  } as ReturnType<typeof streamText>;
}

// v6 finish event — only finishReason, rawFinishReason, totalUsage (no usage/providerMetadata/response)
const finishStop = {
  type: "finish",
  finishReason: "stop",
  rawFinishReason: "stop",
  totalUsage: { inputTokens: 10, outputTokens: 5 },
};

// Same shape as `finishStop`, but with the tool-calls finish reason.
const finishToolCalls = {
  type: "finish",
  finishReason: "tool-calls",
  rawFinishReason: "tool_use",
  totalUsage: { inputTokens: 10, outputTokens: 5 },
};

describe("Agent", () => {
  it("starts in idle status", () => {
const agent = new Agent(makeConfig());
    expect(agent.status).toBe("idle");
  });

  it("has empty messages initially", () => {
    const agent = new Agent(makeConfig());
    expect(agent.messages).toHaveLength(0);
  });

  it("yields running then idle status events around a simple message", async () => {
    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([
        // v6: text-delta uses `text` (not `textDelta`)
        { type: "text-delta", id: "t0", text: "Hello!" },
        finishStop,
      ]),
    );

    const agent = new Agent(makeConfig());
    const events: AgentEvent[] = [];
    for await (const event of agent.run("hi")) {
      events.push(event);
    }

    // The very first event must be the "running" status.
    const types = events.map((e) => e.type);
    expect(types[0]).toBe("status");
    expect(events[0]).toMatchObject({ type: "status", status: "running" });

    // The last status event must be "idle".
    const lastStatusEvent = events.filter((e) => e.type === "status").at(-1);
    expect(lastStatusEvent).toMatchObject({ type: "status", status: "idle" });
  });

  it("yields text-delta events", async () => {
    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([
        // v6: text-delta uses `text` (not `textDelta`)
        { type: "text-delta", id: "t0", text: "Hello" },
        { type: "text-delta", id: "t0", text: " world" },
        finishStop,
      ]),
    );

    const agent = new Agent(makeConfig());
    const events: AgentEvent[] = [];
    for await (const event of agent.run("test")) {
      events.push(event);
    }

    const textDeltas = events.filter((e) => e.type === "text-delta");
    expect(textDeltas).toHaveLength(2);
    expect(textDeltas[0]).toMatchObject({ delta: "Hello" });
    expect(textDeltas[1]).toMatchObject({ delta: " world" });
  });

  it("adds user message and assistant message to history", async () => {
    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([{ type: "text-delta", id: "t0", text: "Response" }, finishStop]),
    );

    const agent = new Agent(makeConfig());
    for await (const _ of agent.run("my question")) {
      // consume generator
    }

    expect(agent.messages).toHaveLength(2);
    expect(agent.messages[0]).toMatchObject({
      role: "user",
      chunks: [{ type: "text", text: "my question" }],
    });
expect(agent.messages[1]).toMatchObject({
      role: "assistant",
      chunks: [{ type: "text", text: "Response" }],
    });
  });

  it("yields done event with final message", async () => {
    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([{ type: "text-delta", id: "t0", text: "Done!" }, finishStop]),
    );

    const agent = new Agent(makeConfig());
    const events: AgentEvent[] = [];
    for await (const event of agent.run("test")) {
      events.push(event);
    }

    const doneEvent = events.find((e) => e.type === "done");
    expect(doneEvent).toBeDefined();
    expect(doneEvent).toMatchObject({
      type: "done",
      message: { role: "assistant", chunks: [{ type: "text", text: "Done!" }] },
    });
  });

  it("yields tool-call and tool-result events", async () => {
    // First call: LLM emits a tool-call
    // Second call (after tool execution): LLM emits text response with no tool calls
    vi.mocked(streamText)
      .mockReturnValueOnce(
        makeMockStreamResult([
          {
            type: "tool-call",
            toolCallId: "tc1",
            toolName: "read_file",
            // v6: `input` replaces `args`
            input: { path: "hello.txt" },
          },
          finishToolCalls,
        ]),
      )
      .mockReturnValueOnce(
        makeMockStreamResult([
          { type: "text-delta", id: "t0", text: "Here is the file." },
          finishStop,
        ]),
      );

    const toolDef = {
      name: "read_file",
      description: "reads a file",
      parameters: z.object({ path: z.string() }),
      execute: async (_args: Record<string, unknown>) => "file contents",
    };

    const agent = new Agent(makeConfig({ tools: [toolDef] }));
    const events: AgentEvent[] = [];
    for await (const event of agent.run("read the file")) {
      events.push(event);
    }

    const toolCallEvent = events.find((e) => e.type === "tool-call");
    expect(toolCallEvent).toMatchObject({
      type: "tool-call",
      toolCall: { id: "tc1", name: "read_file" },
    });

    const toolResultEvent = events.find((e) => e.type === "tool-result");
    expect(toolResultEvent).toMatchObject({
      type: "tool-result",
      toolResult: { toolCallId: "tc1", result: "file contents" },
    });
  });

  it("does NOT swallow trailing queued messages into history at turn end", async () => {
    // Regression for the "queue not consumed after the turn ends" bug. A
    // message that lands on the queue after the last tool call (here: a
    // no-tool turn) must be LEFT on the queue for the orchestrator to start
    // a new turn — not silently appended to history with no response.
    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([{ type: "text-delta", id: "t0", text: "done" }, finishStop]),
    );

    const queue = [{ id: "q1", message: "answer me next", timestamp: 1 }];
    // Draining mutates `queue`, so its length afterwards shows whether it was consumed.
    const dequeueMessages = vi.fn(() => queue.splice(0, queue.length));

    const agent = new Agent(makeConfig(), {
      dequeueMessages,
      waitForQueuedMessage: () => ({ promise: Promise.resolve(), cancel: () => {} }),
    });

    const before = agent.messages.length;
    for await (const _ of agent.run("hello")) {
      // consume
    }

    // The agent appended exactly the user turn + its own assistant reply;
    // it did NOT drain the queue or append a trailing user message for it.
expect(dequeueMessages).not.toHaveBeenCalled();
    expect(queue).toHaveLength(1);
    const added = agent.messages.slice(before);
    expect(added.map((m) => m.role)).toEqual(["user", "assistant"]);
    expect(
      added.some((m) => m.chunks.some((c) => c.type === "text" && c.text === "answer me next")),
    ).toBe(false);
  });

  it("still injects a mid-turn queued message into the last tool result", async () => {
    // The interrupt path (site 1) must be untouched by the turn-end fix: a
    // message present DURING a tool batch is folded into that batch's last
    // tool result as a [USER INTERRUPT], and the agent loops back to the LLM.
    vi.mocked(streamText)
      .mockReturnValueOnce(
        makeMockStreamResult([
          { type: "tool-call", toolCallId: "tc1", toolName: "read_file", input: { path: "a.txt" } },
          finishToolCalls,
        ]),
      )
      .mockReturnValueOnce(
        makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]),
      );

    const queue = [{ id: "q1", message: "stop and do X", timestamp: 1 }];
    const dequeueMessages = vi.fn(() => queue.splice(0, queue.length));

    const toolDef = {
      name: "read_file",
      description: "reads a file",
      parameters: z.object({ path: z.string() }),
      execute: async () => "file contents",
    };

    const agent = new Agent(makeConfig({ tools: [toolDef] }), {
      dequeueMessages,
      waitForQueuedMessage: () => ({ promise: Promise.resolve(), cancel: () => {} }),
    });

    const events: AgentEvent[] = [];
    for await (const event of agent.run("read it")) {
      events.push(event);
    }

    expect(dequeueMessages).toHaveBeenCalled();
    const toolResult = events.find((e) => e.type === "tool-result") as
      | (AgentEvent & { toolResult: { result: string } })
      | undefined;
    expect(toolResult?.toolResult.result).toContain("[USER INTERRUPT]");
    expect(toolResult?.toolResult.result).toContain("stop and do X");
  });

  it("yields reasoning-delta events", async () => {
    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([
        // v6: reasoning-delta uses `text` (not `textDelta`)
        { type: "reasoning-delta", id: "r0", text: "thinking about this..." },
        { type: "reasoning-delta", id: "r0", text: " more thoughts" },
        { type: "text-delta", id: "t0", text: "Answer" },
        finishStop,
      ]),
    );

    const agent = new Agent(makeConfig());
    const events: AgentEvent[] = [];
    for await (const event of agent.run("think")) {
      events.push(event);
    }

    const reasoningDeltas = events.filter((e) => e.type === "reasoning-delta");
    expect(reasoningDeltas).toHaveLength(2);
    expect(reasoningDeltas[0]).toMatchObject({ delta: "thinking about this..." });
    expect(reasoningDeltas[1]).toMatchObject({ delta: " more thoughts" });
  });

  it("yields reasoning-end event when providerMetadata is present", async () => {
    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([
        { type: "reasoning-delta", id: "r0", text: "some reasoning" },
        {
          type: "reasoning-end",
          id: "r0",
          providerMetadata: { anthropic: { signature: "sig-1" } },
        },
        { type: "text-delta", id: "t0", text: "Answer" },
        finishStop,
      ]),
    );

    const agent = new Agent(makeConfig());
    const events: AgentEvent[] = [];
    for await (const event of agent.run("think")) {
      events.push(event);
    }

    const reasoningEndEvent = events.find((e) => e.type === "reasoning-end");
    expect(reasoningEndEvent).toBeDefined();
    expect(reasoningEndEvent).toMatchObject({
      type: "reasoning-end",
      metadata: { anthropic: { signature: "sig-1" } },
    });
  });

  it("does NOT yield reasoning-end event when providerMetadata is absent", async () => {
    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([
        { type: "reasoning-delta", id: "r0", text: "some reasoning" },
        {
          type: "reasoning-end",
          id: "r0",
          // No providerMetadata — non-Anthropic model
        },
        { type: "text-delta", id: "t0", text: "Answer" },
        finishStop,
      ]),
    );

    const agent = new Agent(makeConfig());
    const events: AgentEvent[] = [];
    for await (const event of agent.run("think")) {
      events.push(event);
    }

    const reasoningEndEvent = events.find((e) => e.type === "reasoning-end");
    expect(reasoningEndEvent).toBeUndefined();
  });

  // ─── New v6 round-trip tests ──────────────────────────────────────────────

  it("signed thinking round-trip: ThinkingChunk.metadata → ReasoningPart.providerOptions", async () => {
    // Pre-seed the agent with a prior assistant message containing a ThinkingChunk
    // with metadata (the Anthropic signature blob).
    // Anthropic-path provider — for openai-compatible the metadata
    // would be lifted into providerOptions.openaiCompatible instead;
    // that path is covered by the DeepSeek tests further down.
    const agent = new Agent(makeConfig({ provider: "opencode-anthropic" }));
    agent.messages.push({
      role: "user",
      chunks: [{ type: "text", text: "prior user message" }],
    });
    agent.messages.push({
      role: "assistant",
      chunks: [
        {
          type: "thinking",
          text: "I thought about it",
          metadata: { anthropic: { signature: "S" } },
        },
        { type: "text", text: "prior response" },
      ],
    });

    // Next turn: just return a simple text response
    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([{ type: "text-delta", id: "t0", text: "New answer" }, finishStop]),
    );

    for await (const _ of agent.run("follow-up")) {
      // consume
    }

    // Inspect the messages passed to streamText in this (last) call
    const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0];
    expect(callArgs).toBeDefined();
    const messages = callArgs?.messages as Array<{
      role: string;
      content: unknown;
    }>;

    // Find the assistant message in the rebuilt ModelMessage[]
    const assistantMsg = messages.find((m) => m.role === "assistant");
    expect(assistantMsg).toBeDefined();
    const content = assistantMsg?.content as Array<Record<string, unknown>>;
    const reasoningPart = content.find((p) => p.type === "reasoning");
    expect(reasoningPart).toBeDefined();
    expect(reasoningPart).toMatchObject({
      type: "reasoning",
      text: "I thought about it",
      providerOptions: { anthropic: { signature: "S" } },
    });
  });

  it("tool-call input round-trip: ToolBatchEntry.arguments → ToolCallPart.input (not args)", async () => {
    // Pre-seed the agent with a prior assistant message containing a tool-batch chunk
    const agent = new Agent(makeConfig());
    agent.messages.push({
      role: "user",
      chunks: [{ type: "text", text: "run a tool" }],
    });
    agent.messages.push({
      role: "assistant",
      chunks: [
        {
          type: "tool-batch",
          calls: [
            {
              id: "call-1",
              name: "read_file",
              arguments: { path: "/foo/bar.txt" },
              result: "file contents",
            },
          ],
        },
      ],
    });

    // Next turn: just return a simple text response
    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([{ type: "text-delta", id: "t0", text: "Done" }, finishStop]),
    );

    for await (const _ of agent.run("follow-up")) {
      // consume
    }

    const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0];
    const messages = callArgs?.messages as Array<{ role: string; content: unknown }>;

    // The assistant message should contain a tool-call part with `input` (not `args`)
    const assistantMsg = messages.find((m) => m.role === "assistant");
    expect(assistantMsg).toBeDefined();
    const content = assistantMsg?.content as Array<Record<string, unknown>>;
    const toolCallPart = content.find((p) => p.type === "tool-call");
    expect(toolCallPart).toBeDefined();
    expect(toolCallPart).toMatchObject({
      type: "tool-call",
      toolCallId: "call-1",
      toolName: "read_file",
      input: { path: "/foo/bar.txt" }, // v6: input not args
    });
    // Explicitly assert `args` is NOT present
    expect(toolCallPart).not.toHaveProperty("args");
  });

  it("tool-result output round-trip: result string → { type: 'text', value } ToolResultOutput", async () => {
    // Pre-seed the agent with a prior assistant message containing a tool-batch chunk
    const agent = new Agent(makeConfig());
    agent.messages.push({
      role: "user",
      chunks: [{ type: "text", text: "run a tool" }],
    });
    agent.messages.push({
      role: "assistant",
      chunks: [
        {
          type: "tool-batch",
          calls: [
            {
              id: "call-2",
              name: "read_file",
              arguments: { path: "/foo/baz.txt" },
              result: "the file content here",
            },
          ],
        },
      ],
    });

    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([{ type: "text-delta", id: "t0", text: "Done" }, finishStop]),
    );

    for await (const _ of agent.run("follow-up")) {
      // consume
    }

    const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0];
    const messages = callArgs?.messages as Array<{ role: string; content: unknown }>;

    // The tool message should contain a tool-result part with `output` (ToolResultOutput)
    const toolMsg = messages.find((m) => m.role === "tool");
    expect(toolMsg).toBeDefined();
    const toolContent = toolMsg?.content as Array<Record<string, unknown>>;
    expect(toolContent[0]).toMatchObject({
      type: "tool-result",
      toolCallId: "call-2",
      toolName: "read_file",
      output: { type: "text", value: "the file content here" },
    });
    // Explicitly assert `result` (v4 raw string) is NOT present
    expect(toolContent[0]).not.toHaveProperty("result");
  });

  it("per-step segmentation: a [tool-batch, text] turn becomes [assistant(tool-call), tool(result), assistant(text)]", async () => {
    // `toModelMessages` segments a turn at each tool-batch boundary, so the
    // tool-batch (step 0) and the trailing text (step 1) land in SEPARATE
    // assistant messages — never a single invalid [tool_use, text] block.
    // This is the cache-stability fix and is applied for every provider.
    const agent = new Agent(makeConfig({ provider: "opencode-anthropic" }));
    agent.messages.push({
      role: "user",
      chunks: [{ type: "text", text: "run a tool and explain" }],
    });
    agent.messages.push({
      role: "assistant",
      chunks: [
        // Note: tool-batch appears BEFORE text in chunks — this is the
        // problematic ordering that Anthropic rejects
        {
          type: "tool-batch",
          calls: [
            {
              id: "call-3",
              name: "read_file",
              arguments: { path: "/tmp/x.txt" },
              result: "x contents",
            },
          ],
        },
        { type: "text", text: "Here is my explanation." },
      ],
    });

    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]),
    );

    for await (const _ of agent.run("follow-up")) {
      // consume
    }

    const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0];
    const messages = callArgs?.messages as Array<{ role: string; content: unknown }>;

    // No assistant message may mix tool-call and non-tool-call parts (the
    // invalid shape Anthropic rejects); segmentation guarantees this.
const assistantMsgs = messages.filter((m) => m.role === "assistant");
    for (const m of assistantMsgs) {
      const c = m.content as Array<Record<string, unknown>>;
      // Non-array content cannot carry tool-call parts; nothing to check.
      if (!Array.isArray(c)) continue;
      const hasToolCall = c.some((p) => p.type === "tool-call");
      const hasNonToolCall = c.some((p) => p.type !== "tool-call");
      expect(hasToolCall && hasNonToolCall).toBe(false);
    }

    // The seeded turn yields a tool-call assistant message immediately
    // followed by its tool-result message (valid tool_use → tool_result).
    const toolOnlyIdx = messages.findIndex((m) => {
      const c = m.content as Array<Record<string, unknown>>;
      return m.role === "assistant" && Array.isArray(c) && c.some((p) => p.type === "tool-call");
    });
    expect(toolOnlyIdx).toBeGreaterThanOrEqual(0);
    expect(messages[toolOnlyIdx + 1]?.role).toBe("tool");
  });

  it("per-step segmentation also applies to the openai-compatible provider", async () => {
    // Segmentation is provider-agnostic: a [tool-batch, text] turn is split
    // into separate assistant messages for openai-compatible too, with the
    // tool result in its own tool message (the standard OpenAI shape).
    const agent = new Agent(makeConfig());
    agent.messages.push({
      role: "user",
      chunks: [{ type: "text", text: "run a tool and explain" }],
    });
    agent.messages.push({
      role: "assistant",
      chunks: [
        {
          type: "tool-batch",
          calls: [
            {
              id: "call-4",
              name: "read_file",
              arguments: { path: "/tmp/y.txt" },
              result: "y contents",
            },
          ],
        },
        { type: "text", text: "Here is my explanation." },
      ],
    });

    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]),
    );

    for await (const _ of agent.run("follow-up")) {
      // consume
    }

    const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0];
    const messages = callArgs?.messages as Array<{ role: string; content: unknown }>;

    // The seeded [tool-batch, text] turn is segmented: a tool-call-only
    // assistant message, its tool message, and a separate text assistant
    // message (the new turn's "ok" reply adds one more). No assistant
    // message mixes tool-call and non-tool-call parts.
    const assistantMsgs = messages.filter((m) => m.role === "assistant");
    for (const m of assistantMsgs) {
      const c = m.content as Array<Record<string, unknown>>;
      if (!Array.isArray(c)) continue;
      expect(c.some((p) => p.type === "tool-call") && c.some((p) => p.type !== "tool-call")).toBe(
        false,
      );
    }
    expect(messages.some((m) => m.role === "tool")).toBe(true);
    const toolCallMsg = assistantMsgs.find((m) => {
      const c = m.content as Array<Record<string, unknown>>;
      return Array.isArray(c) && c.some((p) => p.type === "tool-call");
    });
    expect(toolCallMsg).toBeDefined();
  });

  it("empty-text-part filter (Anthropic): empty text chunk is not sent", async () => {
    // Pre-seed an assistant message where a text chunk has empty text.
    const agent = new Agent(makeConfig({ provider: "opencode-anthropic" }));
    agent.messages.push({
      role: "user",
      chunks: [{ type: "text", text: "hello" }],
    });
    agent.messages.push({
      role: "assistant",
      chunks: [
        { type: "text", text: "" }, // empty text — should be filtered out
        { type: "text", text: "non-empty response" },
      ],
    });

    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]),
    );

    for await (const _ of agent.run("follow-up")) {
      // consume
    }

    const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0];
    const messages = callArgs?.messages as Array<{ role: string; content: unknown }>;

    const assistantMsg = messages.find((m) => m.role === "assistant");
    expect(assistantMsg).toBeDefined();
    const content = assistantMsg?.content as Array<Record<string, unknown>>;

    // Empty text part should have been filtered out
    const emptyTextParts = content.filter((p) => p.type === "text" && p.text === "");
    expect(emptyTextParts).toHaveLength(0);

    // The non-empty text part should still be there
    const nonEmptyTextParts = content.filter((p) => p.type === "text" && p.text !== "");
    expect(nonEmptyTextParts).toHaveLength(1);
    expect(nonEmptyTextParts[0]).toMatchObject({ text: "non-empty response" });
  });

  it("empty-reasoning-part filter (Anthropic): empty reasoning chunk is not sent", async () => {
    // Anthropic's adaptive thinking mode occasionally produces a signed-
    // but-empty thinking block. We persist it (for signature round-trip
    // fidelity) but strip the empty `reasoning` part before sending it
    // back, or Anthropic rejects with "thinking block must have content".
    const agent = new Agent(makeConfig({ provider: "opencode-anthropic" }));
    agent.messages.push({ role: "user", chunks: [{ type: "text", text: "hi" }] });
    agent.messages.push({
      role: "assistant",
      chunks: [
        // Signed-but-empty thinking block
        { type: "thinking", text: "", metadata: { anthropic: { signature: "sig-empty" } } },
        { type: "text", text: "answer" },
      ],
    });

    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]),
    );
    for await (const _ of agent.run("follow-up")) {
      /* consume */
    }

    const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0];
    const messages = callArgs?.messages as Array<{ role: string; content: unknown }>;
    const assistantMsg = messages.find((m) => m.role === "assistant");
    const content = assistantMsg?.content as Array<Record<string, unknown>>;

    // Empty reasoning part must have been filtered out by the
    // Anthropic structural normalisation pass.
    const emptyReasoning = content.filter((p) => p.type === "reasoning" && p.text === "");
    expect(emptyReasoning).toHaveLength(0);
    // The text part should still be there
    expect(content.some((p) => p.type === "text" && p.text === "answer")).toBe(true);
  });

  it("toolCallId scrubbing (Anthropic): non-[a-zA-Z0-9_-] chars in tool IDs are sanitised", async () => {
    // Anthropic rejects toolCallId outside [a-zA-Z0-9_-]. Our internal
    // crypto.randomUUID IDs are safe, but defensively scrub for any
    // upstream-assigned IDs (subagent retrieval, provider-executed
    // tools, MCP, etc.). Mirrors opencode transform.ts:96-122.
const agent = new Agent(makeConfig({ provider: "opencode-anthropic" }));
    agent.messages.push({ role: "user", chunks: [{ type: "text", text: "do the thing" }] });
    agent.messages.push({
      role: "assistant",
      chunks: [
        {
          type: "tool-batch",
          calls: [
            {
              id: "call.with/dots:and:slashes", // invalid chars
              name: "fake_tool",
              arguments: { x: 1 },
              result: "ok",
              isError: false,
            },
          ],
        },
      ],
    });

    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]),
    );
    for await (const _ of agent.run("follow-up")) {
      /* consume */
    }

    const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0];
    const messages = callArgs?.messages as Array<{ role: string; content: unknown }>;

    // Assistant tool-call part must have scrubbed ID
    const assistantMsg = messages.find((m) => m.role === "assistant");
    const assistantContent = assistantMsg?.content as Array<Record<string, unknown>>;
    const toolCallPart = assistantContent.find((p) => p.type === "tool-call");
    expect(toolCallPart).toBeDefined();
    expect(toolCallPart?.toolCallId).toBe("call_with_dots_and_slashes");

    // Matching tool-result message must use the SAME scrubbed ID
    // so Anthropic can pair them.
    const toolMsg = messages.find((m) => m.role === "tool");
    const toolContent = toolMsg?.content as Array<Record<string, unknown>>;
    expect(toolContent?.[0]?.toolCallId).toBe("call_with_dots_and_slashes");
  });

  it("reasoning metadata captured from stream is round-tripped on the next turn", async () => {
    // End-to-end integrity for the providerMetadata round-trip — the
    // bug that prompted the entire migration. Stream a turn that
    // emits reasoning-delta + reasoning-end with metadata, then run
    // ANOTHER turn and verify the metadata reaches the model via
    // ReasoningPart.providerOptions.
const agent = new Agent(makeConfig({ provider: "opencode-anthropic" }));
    const sig = { anthropic: { signature: "round-trip-sig-1" } };

    // Turn 1: model emits reasoning + signed reasoning-end
    vi.mocked(streamText).mockReturnValueOnce(
      makeMockStreamResult([
        { type: "reasoning-delta", id: "r0", text: "let me think" },
        { type: "reasoning-end", id: "r0", providerMetadata: sig },
        { type: "text-delta", id: "t0", text: "answer" },
        finishStop,
      ]),
    );
    for await (const _ of agent.run("first question")) {
      /* consume */
    }

    // After turn 1, the persisted chunks should include a ThinkingChunk
    // with the captured metadata. The agent's messages array IS the
    // canonical persisted shape (the DB just JSON-stringifies it).
    // The predicate only matches the LAST message, and only if it is an
    // assistant message.
    const turn1Assistant = agent.messages.find(
      (m, i) => m.role === "assistant" && i === agent.messages.length - 1,
    );
    expect(turn1Assistant).toBeDefined();
    const thinkingChunk = turn1Assistant?.chunks.find((c) => c.type === "thinking");
    expect(thinkingChunk).toBeDefined();
    expect(thinkingChunk).toMatchObject({ text: "let me think", metadata: sig });

    // Turn 2: drive another turn, capture what streamText receives.
vi.mocked(streamText).mockReturnValueOnce(
      makeMockStreamResult([
        { type: "text-delta", id: "t1", text: "follow-up answer" },
        finishStop,
      ]),
    );
    for await (const _ of agent.run("second question")) {
      /* consume */
    }

    const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0];
    const messages = callArgs?.messages as Array<{ role: string; content: unknown }>;
    // First assistant message whose content is a structured parts array.
    const turn2Assistant = messages
      .filter((m) => m.role === "assistant")
      .find((m) => Array.isArray(m.content));
    const turn2Content = turn2Assistant?.content as Array<Record<string, unknown>>;
    const reasoningPart = turn2Content.find((p) => p.type === "reasoning");
    expect(reasoningPart).toBeDefined();
    expect(reasoningPart).toMatchObject({
      type: "reasoning",
      text: "let me think",
      providerOptions: sig,
    });
  });

  it("tool-error stream event yields a synthetic tool-result + error chunk and continues the turn", async () => {
    // Provider-executed tools (Anthropic server tools) bypass our
    // manual executor and surface as a `tool-error` stream event.
    // We must:
    // 1. Synthesize a tool-result with isError=true so the chunks
    //    reflect that the tool ran and failed — this keeps the
    //    tool-call/tool-result pairing complete and avoids the AI SDK
    //    throwing MissingToolResultsError on the next round-trip.
    // 2. Emit an error chunk so the UI shows the failure.
    // 3. NOT transition to "error" status — the step breaks out of the
    //    stream loop and the turn ends normally (here, with no further
    //    tool calls pending, the agent completes to idle).
vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([
        {
          type: "tool-error",
          toolCallId: "tc_server",
          toolName: "server_tool",
          error: new Error("upstream tool failure"),
        },
        finishStop,
      ]),
    );

    const agent = new Agent(makeConfig());
    const events: AgentEvent[] = [];
    for await (const event of agent.run("trigger")) {
      events.push(event);
    }

    // Synthetic tool-result with the upstream error
    const trEvent = events.find((e) => e.type === "tool-result");
    expect(trEvent).toBeDefined();
    expect(trEvent).toMatchObject({
      type: "tool-result",
      toolResult: { toolCallId: "tc_server", isError: true },
    });

    // Error chunk for visibility
    const errEvent = events.find((e) => e.type === "error");
    expect(errEvent).toBeDefined();
    const errMsg = errEvent && "error" in errEvent ? errEvent.error : "";
    expect(typeof errMsg).toBe("string");
    expect((errMsg as string).includes("upstream tool failure")).toBe(true);

    // Status does NOT transition to error — the turn completes to idle.
    const lastStatus = events.filter((e) => e.type === "status").at(-1);
    expect(lastStatus).toMatchObject({ type: "status", status: "idle" });
    // The turn produced a `done` event (it did not abort).
    expect(events.some((e) => e.type === "done")).toBe(true);
  });

  it("tool-error leaves sibling tool calls to be resolved by the executor (not orphaned)", async () => {
    // When one tool in a batch errors, its siblings — whose tool-call
    // events were already yielded — must still receive a result, otherwise
    // the tool-call IDs are orphaned in the chunks (no matching result)
    // and the next LLM round-trip throws MissingToolResultsError. The
    // tool-error handler breaks out of the stream loop WITHOUT executing
    // the unresolved siblings inline; the normal manual-executor pass then
    // runs them. Here `sibling_tool` is not a registered tool, so the
    // executor returns an "Unknown tool" error result — completing the
    // tool-call/tool-result pairing with `isError: true`.
vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([
        {
          type: "tool-call",
          toolCallId: "tc_sibling",
          toolName: "sibling_tool",
          input: {},
        },
        {
          type: "tool-error",
          toolCallId: "tc_failed",
          toolName: "failed_tool",
          error: new Error("boom"),
        },
        finishStop,
      ]),
    );

    const agent = new Agent(makeConfig());
    const events: AgentEvent[] = [];
    for await (const event of agent.run("trigger")) {
      events.push(event);
    }

    const toolResults = events.filter((e) => e.type === "tool-result");
    // One for the failed tool, one for the sibling resolved by the executor.
    const siblingResult = toolResults.find(
      (e) => "toolResult" in e && e.toolResult.toolCallId === "tc_sibling",
    );
    expect(siblingResult).toBeDefined();
    expect(siblingResult).toMatchObject({
      type: "tool-result",
      toolResult: { toolCallId: "tc_sibling", isError: true },
    });
    const siblingMsg =
      siblingResult && "toolResult" in siblingResult ? siblingResult.toolResult.result : "";
    expect((siblingMsg as string).includes("sibling_tool")).toBe(true);

    // Status completes to idle (the turn continued, not aborted).
    const lastStatus = events.filter((e) => e.type === "status").at(-1);
    expect(lastStatus).toMatchObject({ type: "status", status: "idle" });
  });

  it("abort stream event surfaces as an error event and stops the turn", async () => {
    vi.mocked(streamText).mockReturnValue(
      makeMockStreamResult([
        { type: "text-delta", id: "t0", text: "starting..." },
        { type: "abort", reason: "user cancelled" },
      ]),
    );

    const agent = new Agent(makeConfig());
    const events: AgentEvent[] = [];
    for await (const event of agent.run("hi")) {
      events.push(event);
    }

    const errEvent = events.find((e) => e.type === "error");
    expect(errEvent).toBeDefined();
    const errMsg = errEvent && "error" in errEvent ? errEvent.error : "";
    // Accept either wording: a generic "aborted" message or the raw reason.
    expect(
      (errMsg as string).toLowerCase().includes("aborted") ||
        (errMsg as string).includes("user cancelled"),
    ).toBe(true);

    const lastStatus = events.filter((e) => e.type === "status").at(-1);
    expect(lastStatus).toMatchObject({ type: "status", status: "error" });
  });

  it("openai-compatible reasoning round-trip: ThinkingChunk -> providerOptions.openaiCompatible.reasoning_content (DeepSeek scenario)", async () => {
    // Reproducer for the "reasoning_content must be passed back" error
    // from DeepSeek via OpenCode Go.
    //
    // applyOpenAICompatibleReasoningNormalisation strips the
    // `{ type: "reasoning", text }` parts and lifts the concatenated
    // text into `providerOptions.openaiCompatible.reasoning_content`.
    // The v6 SDK provider serializes the message-level
    // `providerOptions.openaiCompatible.*` into the wire `assistant`
    // message via its `metadata` spread (line 247 of the SDK dist).
    // This route emits `reasoning_content` regardless of empty/non-
    // empty text — which is what DeepSeek requires.
const agent = new Agent( makeConfig({ model: "deepseek-v4-pro", // no provider field → default openai-compatible path }), ); agent.messages.push( { role: "user", chunks: [{ type: "text", text: "ping" }] }, { role: "assistant", chunks: [ { type: "thinking", text: "let me reason about this" }, { type: "text", text: "ok done" }, ], }, ); vi.mocked(streamText).mockReturnValue( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]), ); for await (const _ of agent.run("follow-up")) { /* consume */ } const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; const messages = callArgs?.messages as Array<{ role: string; content: unknown; providerOptions?: { openaiCompatible?: { reasoning_content?: string } }; }>; const assistantMsg = messages.find((m) => m.role === "assistant"); if (!assistantMsg || !Array.isArray(assistantMsg.content)) { throw new Error("expected structured assistant content"); } const content = assistantMsg.content as Array>; // Reasoning parts have been stripped from content (lifted into // providerOptions instead). expect(content.find((p) => p.type === "reasoning")).toBeUndefined(); // reasoning_content is set on providerOptions.openaiCompatible. // This is what reaches DeepSeek and prevents the rejection. expect(assistantMsg.providerOptions?.openaiCompatible?.reasoning_content).toBe( "let me reason about this", ); // The text part still survives in content. const textPart = content.find((p) => p.type === "text"); expect(textPart).toMatchObject({ type: "text", text: "ok done" }); // And critically, the message must NOT carry a providerMetadata // key (the v4-era misnamed key). v3 prompts use `providerOptions`. 
expect((assistantMsg as Record).providerMetadata).toBeUndefined(); }); it("openai-compatible empty-reasoning edge case: forces reasoning_content='' so DeepSeek does not reject", async () => { // DeepSeek will reject the follow-up turn with "must be passed // back" if a prior assistant turn emitted reasoning AND the // follow-up doesn't include `reasoning_content` (even empty). // The v6 SDK's content-side path skips emission when reasoning // is empty (see `dist/index.mjs:245`); our normalisation routes // it via providerOptions instead, which fires unconditionally. const agent = new Agent(makeConfig({ model: "deepseek-v4-pro" })); agent.messages.push( { role: "user", chunks: [{ type: "text", text: "ping" }] }, { role: "assistant", chunks: [ // Empty thinking — captured but produced no actual text. { type: "thinking", text: "" }, { type: "text", text: "answer" }, ], }, ); vi.mocked(streamText).mockReturnValue( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]), ); for await (const _ of agent.run("follow-up")) { /* consume */ } const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; const messages = callArgs?.messages as Array<{ role: string; content: unknown; providerOptions?: { openaiCompatible?: { reasoning_content?: string } }; }>; const assistantMsg = messages.find((m) => m.role === "assistant"); if (!assistantMsg) throw new Error("expected assistant message"); // The empty-string reasoning_content is explicitly set on // providerOptions. (`""` is intentional and required — // `assistantMsg.providerOptions?.openaiCompatible?.reasoning_content` // must not be `undefined`.) const rc = assistantMsg.providerOptions?.openaiCompatible?.reasoning_content; expect(rc).toBeDefined(); expect(rc).toBe(""); }); it("openai-compatible normalisation does NOT run for messages without any reasoning parts", async () => { // DeepSeek only requires `reasoning_content` AFTER a thinking // turn. 
For purely-text assistant messages, we should leave // providerOptions alone. const agent = new Agent(makeConfig({ model: "deepseek-v4-pro" })); agent.messages.push( { role: "user", chunks: [{ type: "text", text: "hi" }] }, { role: "assistant", chunks: [{ type: "text", text: "hello back" }], }, ); vi.mocked(streamText).mockReturnValue( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]), ); for await (const _ of agent.run("again")) { /* consume */ } const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; const messages = callArgs?.messages as Array<{ role: string; providerOptions?: { openaiCompatible?: { reasoning_content?: string } }; }>; const assistantMsg = messages.find((m) => m.role === "assistant"); // No reasoning chunks → no providerOptions injection. (May still // be undefined entirely if nothing else set it.) const rc = assistantMsg?.providerOptions?.openaiCompatible?.reasoning_content; expect(rc).toBeUndefined(); }); // ─── Prompt-caching: tool-result grouping & breakpoints (notes/claude-report.md) ── it("groups a turn's tool results into a SINGLE role:'tool' message (Root Cause 2)", async () => { // The agent batches three distinct read_file calls in one step. The // rebuilt ModelMessage[] must contain exactly ONE `role: "tool"` message // holding all three results (not three separate tool messages). Per- // result messages would strand the rolling cache breakpoints on the last // two adjacent tool results, wasting a breakpoint. 
const toolDef = { name: "read_file", description: "reads a file", parameters: z.object({ path: z.string() }), execute: async (args: Record) => `contents of ${String(args.path)}`, }; vi.mocked(streamText) .mockReturnValueOnce( makeMockStreamResult([ { type: "tool-call", toolCallId: "b1", toolName: "read_file", input: { path: "a.txt" } }, { type: "tool-call", toolCallId: "b2", toolName: "read_file", input: { path: "b.txt" } }, { type: "tool-call", toolCallId: "b3", toolName: "read_file", input: { path: "c.txt" } }, finishToolCalls, ]), ) .mockReturnValueOnce( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "done" }, finishStop]), ); const agent = new Agent(makeConfig({ provider: "opencode-anthropic", tools: [toolDef] })); for await (const _ of agent.run("read three files")) { /* consume */ } // Inspect the step-1 request (sent after the batch executed) — its tail // is the assistant tool-calls + the grouped tool results. const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; const messages = callArgs?.messages as Array<{ role: string; content: unknown }>; const toolMsgs = messages.filter((m) => m.role === "tool"); expect(toolMsgs).toHaveLength(1); const toolContent = toolMsgs[0]?.content as Array>; expect(toolContent).toHaveLength(3); expect(toolContent.every((p) => p.type === "tool-result")).toBe(true); // IDs preserved per result. expect(toolContent.map((p) => p.toolCallId)).toEqual(["b1", "b2", "b3"]); }); it("places cache breakpoints on [assistant, grouped-tool], not adjacent tool results (Root Cause 2)", async () => { // With grouping, the last two non-system messages of a mid-turn request // are [assistant(tool-calls), tool(all results)]. Both — plus the system // message — must carry an ephemeral cacheControl marker. The pre-fix bug // put both rolling breakpoints on two adjacent tool-result messages and // never marked the assistant turn. 
const toolDef = { name: "read_file", description: "reads a file", parameters: z.object({ path: z.string() }), execute: async (args: Record) => `contents of ${String(args.path)}`, }; vi.mocked(streamText) .mockReturnValueOnce( makeMockStreamResult([ { type: "tool-call", toolCallId: "c1", toolName: "read_file", input: { path: "a.txt" } }, { type: "tool-call", toolCallId: "c2", toolName: "read_file", input: { path: "b.txt" } }, { type: "tool-call", toolCallId: "c3", toolName: "read_file", input: { path: "c.txt" } }, finishToolCalls, ]), ) .mockReturnValueOnce( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "done" }, finishStop]), ); const agent = new Agent(makeConfig({ provider: "opencode-anthropic", tools: [toolDef] })); for await (const _ of agent.run("read three files")) { /* consume */ } const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; const messages = callArgs?.messages as Array<{ role: string; content: unknown; providerOptions?: { anthropic?: { cacheControl?: { type?: string } } }; }>; const isCached = (m?: { providerOptions?: { anthropic?: { cacheControl?: { type?: string } } }; }) => m?.providerOptions?.anthropic?.cacheControl?.type === "ephemeral"; // Exactly one tool message — no adjacent tool-result breakpoints. expect(messages.filter((m) => m.role === "tool")).toHaveLength(1); const systemMsg = messages.find((m) => m.role === "system"); const assistantMsg = messages.find((m) => m.role === "assistant"); const toolMsg = messages.find((m) => m.role === "tool"); expect(isCached(systemMsg)).toBe(true); expect(isCached(assistantMsg)).toBe(true); expect(isCached(toolMsg)).toBe(true); }); it("does NOT attach cacheControl for the openai-compatible (non-Anthropic) path", async () => { // Sanity check: caching markers are Anthropic-only. The OpenAI-compatible // endpoints do automatic server-side prefix caching and reject explicit // cache_control, so no marker should be attached. 
vi.mocked(streamText).mockReturnValue( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "hi" }, finishStop]), ); const agent = new Agent(makeConfig()); // default → openai-compatible for await (const _ of agent.run("hello")) { /* consume */ } const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; const messages = callArgs?.messages as Array<{ role: string; providerOptions?: { anthropic?: { cacheControl?: unknown } }; }>; for (const m of messages) { expect(m.providerOptions?.anthropic?.cacheControl).toBeUndefined(); } }); // ─── Tool-call dedup (notes/tool-runner-duplication-incident.md) ───────────────── it("deduplicates byte-identical tool calls within a single batch", async () => { // Claude can degenerate and emit the same tool call (same name + args) // many times in one batch. Each copy keeps its own id (and still gets its // own result), but the tool must execute only ONCE — re-running identical // idempotent reads wastes time/money and floods the context. let execCount = 0; const toolDef = { name: "read_file", description: "reads a file", parameters: z.object({ path: z.string() }), execute: async (args: Record) => { execCount++; return `contents of ${String(args.path)}`; }, }; vi.mocked(streamText) .mockReturnValueOnce( makeMockStreamResult([ { type: "tool-call", toolCallId: "d1", toolName: "read_file", input: { path: "package.json" }, }, { type: "tool-call", toolCallId: "d2", toolName: "read_file", input: { path: "package.json" }, }, { type: "tool-call", toolCallId: "d3", toolName: "read_file", input: { path: "package.json" }, }, finishToolCalls, ]), ) .mockReturnValueOnce( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "done" }, finishStop]), ); const agent = new Agent(makeConfig({ tools: [toolDef] })); const events: AgentEvent[] = []; for await (const e of agent.run("read it thrice")) { events.push(e); } // Executed exactly once despite three identical calls. 
expect(execCount).toBe(1); // Every call id still received its own result, all with identical content. const results = events.filter( (e): e is Extract => e.type === "tool-result", ); expect(results).toHaveLength(3); expect(results.map((e) => e.toolResult.toolCallId).sort()).toEqual(["d1", "d2", "d3"]); for (const r of results) { expect(r.toolResult.result).toBe("contents of package.json"); } }); it("does NOT deduplicate tool calls with differing arguments", async () => { // Dedup is keyed on name + serialized arguments. Distinct args must each // execute — only byte-identical calls collapse. let execCount = 0; const toolDef = { name: "read_file", description: "reads a file", parameters: z.object({ path: z.string() }), execute: async (args: Record) => { execCount++; return `contents of ${String(args.path)}`; }, }; vi.mocked(streamText) .mockReturnValueOnce( makeMockStreamResult([ { type: "tool-call", toolCallId: "e1", toolName: "read_file", input: { path: "a.txt" } }, { type: "tool-call", toolCallId: "e2", toolName: "read_file", input: { path: "b.txt" } }, { type: "tool-call", toolCallId: "e3", toolName: "read_file", input: { path: "a.txt" } }, finishToolCalls, ]), ) .mockReturnValueOnce( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "done" }, finishStop]), ); const agent = new Agent(makeConfig({ tools: [toolDef] })); for await (const _ of agent.run("read a, b, a")) { /* consume */ } // a.txt + b.txt are distinct → two executions; the repeated a.txt reuses // the first result. expect(execCount).toBe(2); }); // ─── Cache stability: per-step wire prefix is immutable ───────────────────── it("keeps earlier steps' wire messages byte-identical across requests (cache prefix is stable)", async () => { // A 3-step tool turn. 
The messages for steps 0 and 1 must serialize // identically in the step-2 request and the step-3 request — that // byte-stability is what lets Anthropic's rolling prompt cache extend // instead of re-writing the whole prefix every step (notes/cache-miss-report.md). // Uses the openai-compatible provider so no cacheControl markers (which // intentionally move each step) obscure the content comparison. let n = 0; // mock.calls accumulates across tests in this file — reset so our // `calls.length` assertions count only this run's requests. vi.mocked(streamText).mockClear(); const toolDef = { name: "read_file", description: "reads a file", parameters: z.object({ path: z.string() }), execute: async (args: Record) => `contents of ${String(args.path)}`, }; const toolStep = (id: string, path: string) => makeMockStreamResult([ { type: "reasoning-delta", id: `r${id}`, text: `thinking ${id}` }, { type: "text-delta", id: `t${id}`, text: `step ${id}` }, { type: "tool-call", toolCallId: id, toolName: "read_file", input: { path } }, finishToolCalls, ]); vi.mocked(streamText).mockImplementation(() => { n++; if (n === 1) return toolStep("s0", "a.txt"); if (n === 2) return toolStep("s1", "b.txt"); if (n === 3) return toolStep("s2", "c.txt"); return makeMockStreamResult([{ type: "text-delta", id: "tf", text: "done" }, finishStop]); }); const agent = new Agent(makeConfig({ tools: [toolDef] })); for await (const _ of agent.run("go")) { /* consume */ } // 4 streamText calls (steps 0..3). Compare the step-2 request (call idx 2) // and step-3 request (call idx 3). const calls = vi.mocked(streamText).mock.calls; expect(calls.length).toBe(4); const req2 = calls[2]?.[0]?.messages as unknown[]; const req3 = calls[3]?.[0]?.messages as unknown[]; // Step-2 request = [system, user, a(s0), tool(s0), a(s1), tool(s1)] (6). // Step-3 request appends a(s2), tool(s2). The shared 6-message prefix // must be byte-identical. 
expect(req2).toHaveLength(6); expect(req3).toHaveLength(8); expect(JSON.stringify(req3.slice(0, 6))).toBe(JSON.stringify(req2)); // And each step really is its own [assistant, tool] pair (not one merged // assistant message with all tool calls bunched together). const roles = (req3 as Array<{ role: string }>).map((m) => m.role); expect(roles).toEqual([ "system", "user", "assistant", "tool", "assistant", "tool", "assistant", "tool", ]); }); // ─── Usage / cache-rate telemetry ────────────────────────────────────────── it("emits a usage event from the finish-step part with the cache read/write split", async () => { // The per-step `usage` (with Anthropic's cache read/write split in // `inputTokenDetails`) rides on the `finish-step` part — NOT the terminal // `finish` part, which only carries the aggregate `totalUsage`. The agent // re-emits it as a `usage` AgentEvent that powers the Cache Rate view. vi.mocked(streamText).mockReturnValue( makeMockStreamResult([ { type: "text-delta", id: "t0", text: "hi" }, { type: "finish-step", finishReason: "stop", rawFinishReason: "stop", usage: { inputTokens: 1000, outputTokens: 50, inputTokenDetails: { noCacheTokens: 200, cacheReadTokens: 750, cacheWriteTokens: 50, }, }, }, finishStop, ]), ); const agent = new Agent(makeConfig()); const events: AgentEvent[] = []; for await (const e of agent.run("hi")) { events.push(e); } const usageEvents = events.filter( (e): e is Extract => e.type === "usage", ); // Exactly one usage event (from finish-step) — the terminal `finish` // part must NOT double-count. expect(usageEvents).toHaveLength(1); expect(usageEvents[0]?.usage).toEqual({ inputTokens: 1000, outputTokens: 50, cacheReadTokens: 750, cacheWriteTokens: 50, }); }); it("does NOT emit a usage event when no finish-step usage is present", async () => { // `finishStop` (type `finish`, aggregate `totalUsage` only) must not // trigger a usage event — and with no `finish-step` part there is no // per-step usage to emit. 
vi.mocked(streamText).mockReturnValue( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "hi" }, finishStop]), ); const agent = new Agent(makeConfig()); const events: AgentEvent[] = []; for await (const e of agent.run("hi")) { events.push(e); } expect(events.some((e) => e.type === "usage")).toBe(false); }); }); describe("anthropicThinkingProviderOptions — adaptive-thinking model detection", () => { // Pure function: no provider construction, no streamText, no network I/O. // Mirrors opencode's transform.ts detection — Opus 4.7+ AND Opus/Sonnet 4.6 // are adaptive; only Opus 4.7+ needs display:"summarized" to surface thinking. it("Opus 4.8 → adaptive + display:summarized (the reported bug)", () => { expect(anthropicThinkingProviderOptions("claude-opus-4-8", "max")).toEqual({ thinking: { type: "adaptive", display: "summarized" }, effort: "max", }); }); it("Opus 4.7 → adaptive + display:summarized (dash and dot id forms)", () => { const expected = { thinking: { type: "adaptive", display: "summarized" }, effort: "high" }; expect(anthropicThinkingProviderOptions("claude-opus-4-7", "high")).toEqual(expected); expect(anthropicThinkingProviderOptions("claude-opus-4.7", "high")).toEqual(expected); }); it("Sonnet 4.6 → adaptive WITHOUT display (dash and dot id forms)", () => { const expected = { thinking: { type: "adaptive" }, effort: "medium" }; expect(anthropicThinkingProviderOptions("claude-sonnet-4-6", "medium")).toEqual(expected); expect(anthropicThinkingProviderOptions("claude-sonnet-4.6", "medium")).toEqual(expected); }); it("Opus 4.6 → adaptive WITHOUT display", () => { expect(anthropicThinkingProviderOptions("claude-opus-4-6", "high")).toEqual({ thinking: { type: "adaptive" }, effort: "high", }); }); it("older Claude (Opus 4.5, dated Sonnet) → classic enabled thinking", () => { expect(anthropicThinkingProviderOptions("claude-opus-4-5", "max")).toEqual({ thinking: { type: "enabled", budgetTokens: 31999 }, }); 
expect(anthropicThinkingProviderOptions("claude-sonnet-4-20250514", "high")).toEqual({ thinking: { type: "enabled", budgetTokens: 16000 }, }); }); it("uses a version parse, not a hardcoded string (future Opus 4.9 is adaptive)", () => { expect(anthropicThinkingProviderOptions("claude-opus-4-9", "high")).toEqual({ thinking: { type: "adaptive", display: "summarized" }, effort: "high", }); }); it("maps reasoning effort → budgetTokens for enabled (non-adaptive) models", () => { const budget = (e: "low" | "medium" | "high" | "xhigh" | "max") => { const opts = anthropicThinkingProviderOptions("claude-3-7-sonnet", e) as { thinking: { type: "enabled"; budgetTokens: number }; }; return opts.thinking.budgetTokens; }; expect(budget("low")).toBe(2000); expect(budget("medium")).toBe(5000); expect(budget("high")).toBe(16000); expect(budget("xhigh")).toBe(24000); expect(budget("max")).toBe(31999); }); it("xhigh budget sits strictly between high and max (ordering invariant)", () => { const budget = (e: "high" | "xhigh" | "max") => { const opts = anthropicThinkingProviderOptions("claude-3-7-sonnet", e) as { thinking: { type: "enabled"; budgetTokens: number }; }; return opts.thinking.budgetTokens; }; expect(budget("high")).toBeLessThan(budget("xhigh")); expect(budget("xhigh")).toBeLessThan(budget("max")); }); it("forwards xhigh verbatim as the adaptive effort sibling (Opus 4.7+)", () => { expect(anthropicThinkingProviderOptions("claude-opus-4-8", "xhigh")).toEqual({ thinking: { type: "adaptive", display: "summarized" }, effort: "xhigh", }); }); describe("multimodal user content", () => { it("emits ordered text + image parts to the model when content is provided", async () => { vi.mocked(streamText).mockReturnValue( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]), ); const agent = new Agent(makeConfig()); for await (const _ of agent.run("here is image A: [image]", { content: [ { type: "text", text: "here is image A: " }, { type: "attachment", 
mediaType: "image/png", data: "QQ==" }, ], })) { // consume } const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; const messages = callArgs?.messages as Array<{ role: string; content: unknown }>; const userMsg = messages.find((m) => m.role === "user"); expect(userMsg).toBeDefined(); // Multimodal turn → content is an ordered parts array, not a string. expect(Array.isArray(userMsg?.content)).toBe(true); const parts = userMsg?.content as Array>; expect(parts[0]).toMatchObject({ type: "text", text: "here is image A: " }); expect(parts[1]).toMatchObject({ type: "image", mediaType: "image/png" }); expect(String(parts[1]?.image)).toBe("data:image/png;base64,QQ=="); }); it("emits a FilePart for a PDF attachment with its filename", async () => { vi.mocked(streamText).mockReturnValue( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]), ); const agent = new Agent(makeConfig()); for await (const _ of agent.run("see [pdf]", { content: [ { type: "text", text: "see " }, { type: "attachment", mediaType: "application/pdf", data: "QQ==", name: "doc.pdf" }, ], })) { // consume } const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; const messages = callArgs?.messages as Array<{ role: string; content: unknown }>; const userMsg = messages.find((m) => m.role === "user"); const parts = userMsg?.content as Array>; const filePart = parts.find((p) => p.type === "file"); expect(filePart).toMatchObject({ type: "file", mediaType: "application/pdf", filename: "doc.pdf", }); expect(String(filePart?.data)).toBe("data:application/pdf;base64,QQ=="); }); it("persists the user turn as text only (no content) for history", async () => { vi.mocked(streamText).mockReturnValue( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]), ); const agent = new Agent(makeConfig()); for await (const _ of agent.run("look: [image]", { content: [ { type: "text", text: "look: " }, { type: "attachment", mediaType: "image/png", data: "QQ==" }, 
], })) { // consume } // The in-memory user message keeps the text chunk for the render/persist // path; the ephemeral `content` rides alongside it but isn't a chunk. const userMsg = agent.messages.find((m) => m.role === "user"); expect(userMsg?.chunks).toEqual([{ type: "text", text: "look: [image]" }]); }); it("falls back to a plain string when content has no attachment", async () => { vi.mocked(streamText).mockReturnValue( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]), ); const agent = new Agent(makeConfig()); for await (const _ of agent.run("plain text", { content: [{ type: "text", text: "plain text" }], })) { // consume } const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; const messages = callArgs?.messages as Array<{ role: string; content: unknown }>; const userMsg = messages.find((m) => m.role === "user"); // No attachment → plain string content (byte-identical to text-only path). expect(typeof userMsg?.content).toBe("string"); expect(userMsg?.content).toBe("plain text"); }); }); describe("warmCache (prompt-cache warming replay)", () => { function makeWarmStream(usage: { inputTokens: number; cacheReadTokens: number; cacheWriteTokens: number; }) { return makeMockStreamResult([ { type: "text-delta", id: "t0", text: "." 
}, { type: "finish-step", finishReason: "stop", rawFinishReason: "stop", usage: { inputTokens: usage.inputTokens, outputTokens: 1, inputTokenDetails: { noCacheTokens: usage.inputTokens - usage.cacheReadTokens - usage.cacheWriteTokens, cacheReadTokens: usage.cacheReadTokens, cacheWriteTokens: usage.cacheWriteTokens, }, }, }, finishStop, ]); } const history = [ { role: "user" as const, chunks: [{ type: "text" as const, text: "hello" }] }, { role: "assistant" as const, chunks: [{ type: "text" as const, text: "hi there" }] }, ]; it("returns the request usage (cache read/write split) without throwing", async () => { vi.mocked(streamText).mockReturnValue( makeWarmStream({ inputTokens: 1000, cacheReadTokens: 950, cacheWriteTokens: 0 }), ); const agent = new Agent(makeConfig({ provider: "anthropic" })); const usage = await agent.warmCache(history); expect(usage).toEqual({ inputTokens: 1000, outputTokens: 1, cacheReadTokens: 950, cacheWriteTokens: 0, }); }); it("appends a single trivial throwaway user turn at the END of the history", async () => { vi.mocked(streamText).mockReturnValue( makeWarmStream({ inputTokens: 10, cacheReadTokens: 5, cacheWriteTokens: 0 }), ); const agent = new Agent(makeConfig({ provider: "anthropic" })); await agent.warmCache(history); const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; const messages = callArgs?.messages as Array<{ role: string; content: unknown }>; // system + 2 history messages + 1 throwaway user turn. expect(messages[0]?.role).toBe("system"); const last = messages.at(-1); expect(last?.role).toBe("user"); // The throwaway turn's text must be the trivial probe. const lastText = JSON.stringify(last?.content); expect(lastText).toContain("reply with just a ."); // Exactly one extra user turn beyond the genuine history's single user msg. 
const userMsgs = messages.filter((m) => m.role === "user"); expect(userMsgs).toHaveLength(2); }); it("sends Anthropic cache_control breakpoints with the SAME toolChoice/thinking as a real turn", async () => { // Anthropic keys the MESSAGE cache on `tool_choice` AND the extended- // thinking parameters. If warming sent a different value than a real // turn, it would warm a DIFFERENT message-cache bucket and the user's // next real message would still miss. So warming MUST mirror run(): // toolChoice "auto" + the thinking providerOptions for the effort. vi.mocked(streamText).mockReturnValue( makeWarmStream({ inputTokens: 10, cacheReadTokens: 5, cacheWriteTokens: 0 }), ); const agent = new Agent(makeConfig({ provider: "anthropic" })); await agent.warmCache(history); const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; expect(callArgs?.toolChoice).toBe("auto"); // Thinking providerOptions present (effort defaults to "max"). expect(callArgs?.providerOptions?.anthropic).toBeDefined(); const messages = callArgs?.messages as Array<{ role: string; providerOptions?: { anthropic?: { cacheControl?: unknown } }; }>; const hasBreakpoint = messages.some( (m) => m.providerOptions?.anthropic?.cacheControl !== undefined, ); expect(hasBreakpoint).toBe(true); }); it("warming and a real turn send IDENTICAL cache-affecting params (same bucket)", async () => { // The core invariant of the whole feature: warmCache() and run() must // produce the same toolChoice + thinking providerOptions + maxOutputTokens // so the warming replay refreshes the EXACT cache the next real message // reads. Drive both and compare the cache-key inputs streamText receives. const cfg = makeConfig({ provider: "anthropic" }); // 1) Real turn for the same history + the probe text as the user msg. const realAgent = new Agent(cfg); realAgent.messages.push(...history.map((m) => ({ ...m }))); vi.mocked(streamText).mockReturnValue( makeMockStreamResult([{ type: "text-delta", id: "t0", text: "." 
}, finishStop]), ); for await (const _ of realAgent.run("reply with just a .")) { // consume } const realArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; // 2) Warming replay for the same history. const warmAgent = new Agent(cfg); vi.mocked(streamText).mockReturnValue( makeWarmStream({ inputTokens: 10, cacheReadTokens: 5, cacheWriteTokens: 0 }), ); await warmAgent.warmCache(history); const warmArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; // The cache-affecting parameters must be byte-identical. expect(warmArgs?.toolChoice).toEqual(realArgs?.toolChoice); expect(warmArgs?.maxOutputTokens).toEqual(realArgs?.maxOutputTokens); expect(warmArgs?.providerOptions).toEqual(realArgs?.providerOptions); }); it("does NOT mutate the agent's own message history", async () => { vi.mocked(streamText).mockReturnValue( makeWarmStream({ inputTokens: 10, cacheReadTokens: 5, cacheWriteTokens: 0 }), ); const agent = new Agent(makeConfig({ provider: "anthropic" })); expect(agent.messages).toHaveLength(0); await agent.warmCache(history); // warmCache takes history as an argument and never touches `this.messages`. expect(agent.messages).toHaveLength(0); // And it must not have flipped the agent into a running state. expect(agent.status).toBe("idle"); }); it("throws a formatted error when the stream errors", async () => { vi.mocked(streamText).mockReturnValue( makeMockStreamResult([{ type: "error", error: new Error("boom") }]), ); const agent = new Agent(makeConfig({ provider: "anthropic" })); await expect(agent.warmCache(history)).rejects.toThrow(/boom/); }); }); });