fix: prompt caching, OpenCode Go MiniMax/Qwen support, Opus 4.7 thinking, SDK compat

- Implement Anthropic prompt caching: first system message + last 2 non-system messages get cache_control: ephemeral, mirroring OpenCode's applyCaching strategy. Move system prompt inline into messages array so providerOptions can attach.
- Add opencode-anthropic provider variant routing MiniMax/Qwen models through the /messages endpoint with x-api-key auth, distinct from the Claude OAuth flow's Bearer auth and Claude Code mimicry.
- Split isAnthropic into isClaudeOAuth (billing header, mcp_ tool prefix, thinking config) and usesAnthropicSDK (cache markers) so non-OAuth Anthropic-format gateways get the right treatment.
- Pin @ai-sdk/anthropic to ^1.2.12: v3 returns LanguageModelV3-spec models that ai v4's streamText rejects at runtime ('AI SDK 4 only supports models that implement specification version v1'). Drop unnecessary V1 casts.
- Restore Opus 4.7 extended thinking by rewriting the outgoing /messages body in the Claude OAuth fetch interceptor: inject thinking: { type: 'adaptive' } (v1 SDK can't emit it), strip temperature/top_p/top_k (Anthropic rejects them with thinking enabled). Gated on max_tokens > 4096 so effort=none still works.
- Bump MAX_STEPS from 10 to 50 to align with AI SDK's stepCountIs(20) default and reduce mid-task halts.
- Fix pre-existing typecheck errors in agent-manager.ts (entry/nextEntry narrowing), app.ts (agentModels body field), KeyUsage.svelte (m guards), and a TS2742 in provider.ts via explicit ModelFactory return type.
- buildFallbackSequence now always returns at least one entry so processMessage runs the agent loop even without keyId/modelId (fixes 4 broken agent-manager tests).
author: Adam Malczewski <[email protected]> 2026-05-24 13:24:04 +0900
committer: Adam Malczewski <[email protected]> 2026-05-24 13:24:04 +0900
commit: 399e1509b93b9f3c56142f94b8fb2c30c2dedb2f (patch)
tree: d67f18f5cca91a66e3146cbd2f48920571768e23
parent: 997b00034435440d412f955e05e53f09bae83f9e (diff)
download: dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.tar.gz
dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.zip
7 files changed, 230 insertions, 76 deletions
diff --git a/bun.lock b/bun.lock
index 3bcf51b..92f730c 100644
--- a/bun.lock
+++ b/bun.lock
@@ -25,7 +25,7 @@
       "name": "@dispatch/core",
       "version": "0.0.1",
       "dependencies": {
-        "@ai-sdk/anthropic": "^3.0.0",
+        "@ai-sdk/anthropic": "^1.2.12",
         "@ai-sdk/openai-compatible": "^0.2.0",
         "ai": "^4.0.0",
         "chokidar": "^5.0.0",
@@ -64,13 +64,13 @@
   "packages": {
     "7zip-bin": ["[email protected]", "", {}, "sha512-ukTPVhqG4jNzMro2qA9HSCSSVJN3aN7tlb+hfqYCt3ER0yWroeA2VR38MNrOHLQ/cVj+DaIMad0kFCtWWowh/A=="],
 
-    "@ai-sdk/anthropic": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-saEX+h5JDOkT9P/+REKDyikbnJiToFuLipgNcsmu4Zr3GW5kW1m9HhvrPK+vj63itIOsoZU6tmVIjkrePOlIUA=="],
+    "@ai-sdk/anthropic": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-YSzjlko7JvuiyQFmI9RN1tNZdEiZxc+6xld/0tq/VkJaHpEzGAb1yiNxxvmYVcjvfu/PcvCxAAYXmTYQQ63IHQ=="],
 
     "@ai-sdk/openai-compatible": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-LkvfcM8slJedRyJa/MiMiaOzcMjV1zNDwzTHEGz7aAsgsQV0maLfmJRi/nuSwf5jmp0EouC+JXXDUj2l94HgQw=="],
 
-    "@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw=="],
+    "@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
 
-    "@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.8" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw=="],
+    "@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
 
     "@ai-sdk/react": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider-utils": "2.2.8", "@ai-sdk/ui-utils": "1.2.11", "swr": "^2.2.5", "throttleit": "2.1.0" }, "peerDependencies": { "react": "^18 || ^19 || ^19.0.0-rc", "zod": "^3.23.8" }, "optionalPeers": ["zod"] }, "sha512-jK1IZZ22evPZoQW3vlkZ7wvjYGYF+tRBKXtrcolduIkQ/m/sOAVcVeVDUDvh1T91xCnWCdUGCPZg2avZ90mv3g=="],
 
@@ -240,8 +240,6 @@
 
     "@sindresorhus/is": ["@sindresorhus/[email protected]", "", {}, "sha512-t09vSN3MdfsyCHoFcTRCH/iUtG7OJ0CsjzB8cjAmKc/va/kIgeDI/TxsigdncE/4be734m0cvIYwNaV4i2XqAw=="],
 
-    "@standard-schema/spec": ["@standard-schema/[email protected]", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="],
-
     "@sveltejs/acorn-typescript": ["@sveltejs/[email protected]", "", { "peerDependencies": { "acorn": "^8.9.0" } }, "sha512-lVJX6qEgs/4DOcRTpo56tmKzVPtoWAaVbL4hfO7t7NVwl9AAXzQR6cihesW1BmNMPl+bK6dreu2sOKBP2Q9CIA=="],
 
     "@sveltejs/vite-plugin-svelte": ["@sveltejs/[email protected]", "", { "dependencies": { "@sveltejs/vite-plugin-svelte-inspector": "^4.0.1", "debug": "^4.4.1", "deepmerge": "^4.3.1", "kleur": "^4.1.5", "magic-string": "^0.30.17", "vitefu": "^1.0.6" }, "peerDependencies": { "svelte": "^5.0.0", "vite": "^6.0.0" } }, "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ=="],
@@ -530,8 +528,6 @@
 
     "estree-walker": ["[email protected]", "", { "dependencies": { "@types/estree": "^1.0.0" } }, "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g=="],
 
-    "eventsource-parser": ["[email protected]", "", {}, "sha512-70QWGkr4snxr0OXLRWsFLeRBIRPuQOvt4s8QYjmUlmlkyTZkRqS7EDVRZtzU3TiyDbXSzaOeF0XUKy8PchzukQ=="],
-
     "expect-type": ["[email protected]", "", {}, "sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA=="],
 
     "exponential-backoff": ["[email protected]", "", {}, "sha512-ZgEeZXj30q+I0EN+CbSSpIyPaJ5HVQD18Z1m+u1FXbAeT94mr1zw50q4q6jiiC447Nl/YTcIYSAftiGqetwXCA=="],
@@ -950,16 +946,6 @@
 
     "zod-to-json-schema": ["[email protected]", "", { "peerDependencies": { "zod": "^3.25.28 || ^4" } }, "sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA=="],
 
-    "@ai-sdk/openai-compatible/@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
-
-    "@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
-
-    "@ai-sdk/react/@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
-
-    "@ai-sdk/ui-utils/@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
-
-    "@ai-sdk/ui-utils/@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
-
     "@electron/asar/minimatch": ["[email protected]", "", { "dependencies": { "brace-expansion": "^1.1.7" } }, "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w=="],
 
     "@electron/fuses/fs-extra": ["[email protected]", "", { "dependencies": { "at-least-node": "^1.0.0", "graceful-fs": "^4.2.0", "jsonfile": "^6.0.1", "universalify": "^2.0.0" } }, "sha512-hcg3ZmepS30/7BSFqRvoo3DOMQu7IjqxO5nCDt+zM9XWjb33Wg7ziNT+Qvqbuc3+gWpzO02JubVyk2G4Zvo1OQ=="],
@@ -990,10 +976,6 @@
 
     "@tailwindcss/oxide-wasm32-wasi/tslib": ["[email protected]", "", { "bundled": true }, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="],
 
-    "ai/@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
-
-    "ai/@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
-
     "app-builder-lib/@electron/get": ["@electron/[email protected]", "", { "dependencies": { "debug": "^4.1.1", "env-paths": "^2.2.0", "fs-extra": "^8.1.0", "got": "^11.8.5", "progress": "^2.0.3", "semver": "^6.2.0", "sumchecker": "^3.0.1" }, "optionalDependencies": { "global-agent": "^3.0.0" } }, "sha512-F+nKc0xW+kVbBRhFzaMgPy3KwmuNTYX1fx6+FxxoSnNgwYX6LD7AKBTWkU0MQ6IBoe7dz069CNkR673sPAgkCQ=="],
 
     "app-builder-lib/ci-info": ["[email protected]", "", {}, "sha512-Wdy2Igu8OcBpI2pZePZ5oWjPC38tmDVx5WKUXKwlLYkA0ozo85sLsLvkBbBn/sZaSCMFOGZJ14fvW9t5/d7kdA=="],
@@ -1038,8 +1020,6 @@
 
     "tiny-async-pool/semver": ["[email protected]", "", { "bin": { "semver": "bin/semver" } }, "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g=="],
 
-    "@ai-sdk/react/@ai-sdk/provider-utils/@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
-
     "@electron/asar/minimatch/brace-expansion": ["[email protected]", "", { "dependencies": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" } }, "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g=="],
 
     "@electron/get/fs-extra/jsonfile": ["[email protected]", "", { "optionalDependencies": { "graceful-fs": "^4.1.6" } }, "sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg=="],
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts
index 1a28371..73b65f5 100644
--- a/packages/api/src/agent-manager.ts
+++ b/packages/api/src/agent-manager.ts
@@ -98,6 +98,15 @@ Good approach:
 3. Work through each file sequentially
 `.trim();
 
+/**
+ * Returns true for OpenCode Go models served via the Anthropic-format
+ * `/messages` endpoint (MiniMax M2.x, Qwen3.x Plus). See
+ * https://opencode.ai/docs/go/#endpoints for the per-model endpoint table.
+ */
+function isOpencodeGoAnthropicModel(modelId: string): boolean {
+	return modelId.startsWith("minimax-") || modelId.startsWith("qwen");
+}
+
 function buildSystemPrompt(toolNames: string[], basePrompt?: string): string {
 	const base = basePrompt || DEFAULT_SYSTEM_PROMPT;
 	const toolList = toolNames
@@ -553,6 +562,16 @@ export class AgentManager {
 							apiKey = envKey;
 							baseURL = key.base_url;
 							model = effectiveModelId;
+							// OpenCode Go splits its catalog across two endpoints:
+							//   `/chat/completions` — GLM, Kimi, DeepSeek, MiMo (OpenAI-compatible)
+							//   `/messages`        — MiniMax, Qwen (Anthropic-format)
+							// The configured key has provider="opencode-go" which defaults to
+							// the OpenAI-compatible path. When the selected model lives on the
+							// `/messages` route, route through the API-key Anthropic provider
+							// instead so the SDK targets the correct endpoint and protocol.
+							if (key.provider === "opencode-go" && isOpencodeGoAnthropicModel(model)) {
+								provider = "opencode-anthropic";
+							}
 							tabAgent.keyId = effectiveKeyId;
 							tabAgent.modelId = effectiveModelId;
 							useOverride = true;
@@ -847,8 +866,12 @@ export class AgentManager {
 
 		for (let fallbackIdx = 0; fallbackIdx < maxFallbackAttempts; fallbackIdx++) {
 			const entry = fallbackSequence[fallbackIdx];
-			currentKeyId = entry.key_id;
-			currentModelId = entry.model_id;
+			if (!entry) break; // unreachable: loop bound guarantees defined, satisfies TS
+			// Convert empty strings (used when caller omitted keyId/modelId in
+			// manual mode) to undefined so `getOrCreateAgentForTab` falls back
+			// to the tabAgent's stored defaults via the `?? tabAgent.keyId` chain.
+			currentKeyId = entry.key_id || undefined;
+			currentModelId = entry.model_id || undefined;
 			allOutput = "";
 			let assistantText = "";
 			let assistantThinking = "";
@@ -977,8 +1000,8 @@ export class AgentManager {
 
 				// Try the next entry in the agent's fallback sequence
 				const nextIdx = fallbackIdx + 1;
-				if (nextIdx < maxFallbackAttempts) {
-					const nextEntry = fallbackSequence[nextIdx];
+				const nextEntry = fallbackSequence[nextIdx];
+				if (nextIdx < maxFallbackAttempts && nextEntry) {
 					const fallbackMsg =
 						`Key "${tabAgent.keyId}" rate limited. ` +
 						`Falling back to "${nextEntry.key_id}" (model: ${nextEntry.model_id})...`;
@@ -1021,9 +1044,11 @@ export class AgentManager {
 			const startIdx = models.findIndex((m) => m.key_id === keyId && m.model_id === modelId);
 			return startIdx >= 0 ? models.slice(startIdx) : models;
 		}
-		// Manual mode: no fallback — just the selected key/model pair
-		if (keyId && modelId) return [{ key_id: keyId, model_id: modelId }];
-		return [];
+		// Manual mode: no fallback — just the selected key/model pair.
+		// Always return at least one entry so `processMessage` runs the agent
+		// once (empty strings let `getOrCreateAgentForTab` fall back to the
+		// tabAgent's stored defaults or environment-driven config).
+		return [{ key_id: keyId ?? "", model_id: modelId ?? "" }];
 	}
 
 	queueMessage(tabId: string, message: string, clientId?: string): { messageId: string } {
diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts
index d7dd0be..ba5dabd 100644
--- a/packages/api/src/app.ts
+++ b/packages/api/src/app.ts
@@ -41,6 +41,7 @@ app.post("/chat", async (c) => {
 		message?: unknown;
 		keyId?: unknown;
 		modelId?: unknown;
+		agentModels?: unknown;
 		reasoningEffort?: unknown;
 		workingDirectory?: unknown;
 		queueId?: unknown;
diff --git a/packages/core/package.json b/packages/core/package.json
index 6f88398..6345ac3 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -11,7 +11,7 @@
 		"typecheck": "tsc --noEmit"
 	},
 	"dependencies": {
-		"@ai-sdk/anthropic": "^3.0.0",
+		"@ai-sdk/anthropic": "^1.2.12",
 		"@ai-sdk/openai-compatible": "^0.2.0",
 		"ai": "^4.0.0",
 		"chokidar": "^5.0.0",
diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts
index 24de59d..3cd8a5b 100644
--- a/packages/core/src/agent/agent.ts
+++ b/packages/core/src/agent/agent.ts
@@ -1,6 +1,6 @@
 import { realpathSync } from "node:fs";
 import { dirname, isAbsolute, relative, resolve } from "node:path";
-import type { CoreMessage, LanguageModelV1 } from "ai";
+import type { CoreMessage, CoreSystemMessage } from "ai";
 import { streamText } from "ai";
 import { buildBillingHeaderValue, SYSTEM_IDENTITY } from "../credentials/claude.js";
 import { createProvider, prefixToolName, unprefixToolName } from "../llm/provider.js";
@@ -16,7 +16,7 @@ import type {
 	ToolResult,
 } from "../types/index.js";
 
-function toCoreMessages(messages: ChatMessage[], isAnthropic?: boolean): CoreMessage[] {
+function toCoreMessages(messages: ChatMessage[], useToolPrefix?: boolean): CoreMessage[] {
 	const result: CoreMessage[] = [];
 	for (const msg of messages) {
 		if (msg.role === "user") {
@@ -27,12 +27,12 @@ function toCoreMessages(messages: ChatMessage[], isAnthropic?: boolean): CoreMes
 				| { type: "tool-call"; toolCallId: string; toolName: string; args: Record<string, unknown> }
 			> = [{ type: "text", text: msg.content }];
 			for (const tc of msg.toolCalls ?? []) {
-				const toolName = isAnthropic ? prefixToolName(tc.name) : tc.name;
+				const toolName = useToolPrefix ? prefixToolName(tc.name) : tc.name;
 				parts.push({ type: "tool-call", toolCallId: tc.id, toolName, args: tc.arguments });
 			}
 			result.push({ role: "assistant", content: parts });
 			for (const tr of msg.toolResults ?? []) {
-				const toolName = isAnthropic ? prefixToolName(tr.toolName) : tr.toolName;
+				const toolName = useToolPrefix ? prefixToolName(tr.toolName) : tr.toolName;
 				result.push({
 					role: "tool",
 					content: [
@@ -45,6 +45,47 @@ function toCoreMessages(messages: ChatMessage[], isAnthropic?: boolean): CoreMes
 	return result;
 }
 
+/**
+ * Apply Anthropic prompt-caching breakpoints to a message list.
+ *
+ * Anthropic caches the entire request prefix up to (and including) any block
+ * marked with `cache_control`. Up to 4 breakpoints per request; we use at most
+ * four (first 2 system + last 2 non-system) — three with one system message.
+ *
+ * Strategy (mirrors OpenCode's `applyCaching` in transform.ts):
+ *  - Mark the first 2 system messages → caches system prompt (and tools, which
+ *    sit before messages in the request body).
+ *  - Mark the last 2 non-system messages → rolling cache that extends through
+ *    the conversation each turn.
+ *
+ * Only applied to `@ai-sdk/anthropic` providers. OpenCode Zen's OpenAI-compatible
+ * endpoint (`/zen/v1/chat/completions`) backs models like MiniMax, GLM, Kimi,
+ * Grok, etc. — those upstreams do automatic prefix caching server-side and
+ * don't accept `cache_control` markers. OpenCode's own transform.ts gates
+ * `applyCaching` on Anthropic-family detection for the same reason. Models
+ * served via `@ai-sdk/openai` (GPT) and `@ai-sdk/google` (Gemini) likewise
+ * use server-side automatic caching.
+ */
+function applyAnthropicCaching(msgs: CoreMessage[]): void {
+	const targets = new Set<CoreMessage>();
+
+	const systemMsgs = msgs.filter((m) => m.role === "system").slice(0, 2);
+	for (const m of systemMsgs) targets.add(m);
+
+	const nonSystem = msgs.filter((m) => m.role !== "system").slice(-2);
+	for (const m of nonSystem) targets.add(m);
+
+	for (const msg of targets) {
+		msg.providerOptions = {
+			...msg.providerOptions,
+			anthropic: {
+				...(msg.providerOptions?.anthropic ?? {}),
+				cacheControl: { type: "ephemeral" },
+			},
+		};
+	}
+}
+
 function formatError(err: unknown, config: AgentConfig): string {
 	const context = `[model=${config.model}, baseURL=${config.baseURL}]`;
 
@@ -66,7 +107,7 @@ function formatError(err: unknown, config: AgentConfig): string {
 	return `${String(err)} ${context}`;
 }
 
-const MAX_STEPS = 10;
+const MAX_STEPS = 50;
 
 export class Agent {
 	status: AgentStatus = "idle";
@@ -223,7 +264,15 @@ export class Agent {
 		this.messages.push({ role: "user", content: userMessage });
 
 		const registry = createToolRegistry(this.config.tools);
-		const isAnthropic = this.config.provider === "anthropic";
+		// `isClaudeOAuth` gates Claude-Code-CLI-specific behavior: billing-header
+		// injection, identity preamble, `mcp_*` tool name prefix, and extended
+		// thinking config. Only the OAuth flow (provider="anthropic") needs these.
+		// `usesAnthropicSDK` is the broader category — any provider whose
+		// requests are serialized by `@ai-sdk/anthropic` and therefore expect
+		// Anthropic-style `cache_control` markers. Today that's Claude OAuth
+		// plus OpenCode Go's MiniMax/Qwen routes.
+		const isClaudeOAuth = this.config.provider === "anthropic";
+		const usesAnthropicSDK = isClaudeOAuth || this.config.provider === "opencode-anthropic";
 		const providerFactory = createProvider({
 			apiKey: this.config.apiKey,
 			baseURL: this.config.baseURL,
@@ -231,17 +280,21 @@ export class Agent {
 			claudeCredentials: this.config.claudeCredentials,
 		});
 
-		// For Anthropic provider, prefix tool names and build full system prompt
+		// Only the Claude OAuth flow expects `mcp_*` prefixed tool names. The
+		// OpenCode Go anthropic-format endpoint passes tools through to MiniMax
+		// or Qwen, which expect raw names.
 		const aiTools = registry.getAISDKTools();
-		const tools = isAnthropic
+		const tools = isClaudeOAuth
 			? Object.fromEntries(
 					Object.entries(aiTools).map(([name, tool]) => [prefixToolName(name), tool]),
 				)
 			: aiTools;
 
-		// Build system prompt
+		// Build system prompt — Claude OAuth requests embed a billing header
+		// and the Claude Code identity preamble so Anthropic recognizes the
+		// request as coming from the official CLI.
 		let systemPrompt = this.config.systemPrompt;
-		if (isAnthropic) {
+		if (isClaudeOAuth) {
 			const billingHeader = buildBillingHeaderValue(this.messages);
 			systemPrompt = `${billingHeader}\n${SYSTEM_IDENTITY}\n\n${systemPrompt}`;
 		}
@@ -260,41 +313,58 @@ export class Agent {
 				const effort = options?.reasoningEffort ?? this.config.reasoningEffort ?? "max";
 
 				// Build stream text options
-				const rawModel = providerFactory(this.config.model);
-				const model = rawModel as unknown as LanguageModelV1;
+				const model = providerFactory(this.config.model);
+
+				// Build the message list with the system prompt prepended as a system
+				// role message. This is required for Anthropic prompt caching: the
+				// `system` shortcut parameter takes a plain string with nowhere to
+				// attach `providerOptions.anthropic.cacheControl`. Moving it inline
+				// also lets us apply rolling cache breakpoints to the last messages.
+				const systemMessage: CoreSystemMessage = { role: "system", content: systemPrompt };
+				const coreMessages: CoreMessage[] = [
+					systemMessage,
+					...toCoreMessages(stepMessages, isClaudeOAuth),
+				];
+
+				if (usesAnthropicSDK) {
+					applyAnthropicCaching(coreMessages);
+				}
+
 				const streamOptions: Parameters<typeof streamText>[0] = {
 					model,
-					system: systemPrompt,
-					messages: toCoreMessages(stepMessages, isAnthropic),
+					messages: coreMessages,
 					tools,
 				};
 
-				if (isAnthropic && effort !== "none") {
-					const modelId = this.config.model;
-					const isOpus47 = modelId === "claude-opus-4-7";
-
+				if (isClaudeOAuth && effort !== "none") {
+					// Opus 4.7 rejects `thinking: { type: "enabled" }` ("reasoning-
+					// signature without reasoning") and only supports adaptive thinking.
+					// `@ai-sdk/anthropic` v1.x can't emit `type: "adaptive"`, so we
+					// leave `providerOptions.anthropic.thinking` unset and let the
+					// custom fetch in `createClaudeOAuthProvider` inject the adaptive
+					// shape into the request body. We still set `maxTokens` here so
+					// the SDK serializes it — adaptive thinking spends from this
+					// budget rather than a separate one.
+					const isOpus47 = this.config.model === "claude-opus-4-7";
+					const budgetTokens =
+						effort === "max"
+							? 16000
+							: effort === "high"
+								? 10000
+								: effort === "medium"
+									? 5000
+									: effort === "low"
+										? 2000
+										: 0;
 					if (isOpus47) {
-						// Opus 4.7 only supports adaptive thinking
-						streamOptions.providerOptions = {
-							anthropic: { thinking: { type: "adaptive" as const } },
-						};
+						streamOptions.maxTokens = budgetTokens + 8000;
 					} else {
-						const budgetTokens =
-							effort === "max"
-								? 16000
-								: effort === "high"
-									? 10000
-									: effort === "medium"
-										? 5000
-										: effort === "low"
-											? 2000
-											: 0;
 						streamOptions.providerOptions = {
 							anthropic: { thinking: { type: "enabled" as const, budgetTokens } },
 						};
 						streamOptions.maxTokens = budgetTokens + 8000;
 					}
-				} else if (!isAnthropic && effort !== "none") {
+				} else if (!usesAnthropicSDK && effort !== "none") {
 					streamOptions.providerOptions = { openaiCompatible: { reasoningEffort: effort } };
 				}
 
@@ -313,7 +383,7 @@ export class Agent {
 							yield { type: "reasoning-delta", delta: event.textDelta };
 						} else if (event.type === "tool-call") {
 							const rawName = event.toolName;
-							const toolName = isAnthropic ? unprefixToolName(rawName) : rawName;
+							const toolName = isClaudeOAuth ? unprefixToolName(rawName) : rawName;
 							const toolCall: ToolCall = {
 								id: event.toolCallId,
 								name: toolName,
diff --git a/packages/core/src/llm/provider.ts b/packages/core/src/llm/provider.ts
index 7cbb829..a7d800c 100644
--- a/packages/core/src/llm/provider.ts
+++ b/packages/core/src/llm/provider.ts
@@ -1,6 +1,6 @@
 import { createAnthropic } from "@ai-sdk/anthropic";
 import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
-import type { LanguageModelV1Middleware, LanguageModelV1Prompt } from "ai";
+import type { LanguageModelV1, LanguageModelV1Middleware, LanguageModelV1Prompt } from "ai";
 import { wrapLanguageModel } from "ai";
 
 function normalizeMessages(msgs: unknown[]): unknown[] {
@@ -61,9 +61,19 @@ function unprefixToolName(name: string): string {
 	return name;
 }
 
-export function createProvider(config: ProviderConfig) {
+// Explicit factory return type so the inferred type doesn't leak references
+// into transitive `@ai-sdk/provider` paths (which would trip TS2742).
+// `@ai-sdk/anthropic` v1.x already returns `LanguageModelV1`-spec models;
+// `@ai-sdk/openai-compatible` v0.2.x and `wrapLanguageModel` likewise.
+export type ModelFactory = (modelId: string) => LanguageModelV1;
+
+export function createProvider(config: ProviderConfig): ModelFactory {
 	if (config.provider === "anthropic") {
-		return createAnthropicProvider(config);
+		return createClaudeOAuthProvider(config);
+	}
+
+	if (config.provider === "opencode-anthropic") {
+		return createApiKeyAnthropicProvider(config);
 	}
 
 	// Default: OpenAI-compatible provider
@@ -94,7 +104,21 @@ export function createProvider(config: ProviderConfig) {
 	};
 }
 
-function createAnthropicProvider(config: ProviderConfig) {
+/**
+ * Claude OAuth provider. Used by Dispatch's `anthropic` provider keys
+ * (claude-pro, claude-max). Swaps `x-api-key` for `Authorization: Bearer`
+ * to satisfy Anthropic's OAuth flow, and mimics Claude Code CLI request
+ * headers so the request bills against the user's Claude subscription.
+ *
+ * The custom fetch also rewrites the outgoing JSON body for Claude Opus 4.7:
+ * that model rejects `thinking: { type: "enabled", budget_tokens }` (the only
+ * shape `@ai-sdk/anthropic` v1.x can emit) with "reasoning-signature without
+ * reasoning", and instead requires `thinking: { type: "adaptive" }`. `ai` v4
+ * is pinned to V1-spec providers, so we can't upgrade to v3 of the Anthropic
+ * SDK without breaking everything. Doing the rewrite here keeps the rest of
+ * the agent path SDK-agnostic and limits the special case to one model.
+ */
+function createClaudeOAuthProvider(config: ProviderConfig): ModelFactory {
 	const accessToken = config.claudeCredentials?.accessToken ?? config.apiKey;
 
 	const customFetch = Object.assign(
@@ -102,7 +126,9 @@ function createAnthropicProvider(config: ProviderConfig) {
 			const headers = new Headers(init?.headers);
 			headers.delete("x-api-key");
 			headers.set("authorization", `Bearer ${accessToken}`);
-			return globalThis.fetch(url, { ...init, headers });
+
+			const body = rewriteBodyForOpus47(init?.body);
+			return globalThis.fetch(url, { ...init, headers, body });
 		},
 		{ preconnect: globalThis.fetch.preconnect?.bind(globalThis.fetch) },
 	);
@@ -118,9 +144,61 @@ function createAnthropicProvider(config: ProviderConfig) {
 		fetch: customFetch as typeof globalThis.fetch,
 	});
 
-	return (modelId: string) => {
-		return anthropic(modelId);
-	};
+	return (modelId: string) => anthropic(modelId);
+}
+
+/**
+ * If the request body is a JSON `/messages` payload targeting Claude Opus 4.7
+ * and the caller signaled they want thinking (by setting `max_tokens` above
+ * Anthropic's default 4096), insert `thinking: { type: "adaptive" }`.
+ *
+ * Skipping the rewrite when `max_tokens` is small (or absent) keeps `effort:
+ * "none"` requests as plain non-thinking calls — agent.ts only sets a high
+ * `max_tokens` when thinking is wanted, so this acts as a clean signal.
+ *
+ * Returns the body unchanged for any other model, any non-string body, or any
+ * payload that fails to parse, leaving non-Anthropic providers, non-Opus-4.7
+ * Claude models, and streaming/binary uploads unaffected.
+ */
+function rewriteBodyForOpus47(body: BodyInit | null | undefined): BodyInit | null | undefined {
+	if (typeof body !== "string") return body;
+	let parsed: Record<string, unknown>;
+	try {
+		parsed = JSON.parse(body) as Record<string, unknown>;
+	} catch {
+		return body;
+	}
+	if (parsed.model !== "claude-opus-4-7") return body;
+	const maxTokens = typeof parsed.max_tokens === "number" ? parsed.max_tokens : 0;
+	if (maxTokens <= 4096) return body;
+	parsed.thinking = { type: "adaptive" };
+	// Anthropic rejects requests that combine extended thinking (enabled or
+	// adaptive) with any temperature other than 1. `ai` v4 defaults
+	// `temperature: 0`, and the v1 Anthropic SDK normally strips it when its
+	// own `isThinking` flag is set — but we're injecting `thinking` here,
+	// behind the SDK's back, so we have to strip it ourselves. Same for
+	// `top_p` and `top_k`, which are likewise rejected with thinking.
+	delete parsed.temperature;
+	delete parsed.top_p;
+	delete parsed.top_k;
+	return JSON.stringify(parsed);
+}
+
+/**
+ * Plain-API-key Anthropic-format provider. Used to hit gateways that speak
+ * Anthropic's `/messages` protocol with a standard `x-api-key` header — most
+ * importantly OpenCode Go's MiniMax and Qwen routes. Unlike the Claude OAuth
+ * variant, no `claudeCredentials` are present, no Claude Code mimicry headers
+ * are sent, and the API key is passed verbatim through the SDK's default
+ * authentication path.
+ */
+function createApiKeyAnthropicProvider(config: ProviderConfig): ModelFactory {
+	const anthropic = createAnthropic({
+		apiKey: config.apiKey,
+		baseURL: config.baseURL || "https://opencode.ai/zen/go/v1",
+	});
+
+	return (modelId: string) => anthropic(modelId);
 }
 
 export { prefixToolName, unprefixToolName };
diff --git a/packages/frontend/src/lib/components/KeyUsage.svelte b/packages/frontend/src/lib/components/KeyUsage.svelte
index 96a4b08..00d179e 100644
--- a/packages/frontend/src/lib/components/KeyUsage.svelte
+++ b/packages/frontend/src/lib/components/KeyUsage.svelte
@@ -440,13 +440,13 @@ function hasBucketData(bucket: UsageBucket | undefined): boolean {
 									<span class="text-xs text-base-content/50">Models</span>
 									<span class="text-xs font-mono">{entry.data.models.length} available</span>
 								</div>
-								{#if m.rpm > 0}
+								{#if m && m.rpm > 0}
 									<div class="flex items-center justify-between">
 										<span class="text-xs text-base-content/50">RPM</span>
 										<span class="text-xs font-mono">{m.rpm}</span>
 									</div>
 								{/if}
-								{#if m.requestsPerDay > 0}
+								{#if m && m.requestsPerDay > 0}
 									<div class="flex items-center justify-between">
 										<span class="text-xs text-base-content/50">RPD</span>
 										<span class="text-xs font-mono">{m.requestsPerDay.toLocaleString()}</span>
author	Adam Malczewski <[email protected]>	2026-05-24 13:24:04 +0900
committer	Adam Malczewski <[email protected]>	2026-05-24 13:24:04 +0900
commit	399e1509b93b9f3c56142f94b8fb2c30c2dedb2f (patch)
tree	d67f18f5cca91a66e3146cbd2f48920571768e23
parent	997b00034435440d412f955e05e53f09bae83f9e (diff)
download	dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.tar.gz dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.zip