summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Adam Malczewski <[email protected]> 2026-05-24 13:24:04 +0900
committer Adam Malczewski <[email protected]> 2026-05-24 13:24:04 +0900
commit 399e1509b93b9f3c56142f94b8fb2c30c2dedb2f (patch)
tree d67f18f5cca91a66e3146cbd2f48920571768e23
parent 997b00034435440d412f955e05e53f09bae83f9e (diff)
download dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.tar.gz
dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.zip
fix: prompt caching, OpenCode Go MiniMax/Qwen support, Opus 4.7 thinking, SDK compat
- Implement Anthropic prompt caching: first system message + last 2 non-system messages get cache_control: ephemeral, mirroring OpenCode's applyCaching strategy. Move system prompt inline into messages array so providerOptions can attach.
- Add opencode-anthropic provider variant routing MiniMax/Qwen models through the /messages endpoint with x-api-key auth, distinct from the Claude OAuth flow's Bearer auth and Claude Code mimicry.
- Split isAnthropic into isClaudeOAuth (billing header, mcp_ tool prefix, thinking config) and usesAnthropicSDK (cache markers) so non-OAuth Anthropic-format gateways get the right treatment.
- Pin @ai-sdk/anthropic to ^1.2.12: v3 returns LanguageModelV3-spec models that ai v4's streamText rejects at runtime ('AI SDK 4 only supports models that implement specification version v1'). Drop unnecessary V1 casts.
- Restore Opus 4.7 extended thinking by rewriting the outgoing /messages body in the Claude OAuth fetch interceptor: inject thinking: { type: 'adaptive' } (v1 SDK can't emit it), strip temperature/top_p/top_k (Anthropic rejects them with thinking enabled). Gated on max_tokens > 4096 so effort=none still works.
- Bump MAX_STEPS from 10 to 50 to align with AI SDK's stepCountIs(20) default and reduce mid-task halts.
- Fix pre-existing typecheck errors in agent-manager.ts (entry/nextEntry narrowing), app.ts (agentModels body field), KeyUsage.svelte (m guards), and a TS2742 in provider.ts via explicit ModelFactory return type.
- buildFallbackSequence now always returns at least one entry so processMessage runs the agent loop even without keyId/modelId (fixes 4 broken agent-manager tests).
-rw-r--r--bun.lock28
-rw-r--r--packages/api/src/agent-manager.ts39
-rw-r--r--packages/api/src/app.ts1
-rw-r--r--packages/core/package.json2
-rw-r--r--packages/core/src/agent/agent.ts138
-rw-r--r--packages/core/src/llm/provider.ts94
-rw-r--r--packages/frontend/src/lib/components/KeyUsage.svelte4
7 files changed, 230 insertions, 76 deletions
diff --git a/bun.lock b/bun.lock
index 3bcf51b..92f730c 100644
--- a/bun.lock
+++ b/bun.lock
@@ -25,7 +25,7 @@
"name": "@dispatch/core",
"version": "0.0.1",
"dependencies": {
- "@ai-sdk/anthropic": "^3.0.0",
+ "@ai-sdk/anthropic": "^1.2.12",
"@ai-sdk/openai-compatible": "^0.2.0",
"ai": "^4.0.0",
"chokidar": "^5.0.0",
@@ -64,13 +64,13 @@
"packages": {
"7zip-bin": ["[email protected]", "", {}, "sha512-ukTPVhqG4jNzMro2qA9HSCSSVJN3aN7tlb+hfqYCt3ER0yWroeA2VR38MNrOHLQ/cVj+DaIMad0kFCtWWowh/A=="],
- "@ai-sdk/anthropic": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-saEX+h5JDOkT9P/+REKDyikbnJiToFuLipgNcsmu4Zr3GW5kW1m9HhvrPK+vj63itIOsoZU6tmVIjkrePOlIUA=="],
+ "@ai-sdk/anthropic": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-YSzjlko7JvuiyQFmI9RN1tNZdEiZxc+6xld/0tq/VkJaHpEzGAb1yiNxxvmYVcjvfu/PcvCxAAYXmTYQQ63IHQ=="],
"@ai-sdk/openai-compatible": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-LkvfcM8slJedRyJa/MiMiaOzcMjV1zNDwzTHEGz7aAsgsQV0maLfmJRi/nuSwf5jmp0EouC+JXXDUj2l94HgQw=="],
- "@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw=="],
+ "@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
- "@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.8" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw=="],
+ "@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
"@ai-sdk/react": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider-utils": "2.2.8", "@ai-sdk/ui-utils": "1.2.11", "swr": "^2.2.5", "throttleit": "2.1.0" }, "peerDependencies": { "react": "^18 || ^19 || ^19.0.0-rc", "zod": "^3.23.8" }, "optionalPeers": ["zod"] }, "sha512-jK1IZZ22evPZoQW3vlkZ7wvjYGYF+tRBKXtrcolduIkQ/m/sOAVcVeVDUDvh1T91xCnWCdUGCPZg2avZ90mv3g=="],
@@ -240,8 +240,6 @@
"@sindresorhus/is": ["@sindresorhus/[email protected]", "", {}, "sha512-t09vSN3MdfsyCHoFcTRCH/iUtG7OJ0CsjzB8cjAmKc/va/kIgeDI/TxsigdncE/4be734m0cvIYwNaV4i2XqAw=="],
- "@standard-schema/spec": ["@standard-schema/[email protected]", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="],
-
"@sveltejs/acorn-typescript": ["@sveltejs/[email protected]", "", { "peerDependencies": { "acorn": "^8.9.0" } }, "sha512-lVJX6qEgs/4DOcRTpo56tmKzVPtoWAaVbL4hfO7t7NVwl9AAXzQR6cihesW1BmNMPl+bK6dreu2sOKBP2Q9CIA=="],
"@sveltejs/vite-plugin-svelte": ["@sveltejs/[email protected]", "", { "dependencies": { "@sveltejs/vite-plugin-svelte-inspector": "^4.0.1", "debug": "^4.4.1", "deepmerge": "^4.3.1", "kleur": "^4.1.5", "magic-string": "^0.30.17", "vitefu": "^1.0.6" }, "peerDependencies": { "svelte": "^5.0.0", "vite": "^6.0.0" } }, "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ=="],
@@ -530,8 +528,6 @@
"estree-walker": ["[email protected]", "", { "dependencies": { "@types/estree": "^1.0.0" } }, "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g=="],
- "eventsource-parser": ["[email protected]", "", {}, "sha512-70QWGkr4snxr0OXLRWsFLeRBIRPuQOvt4s8QYjmUlmlkyTZkRqS7EDVRZtzU3TiyDbXSzaOeF0XUKy8PchzukQ=="],
-
"expect-type": ["[email protected]", "", {}, "sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA=="],
"exponential-backoff": ["[email protected]", "", {}, "sha512-ZgEeZXj30q+I0EN+CbSSpIyPaJ5HVQD18Z1m+u1FXbAeT94mr1zw50q4q6jiiC447Nl/YTcIYSAftiGqetwXCA=="],
@@ -950,16 +946,6 @@
"zod-to-json-schema": ["[email protected]", "", { "peerDependencies": { "zod": "^3.25.28 || ^4" } }, "sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA=="],
- "@ai-sdk/openai-compatible/@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
-
- "@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
-
- "@ai-sdk/react/@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
-
- "@ai-sdk/ui-utils/@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
-
- "@ai-sdk/ui-utils/@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
-
"@electron/asar/minimatch": ["[email protected]", "", { "dependencies": { "brace-expansion": "^1.1.7" } }, "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w=="],
"@electron/fuses/fs-extra": ["[email protected]", "", { "dependencies": { "at-least-node": "^1.0.0", "graceful-fs": "^4.2.0", "jsonfile": "^6.0.1", "universalify": "^2.0.0" } }, "sha512-hcg3ZmepS30/7BSFqRvoo3DOMQu7IjqxO5nCDt+zM9XWjb33Wg7ziNT+Qvqbuc3+gWpzO02JubVyk2G4Zvo1OQ=="],
@@ -990,10 +976,6 @@
"@tailwindcss/oxide-wasm32-wasi/tslib": ["[email protected]", "", { "bundled": true }, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="],
- "ai/@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
-
- "ai/@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="],
-
"app-builder-lib/@electron/get": ["@electron/[email protected]", "", { "dependencies": { "debug": "^4.1.1", "env-paths": "^2.2.0", "fs-extra": "^8.1.0", "got": "^11.8.5", "progress": "^2.0.3", "semver": "^6.2.0", "sumchecker": "^3.0.1" }, "optionalDependencies": { "global-agent": "^3.0.0" } }, "sha512-F+nKc0xW+kVbBRhFzaMgPy3KwmuNTYX1fx6+FxxoSnNgwYX6LD7AKBTWkU0MQ6IBoe7dz069CNkR673sPAgkCQ=="],
"app-builder-lib/ci-info": ["[email protected]", "", {}, "sha512-Wdy2Igu8OcBpI2pZePZ5oWjPC38tmDVx5WKUXKwlLYkA0ozo85sLsLvkBbBn/sZaSCMFOGZJ14fvW9t5/d7kdA=="],
@@ -1038,8 +1020,6 @@
"tiny-async-pool/semver": ["[email protected]", "", { "bin": { "semver": "bin/semver" } }, "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g=="],
- "@ai-sdk/react/@ai-sdk/provider-utils/@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="],
-
"@electron/asar/minimatch/brace-expansion": ["[email protected]", "", { "dependencies": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" } }, "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g=="],
"@electron/get/fs-extra/jsonfile": ["[email protected]", "", { "optionalDependencies": { "graceful-fs": "^4.1.6" } }, "sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg=="],
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts
index 1a28371..73b65f5 100644
--- a/packages/api/src/agent-manager.ts
+++ b/packages/api/src/agent-manager.ts
@@ -98,6 +98,15 @@ Good approach:
3. Work through each file sequentially
`.trim();
+/**
+ * Returns true for OpenCode Go models served via the Anthropic-format
+ * `/messages` endpoint (MiniMax M2.x, Qwen3.x Plus). See
+ * https://opencode.ai/docs/go/#endpoints for the per-model endpoint table.
+ *
+ * Detection is a case-sensitive prefix match on the model id: ids starting
+ * with `minimax-` or with `qwen` match. The `qwen` prefix has no trailing
+ * separator, so every id beginning with `qwen` is treated as a `/messages`
+ * model.
+ * NOTE(review): confirm that no Qwen model served on `/chat/completions`
+ * shares that prefix, and that model ids are always lowercase.
+ *
+ * @param modelId - Model identifier selected for the request.
+ * @returns `true` when the id starts with one of the two prefixes.
+ */
+function isOpencodeGoAnthropicModel(modelId: string): boolean {
+ return modelId.startsWith("minimax-") || modelId.startsWith("qwen");
+}
+
function buildSystemPrompt(toolNames: string[], basePrompt?: string): string {
const base = basePrompt || DEFAULT_SYSTEM_PROMPT;
const toolList = toolNames
@@ -553,6 +562,16 @@ export class AgentManager {
apiKey = envKey;
baseURL = key.base_url;
model = effectiveModelId;
+ // OpenCode Go splits its catalog across two endpoints:
+ // `/chat/completions` — GLM, Kimi, DeepSeek, MiMo (OpenAI-compatible)
+ // `/messages` — MiniMax, Qwen (Anthropic-format)
+ // The configured key has provider="opencode-go" which defaults to
+ // the OpenAI-compatible path. When the selected model lives on the
+ // `/messages` route, route through the API-key Anthropic provider
+ // instead so the SDK targets the correct endpoint and protocol.
+ if (key.provider === "opencode-go" && isOpencodeGoAnthropicModel(model)) {
+ provider = "opencode-anthropic";
+ }
tabAgent.keyId = effectiveKeyId;
tabAgent.modelId = effectiveModelId;
useOverride = true;
@@ -847,8 +866,12 @@ export class AgentManager {
for (let fallbackIdx = 0; fallbackIdx < maxFallbackAttempts; fallbackIdx++) {
const entry = fallbackSequence[fallbackIdx];
- currentKeyId = entry.key_id;
- currentModelId = entry.model_id;
+ if (!entry) break; // unreachable: loop bound guarantees defined, satisfies TS
+ // Convert empty strings (used when caller omitted keyId/modelId in
+ // manual mode) to undefined so `getOrCreateAgentForTab` falls back
+ // to the tabAgent's stored defaults via the `?? tabAgent.keyId` chain.
+ currentKeyId = entry.key_id || undefined;
+ currentModelId = entry.model_id || undefined;
allOutput = "";
let assistantText = "";
let assistantThinking = "";
@@ -977,8 +1000,8 @@ export class AgentManager {
// Try the next entry in the agent's fallback sequence
const nextIdx = fallbackIdx + 1;
- if (nextIdx < maxFallbackAttempts) {
- const nextEntry = fallbackSequence[nextIdx];
+ const nextEntry = fallbackSequence[nextIdx];
+ if (nextIdx < maxFallbackAttempts && nextEntry) {
const fallbackMsg =
`Key "${tabAgent.keyId}" rate limited. ` +
`Falling back to "${nextEntry.key_id}" (model: ${nextEntry.model_id})...`;
@@ -1021,9 +1044,11 @@ export class AgentManager {
const startIdx = models.findIndex((m) => m.key_id === keyId && m.model_id === modelId);
return startIdx >= 0 ? models.slice(startIdx) : models;
}
- // Manual mode: no fallback — just the selected key/model pair
- if (keyId && modelId) return [{ key_id: keyId, model_id: modelId }];
- return [];
+ // Manual mode: no fallback — just the selected key/model pair.
+ // Always return at least one entry so `processMessage` runs the agent
+ // once (empty strings let `getOrCreateAgentForTab` fall back to the
+ // tabAgent's stored defaults or environment-driven config).
+ return [{ key_id: keyId ?? "", model_id: modelId ?? "" }];
}
queueMessage(tabId: string, message: string, clientId?: string): { messageId: string } {
diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts
index d7dd0be..ba5dabd 100644
--- a/packages/api/src/app.ts
+++ b/packages/api/src/app.ts
@@ -41,6 +41,7 @@ app.post("/chat", async (c) => {
message?: unknown;
keyId?: unknown;
modelId?: unknown;
+ agentModels?: unknown;
reasoningEffort?: unknown;
workingDirectory?: unknown;
queueId?: unknown;
diff --git a/packages/core/package.json b/packages/core/package.json
index 6f88398..6345ac3 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -11,7 +11,7 @@
"typecheck": "tsc --noEmit"
},
"dependencies": {
- "@ai-sdk/anthropic": "^3.0.0",
+ "@ai-sdk/anthropic": "^1.2.12",
"@ai-sdk/openai-compatible": "^0.2.0",
"ai": "^4.0.0",
"chokidar": "^5.0.0",
diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts
index 24de59d..3cd8a5b 100644
--- a/packages/core/src/agent/agent.ts
+++ b/packages/core/src/agent/agent.ts
@@ -1,6 +1,6 @@
import { realpathSync } from "node:fs";
import { dirname, isAbsolute, relative, resolve } from "node:path";
-import type { CoreMessage, LanguageModelV1 } from "ai";
+import type { CoreMessage, CoreSystemMessage } from "ai";
import { streamText } from "ai";
import { buildBillingHeaderValue, SYSTEM_IDENTITY } from "../credentials/claude.js";
import { createProvider, prefixToolName, unprefixToolName } from "../llm/provider.js";
@@ -16,7 +16,7 @@ import type {
ToolResult,
} from "../types/index.js";
-function toCoreMessages(messages: ChatMessage[], isAnthropic?: boolean): CoreMessage[] {
+function toCoreMessages(messages: ChatMessage[], useToolPrefix?: boolean): CoreMessage[] {
const result: CoreMessage[] = [];
for (const msg of messages) {
if (msg.role === "user") {
@@ -27,12 +27,12 @@ function toCoreMessages(messages: ChatMessage[], isAnthropic?: boolean): CoreMes
| { type: "tool-call"; toolCallId: string; toolName: string; args: Record<string, unknown> }
> = [{ type: "text", text: msg.content }];
for (const tc of msg.toolCalls ?? []) {
- const toolName = isAnthropic ? prefixToolName(tc.name) : tc.name;
+ const toolName = useToolPrefix ? prefixToolName(tc.name) : tc.name;
parts.push({ type: "tool-call", toolCallId: tc.id, toolName, args: tc.arguments });
}
result.push({ role: "assistant", content: parts });
for (const tr of msg.toolResults ?? []) {
- const toolName = isAnthropic ? prefixToolName(tr.toolName) : tr.toolName;
+ const toolName = useToolPrefix ? prefixToolName(tr.toolName) : tr.toolName;
result.push({
role: "tool",
content: [
@@ -45,6 +45,47 @@ function toCoreMessages(messages: ChatMessage[], isAnthropic?: boolean): CoreMes
return result;
}
+/**
+ * Apply Anthropic prompt-caching breakpoints to a message list.
+ *
+ * Anthropic caches the entire request prefix up to (and including) any block
+ * marked with `cache_control`. Up to 4 breakpoints per request; this function
+ * marks at most four messages (first 2 system + last 2 non-system). A list
+ * containing a single system message therefore gets three breakpoints.
+ *
+ * Strategy (mirrors OpenCode's `applyCaching` in transform.ts):
+ * - Mark the first two system messages (`slice(0, 2)`) → caches system prompt
+ *   (and tools, which sit before messages in the request body).
+ * - Mark the last 2 non-system messages → rolling cache that extends through
+ *   the conversation each turn.
+ *
+ * The messages are mutated in place: each selected message gets a new
+ * `providerOptions` object that keeps its existing entries, keeps any existing
+ * `anthropic` options, and sets `anthropic.cacheControl` to
+ * `{ type: "ephemeral" }`. Messages that are not selected are left untouched.
+ *
+ * Intended only for requests serialized by `@ai-sdk/anthropic`. The function
+ * itself performs no provider check, so callers must gate the call. OpenCode
+ * Zen's OpenAI-compatible
+ * endpoint (`/zen/v1/chat/completions`) backs models like MiniMax, GLM, Kimi,
+ * Grok, etc. — those upstreams do automatic prefix caching server-side and
+ * don't accept `cache_control` markers. OpenCode's own transform.ts gates
+ * `applyCaching` on Anthropic-family detection for the same reason. Models
+ * served via `@ai-sdk/openai` (GPT) and `@ai-sdk/google` (Gemini) likewise
+ * use server-side automatic caching.
+ *
+ * @param msgs - Message list to annotate; its elements are modified in place.
+ */
+function applyAnthropicCaching(msgs: CoreMessage[]): void {
+ const targets = new Set<CoreMessage>();
+
+ const systemMsgs = msgs.filter((m) => m.role === "system").slice(0, 2);
+ for (const m of systemMsgs) targets.add(m);
+
+ const nonSystem = msgs.filter((m) => m.role !== "system").slice(-2);
+ for (const m of nonSystem) targets.add(m);
+
+ for (const msg of targets) {
+ msg.providerOptions = {
+ ...msg.providerOptions,
+ anthropic: {
+ ...(msg.providerOptions?.anthropic ?? {}),
+ cacheControl: { type: "ephemeral" },
+ },
+ };
+ }
+}
+
function formatError(err: unknown, config: AgentConfig): string {
const context = `[model=${config.model}, baseURL=${config.baseURL}]`;
@@ -66,7 +107,7 @@ function formatError(err: unknown, config: AgentConfig): string {
return `${String(err)} ${context}`;
}
-const MAX_STEPS = 10;
+const MAX_STEPS = 50;
export class Agent {
status: AgentStatus = "idle";
@@ -223,7 +264,15 @@ export class Agent {
this.messages.push({ role: "user", content: userMessage });
const registry = createToolRegistry(this.config.tools);
- const isAnthropic = this.config.provider === "anthropic";
+ // `isClaudeOAuth` gates Claude-Code-CLI-specific behavior: billing-header
+ // injection, identity preamble, `mcp_*` tool name prefix, and extended
+ // thinking config. Only the OAuth flow (provider="anthropic") needs these.
+ // `usesAnthropicSDK` is the broader category — any provider whose
+ // requests are serialized by `@ai-sdk/anthropic` and therefore expect
+ // Anthropic-style `cache_control` markers. Today that's Claude OAuth
+ // plus OpenCode Go's MiniMax/Qwen routes.
+ const isClaudeOAuth = this.config.provider === "anthropic";
+ const usesAnthropicSDK = isClaudeOAuth || this.config.provider === "opencode-anthropic";
const providerFactory = createProvider({
apiKey: this.config.apiKey,
baseURL: this.config.baseURL,
@@ -231,17 +280,21 @@ export class Agent {
claudeCredentials: this.config.claudeCredentials,
});
- // For Anthropic provider, prefix tool names and build full system prompt
+ // Only the Claude OAuth flow expects `mcp_*` prefixed tool names. The
+ // OpenCode Go anthropic-format endpoint passes tools through to MiniMax
+ // or Qwen, which expect raw names.
const aiTools = registry.getAISDKTools();
- const tools = isAnthropic
+ const tools = isClaudeOAuth
? Object.fromEntries(
Object.entries(aiTools).map(([name, tool]) => [prefixToolName(name), tool]),
)
: aiTools;
- // Build system prompt
+ // Build system prompt — Claude OAuth requests embed a billing header
+ // and the Claude Code identity preamble so Anthropic recognizes the
+ // request as coming from the official CLI.
let systemPrompt = this.config.systemPrompt;
- if (isAnthropic) {
+ if (isClaudeOAuth) {
const billingHeader = buildBillingHeaderValue(this.messages);
systemPrompt = `${billingHeader}\n${SYSTEM_IDENTITY}\n\n${systemPrompt}`;
}
@@ -260,41 +313,58 @@ export class Agent {
const effort = options?.reasoningEffort ?? this.config.reasoningEffort ?? "max";
// Build stream text options
- const rawModel = providerFactory(this.config.model);
- const model = rawModel as unknown as LanguageModelV1;
+ const model = providerFactory(this.config.model);
+
+ // Build the message list with the system prompt prepended as a system
+ // role message. This is required for Anthropic prompt caching: the
+ // `system` shortcut parameter takes a plain string with nowhere to
+ // attach `providerOptions.anthropic.cacheControl`. Moving it inline
+ // also lets us apply rolling cache breakpoints to the last messages.
+ const systemMessage: CoreSystemMessage = { role: "system", content: systemPrompt };
+ const coreMessages: CoreMessage[] = [
+ systemMessage,
+ ...toCoreMessages(stepMessages, isClaudeOAuth),
+ ];
+
+ if (usesAnthropicSDK) {
+ applyAnthropicCaching(coreMessages);
+ }
+
const streamOptions: Parameters<typeof streamText>[0] = {
model,
- system: systemPrompt,
- messages: toCoreMessages(stepMessages, isAnthropic),
+ messages: coreMessages,
tools,
};
- if (isAnthropic && effort !== "none") {
- const modelId = this.config.model;
- const isOpus47 = modelId === "claude-opus-4-7";
-
+ if (isClaudeOAuth && effort !== "none") {
+ // Opus 4.7 rejects `thinking: { type: "enabled" }` ("reasoning-
+ // signature without reasoning") and only supports adaptive thinking.
+ // `@ai-sdk/anthropic` v1.x can't emit `type: "adaptive"`, so we
+ // leave `providerOptions.anthropic.thinking` unset and let the
+ // custom fetch in `createClaudeOAuthProvider` inject the adaptive
+ // shape into the request body. We still set `maxTokens` here so
+ // the SDK serializes it — adaptive thinking spends from this
+ // budget rather than a separate one.
+ const isOpus47 = this.config.model === "claude-opus-4-7";
+ const budgetTokens =
+ effort === "max"
+ ? 16000
+ : effort === "high"
+ ? 10000
+ : effort === "medium"
+ ? 5000
+ : effort === "low"
+ ? 2000
+ : 0;
if (isOpus47) {
- // Opus 4.7 only supports adaptive thinking
- streamOptions.providerOptions = {
- anthropic: { thinking: { type: "adaptive" as const } },
- };
+ streamOptions.maxTokens = budgetTokens + 8000;
} else {
- const budgetTokens =
- effort === "max"
- ? 16000
- : effort === "high"
- ? 10000
- : effort === "medium"
- ? 5000
- : effort === "low"
- ? 2000
- : 0;
streamOptions.providerOptions = {
anthropic: { thinking: { type: "enabled" as const, budgetTokens } },
};
streamOptions.maxTokens = budgetTokens + 8000;
}
- } else if (!isAnthropic && effort !== "none") {
+ } else if (!usesAnthropicSDK && effort !== "none") {
streamOptions.providerOptions = { openaiCompatible: { reasoningEffort: effort } };
}
@@ -313,7 +383,7 @@ export class Agent {
yield { type: "reasoning-delta", delta: event.textDelta };
} else if (event.type === "tool-call") {
const rawName = event.toolName;
- const toolName = isAnthropic ? unprefixToolName(rawName) : rawName;
+ const toolName = isClaudeOAuth ? unprefixToolName(rawName) : rawName;
const toolCall: ToolCall = {
id: event.toolCallId,
name: toolName,
diff --git a/packages/core/src/llm/provider.ts b/packages/core/src/llm/provider.ts
index 7cbb829..a7d800c 100644
--- a/packages/core/src/llm/provider.ts
+++ b/packages/core/src/llm/provider.ts
@@ -1,6 +1,6 @@
import { createAnthropic } from "@ai-sdk/anthropic";
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
-import type { LanguageModelV1Middleware, LanguageModelV1Prompt } from "ai";
+import type { LanguageModelV1, LanguageModelV1Middleware, LanguageModelV1Prompt } from "ai";
import { wrapLanguageModel } from "ai";
function normalizeMessages(msgs: unknown[]): unknown[] {
@@ -61,9 +61,19 @@ function unprefixToolName(name: string): string {
return name;
}
-export function createProvider(config: ProviderConfig) {
+// Explicit factory return type so the inferred type doesn't leak references
+// into transitive `@ai-sdk/provider` paths (which would trip TS2742).
+// `@ai-sdk/anthropic` v1.x already returns `LanguageModelV1`-spec models;
+// `@ai-sdk/openai-compatible` v0.2.x and `wrapLanguageModel` likewise.
+export type ModelFactory = (modelId: string) => LanguageModelV1;
+
+export function createProvider(config: ProviderConfig): ModelFactory {
if (config.provider === "anthropic") {
- return createAnthropicProvider(config);
+ return createClaudeOAuthProvider(config);
+ }
+
+ if (config.provider === "opencode-anthropic") {
+ return createApiKeyAnthropicProvider(config);
}
// Default: OpenAI-compatible provider
@@ -94,7 +104,21 @@ export function createProvider(config: ProviderConfig) {
};
}
-function createAnthropicProvider(config: ProviderConfig) {
+/**
+ * Claude OAuth provider. Used by Dispatch's `anthropic` provider keys
+ * (claude-pro, claude-max). Swaps `x-api-key` for `Authorization: Bearer`
+ * to satisfy Anthropic's OAuth flow, and mimics Claude Code CLI request
+ * headers so the request bills against the user's Claude subscription.
+ *
+ * The custom fetch also rewrites the outgoing JSON body for Claude Opus 4.7:
+ * that model rejects `thinking: { type: "enabled", budget_tokens }` (the only
+ * shape `@ai-sdk/anthropic` v1.x can emit) with "reasoning-signature without
+ * reasoning", and instead requires `thinking: { type: "adaptive" }`. `ai` v4
+ * is pinned to V1-spec providers, so we can't upgrade to v3 of the Anthropic
+ * SDK without breaking everything. Doing the rewrite here keeps the rest of
+ * the agent path SDK-agnostic and limits the special case to one model.
+ */
+function createClaudeOAuthProvider(config: ProviderConfig): ModelFactory {
const accessToken = config.claudeCredentials?.accessToken ?? config.apiKey;
const customFetch = Object.assign(
@@ -102,7 +126,9 @@ function createAnthropicProvider(config: ProviderConfig) {
const headers = new Headers(init?.headers);
headers.delete("x-api-key");
headers.set("authorization", `Bearer ${accessToken}`);
- return globalThis.fetch(url, { ...init, headers });
+
+ const body = rewriteBodyForOpus47(init?.body);
+ return globalThis.fetch(url, { ...init, headers, body });
},
{ preconnect: globalThis.fetch.preconnect?.bind(globalThis.fetch) },
);
@@ -118,9 +144,61 @@ function createAnthropicProvider(config: ProviderConfig) {
fetch: customFetch as typeof globalThis.fetch,
});
- return (modelId: string) => {
- return anthropic(modelId);
- };
+ return (modelId: string) => anthropic(modelId);
+}
+
+/**
+ * If the request body is a JSON `/messages` payload targeting Claude Opus 4.7
+ * and the caller signaled they want thinking (by setting `max_tokens` above
+ * Anthropic's default 4096), insert `thinking: { type: "adaptive" }`.
+ *
+ * When the rewrite applies, `temperature`, `top_p` and `top_k` are also
+ * removed, any existing `thinking` value is overwritten, and the payload is
+ * re-serialized with `JSON.stringify`.
+ *
+ * Skipping the rewrite when `max_tokens` is small (or absent) keeps `effort:
+ * "none"` requests as plain non-thinking calls — agent.ts only sets a high
+ * `max_tokens` when thinking is wanted, so this acts as a clean signal.
+ * A `max_tokens` that is not a number is treated as 0, i.e. no rewrite.
+ *
+ * Returns the body unchanged for any other model, any non-string body, or any
+ * payload that fails to parse, leaving non-Anthropic providers, non-Opus-4.7
+ * Claude models, and streaming/binary uploads unaffected.
+ *
+ * The model check is an exact string comparison with `"claude-opus-4-7"`; the
+ * request URL is not inspected here.
+ *
+ * NOTE(review): a string body that parses to JSON `null` is not covered by
+ * the `try`/`catch` and would throw when `parsed.model` is read. Consider
+ * returning `body` when the parsed value is `null` or not an object.
+ *
+ * @param body - Outgoing fetch body (`init.body`), possibly absent.
+ * @returns The rewritten JSON string, or `body` itself when no rewrite applies.
+ */
+function rewriteBodyForOpus47(body: BodyInit | null | undefined): BodyInit | null | undefined {
+ if (typeof body !== "string") return body;
+ let parsed: Record<string, unknown>;
+ try {
+ parsed = JSON.parse(body) as Record<string, unknown>;
+ } catch {
+ return body;
+ }
+ if (parsed.model !== "claude-opus-4-7") return body;
+ const maxTokens = typeof parsed.max_tokens === "number" ? parsed.max_tokens : 0;
+ if (maxTokens <= 4096) return body;
+ parsed.thinking = { type: "adaptive" };
+ // Anthropic rejects requests that combine extended thinking (enabled or
+ // adaptive) with any temperature other than 1. `ai` v4 defaults
+ // `temperature: 0`, and the v1 Anthropic SDK normally strips it when its
+ // own `isThinking` flag is set — but we're injecting `thinking` here,
+ // behind the SDK's back, so we have to strip it ourselves. Same for
+ // `top_p` and `top_k`, which are likewise rejected with thinking.
+ delete parsed.temperature;
+ delete parsed.top_p;
+ delete parsed.top_k;
+ return JSON.stringify(parsed);
+}
+
+/**
+ * Plain-API-key Anthropic-format provider. Used to hit gateways that speak
+ * Anthropic's `/messages` protocol with a standard `x-api-key` header — most
+ * importantly OpenCode Go's MiniMax and Qwen routes. Unlike the Claude OAuth
+ * variant, no `claudeCredentials` are present, no Claude Code mimicry headers
+ * are sent, and the API key is passed verbatim through the SDK's default
+ * authentication path.
+ *
+ * `config.baseURL` falls back to `https://opencode.ai/zen/go/v1` when it is
+ * unset or an empty string (the fallback uses `||`, not `??`). No custom
+ * `fetch` is installed, so request bodies are sent as the SDK builds them.
+ *
+ * @param config - Provider settings; only `apiKey` and `baseURL` are read.
+ * @returns Factory mapping a model id to a model from the configured client.
+ */
+function createApiKeyAnthropicProvider(config: ProviderConfig): ModelFactory {
+ const anthropic = createAnthropic({
+ apiKey: config.apiKey,
+ baseURL: config.baseURL || "https://opencode.ai/zen/go/v1",
+ });
+
+ return (modelId: string) => anthropic(modelId);
+}
export { prefixToolName, unprefixToolName };
diff --git a/packages/frontend/src/lib/components/KeyUsage.svelte b/packages/frontend/src/lib/components/KeyUsage.svelte
index 96a4b08..00d179e 100644
--- a/packages/frontend/src/lib/components/KeyUsage.svelte
+++ b/packages/frontend/src/lib/components/KeyUsage.svelte
@@ -440,13 +440,13 @@ function hasBucketData(bucket: UsageBucket | undefined): boolean {
<span class="text-xs text-base-content/50">Models</span>
<span class="text-xs font-mono">{entry.data.models.length} available</span>
</div>
- {#if m.rpm > 0}
+ {#if m && m.rpm > 0}
<div class="flex items-center justify-between">
<span class="text-xs text-base-content/50">RPM</span>
<span class="text-xs font-mono">{m.rpm}</span>
</div>
{/if}
- {#if m.requestsPerDay > 0}
+ {#if m && m.requestsPerDay > 0}
<div class="flex items-center justify-between">
<span class="text-xs text-base-content/50">RPD</span>
<span class="text-xs font-mono">{m.requestsPerDay.toLocaleString()}</span>