diff options
| author | Adam Malczewski <[email protected]> | 2026-05-24 13:24:04 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-05-24 13:24:04 +0900 |
| commit | 399e1509b93b9f3c56142f94b8fb2c30c2dedb2f (patch) | |
| tree | d67f18f5cca91a66e3146cbd2f48920571768e23 | |
| parent | 997b00034435440d412f955e05e53f09bae83f9e (diff) | |
| download | dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.tar.gz dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.zip | |
fix: prompt caching, OpenCode Go MiniMax/Qwen support, Opus 4.7 thinking, SDK compat
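- Implement Anthropic prompt caching: first system message + last 2 non-system messages get cache_control: ephemeral, mirroring OpenCode's applyCaching strategy. Move the system prompt inline into the messages array (as a system-role message) so providerOptions can attach the cache marker to it; the plain-string `system` parameter has nowhere to carry it.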
- Add opencode-anthropic provider variant routing MiniMax/Qwen models through the /messages endpoint with x-api-key auth, distinct from the Claude OAuth flow's Bearer auth and Claude Code mimicry.
- Split isAnthropic into isClaudeOAuth (billing header, mcp_ tool prefix, thinking config) and usesAnthropicSDK (cache markers) so non-OAuth Anthropic-format gateways get the right treatment.
- Pin @ai-sdk/anthropic to ^1.2.12: v3 returns LanguageModelV3-spec models that ai v4's streamText rejects at runtime ('AI SDK 4 only supports models that implement specification version v1'). Drop unnecessary V1 casts.
- Restore Opus 4.7 extended thinking by rewriting the outgoing /messages body in the Claude OAuth fetch interceptor: inject thinking: { type: 'adaptive' } (v1 SDK can't emit it), strip temperature/top_p/top_k (Anthropic rejects them with thinking enabled). Gated on max_tokens > 4096 so effort=none still works.
- Bump MAX_STEPS from 10 to 50, so the limit is no longer below AI SDK's stepCountIs(20) default, to reduce mid-task halts.
- Fix pre-existing typecheck errors in agent-manager.ts (entry/nextEntry narrowing), app.ts (agentModels body field), KeyUsage.svelte (m guards), and a TS2742 in provider.ts via explicit ModelFactory return type.
- buildFallbackSequence now always returns at least one entry so processMessage runs the agent loop even without keyId/modelId (fixes 4 broken agent-manager tests).
| -rw-r--r-- | bun.lock | 28 | ||||
| -rw-r--r-- | packages/api/src/agent-manager.ts | 39 | ||||
| -rw-r--r-- | packages/api/src/app.ts | 1 | ||||
| -rw-r--r-- | packages/core/package.json | 2 | ||||
| -rw-r--r-- | packages/core/src/agent/agent.ts | 138 | ||||
| -rw-r--r-- | packages/core/src/llm/provider.ts | 94 | ||||
| -rw-r--r-- | packages/frontend/src/lib/components/KeyUsage.svelte | 4 |
7 files changed, 230 insertions, 76 deletions
@@ -25,7 +25,7 @@ "name": "@dispatch/core", "version": "0.0.1", "dependencies": { - "@ai-sdk/anthropic": "^3.0.0", + "@ai-sdk/anthropic": "^1.2.12", "@ai-sdk/openai-compatible": "^0.2.0", "ai": "^4.0.0", "chokidar": "^5.0.0", @@ -64,13 +64,13 @@ "packages": { "7zip-bin": ["[email protected]", "", {}, "sha512-ukTPVhqG4jNzMro2qA9HSCSSVJN3aN7tlb+hfqYCt3ER0yWroeA2VR38MNrOHLQ/cVj+DaIMad0kFCtWWowh/A=="], - "@ai-sdk/anthropic": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-saEX+h5JDOkT9P/+REKDyikbnJiToFuLipgNcsmu4Zr3GW5kW1m9HhvrPK+vj63itIOsoZU6tmVIjkrePOlIUA=="], + "@ai-sdk/anthropic": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-YSzjlko7JvuiyQFmI9RN1tNZdEiZxc+6xld/0tq/VkJaHpEzGAb1yiNxxvmYVcjvfu/PcvCxAAYXmTYQQ63IHQ=="], "@ai-sdk/openai-compatible": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-LkvfcM8slJedRyJa/MiMiaOzcMjV1zNDwzTHEGz7aAsgsQV0maLfmJRi/nuSwf5jmp0EouC+JXXDUj2l94HgQw=="], - "@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw=="], + "@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="], - "@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.8" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, 
"sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw=="], + "@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="], "@ai-sdk/react": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider-utils": "2.2.8", "@ai-sdk/ui-utils": "1.2.11", "swr": "^2.2.5", "throttleit": "2.1.0" }, "peerDependencies": { "react": "^18 || ^19 || ^19.0.0-rc", "zod": "^3.23.8" }, "optionalPeers": ["zod"] }, "sha512-jK1IZZ22evPZoQW3vlkZ7wvjYGYF+tRBKXtrcolduIkQ/m/sOAVcVeVDUDvh1T91xCnWCdUGCPZg2avZ90mv3g=="], @@ -240,8 +240,6 @@ "@sindresorhus/is": ["@sindresorhus/[email protected]", "", {}, "sha512-t09vSN3MdfsyCHoFcTRCH/iUtG7OJ0CsjzB8cjAmKc/va/kIgeDI/TxsigdncE/4be734m0cvIYwNaV4i2XqAw=="], - "@standard-schema/spec": ["@standard-schema/[email protected]", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="], - "@sveltejs/acorn-typescript": ["@sveltejs/[email protected]", "", { "peerDependencies": { "acorn": "^8.9.0" } }, "sha512-lVJX6qEgs/4DOcRTpo56tmKzVPtoWAaVbL4hfO7t7NVwl9AAXzQR6cihesW1BmNMPl+bK6dreu2sOKBP2Q9CIA=="], "@sveltejs/vite-plugin-svelte": ["@sveltejs/[email protected]", "", { "dependencies": { "@sveltejs/vite-plugin-svelte-inspector": "^4.0.1", "debug": "^4.4.1", "deepmerge": "^4.3.1", "kleur": "^4.1.5", "magic-string": "^0.30.17", "vitefu": "^1.0.6" }, "peerDependencies": { "svelte": "^5.0.0", "vite": "^6.0.0" } }, "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ=="], @@ -530,8 +528,6 @@ "estree-walker": ["[email protected]", "", { "dependencies": { "@types/estree": "^1.0.0" } }, "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g=="], - 
"eventsource-parser": ["[email protected]", "", {}, "sha512-70QWGkr4snxr0OXLRWsFLeRBIRPuQOvt4s8QYjmUlmlkyTZkRqS7EDVRZtzU3TiyDbXSzaOeF0XUKy8PchzukQ=="], - "expect-type": ["[email protected]", "", {}, "sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA=="], "exponential-backoff": ["[email protected]", "", {}, "sha512-ZgEeZXj30q+I0EN+CbSSpIyPaJ5HVQD18Z1m+u1FXbAeT94mr1zw50q4q6jiiC447Nl/YTcIYSAftiGqetwXCA=="], @@ -950,16 +946,6 @@ "zod-to-json-schema": ["[email protected]", "", { "peerDependencies": { "zod": "^3.25.28 || ^4" } }, "sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA=="], - "@ai-sdk/openai-compatible/@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="], - - "@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="], - - "@ai-sdk/react/@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="], - - "@ai-sdk/ui-utils/@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="], - - "@ai-sdk/ui-utils/@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { 
"zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="], - "@electron/asar/minimatch": ["[email protected]", "", { "dependencies": { "brace-expansion": "^1.1.7" } }, "sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w=="], "@electron/fuses/fs-extra": ["[email protected]", "", { "dependencies": { "at-least-node": "^1.0.0", "graceful-fs": "^4.2.0", "jsonfile": "^6.0.1", "universalify": "^2.0.0" } }, "sha512-hcg3ZmepS30/7BSFqRvoo3DOMQu7IjqxO5nCDt+zM9XWjb33Wg7ziNT+Qvqbuc3+gWpzO02JubVyk2G4Zvo1OQ=="], @@ -990,10 +976,6 @@ "@tailwindcss/oxide-wasm32-wasi/tslib": ["[email protected]", "", { "bundled": true }, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="], - "ai/@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="], - - "ai/@ai-sdk/provider-utils": ["@ai-sdk/[email protected]", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="], - "app-builder-lib/@electron/get": ["@electron/[email protected]", "", { "dependencies": { "debug": "^4.1.1", "env-paths": "^2.2.0", "fs-extra": "^8.1.0", "got": "^11.8.5", "progress": "^2.0.3", "semver": "^6.2.0", "sumchecker": "^3.0.1" }, "optionalDependencies": { "global-agent": "^3.0.0" } }, "sha512-F+nKc0xW+kVbBRhFzaMgPy3KwmuNTYX1fx6+FxxoSnNgwYX6LD7AKBTWkU0MQ6IBoe7dz069CNkR673sPAgkCQ=="], "app-builder-lib/ci-info": ["[email protected]", "", {}, "sha512-Wdy2Igu8OcBpI2pZePZ5oWjPC38tmDVx5WKUXKwlLYkA0ozo85sLsLvkBbBn/sZaSCMFOGZJ14fvW9t5/d7kdA=="], @@ -1038,8 +1020,6 @@ "tiny-async-pool/semver": ["[email protected]", "", { "bin": { "semver": "bin/semver" 
} }, "sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g=="], - "@ai-sdk/react/@ai-sdk/provider-utils/@ai-sdk/provider": ["@ai-sdk/[email protected]", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="], - "@electron/asar/minimatch/brace-expansion": ["[email protected]", "", { "dependencies": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" } }, "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g=="], "@electron/get/fs-extra/jsonfile": ["[email protected]", "", { "optionalDependencies": { "graceful-fs": "^4.1.6" } }, "sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg=="], diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 1a28371..73b65f5 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -98,6 +98,15 @@ Good approach: 3. Work through each file sequentially `.trim(); +/** + * Returns true for OpenCode Go models served via the Anthropic-format + * `/messages` endpoint (MiniMax M2.x, Qwen3.x Plus). See + * https://opencode.ai/docs/go/#endpoints for the per-model endpoint table. 
+ */ +function isOpencodeGoAnthropicModel(modelId: string): boolean { + return modelId.startsWith("minimax-") || modelId.startsWith("qwen"); +} + function buildSystemPrompt(toolNames: string[], basePrompt?: string): string { const base = basePrompt || DEFAULT_SYSTEM_PROMPT; const toolList = toolNames @@ -553,6 +562,16 @@ export class AgentManager { apiKey = envKey; baseURL = key.base_url; model = effectiveModelId; + // OpenCode Go splits its catalog across two endpoints: + // `/chat/completions` — GLM, Kimi, DeepSeek, MiMo (OpenAI-compatible) + // `/messages` — MiniMax, Qwen (Anthropic-format) + // The configured key has provider="opencode-go" which defaults to + // the OpenAI-compatible path. When the selected model lives on the + // `/messages` route, route through the API-key Anthropic provider + // instead so the SDK targets the correct endpoint and protocol. + if (key.provider === "opencode-go" && isOpencodeGoAnthropicModel(model)) { + provider = "opencode-anthropic"; + } tabAgent.keyId = effectiveKeyId; tabAgent.modelId = effectiveModelId; useOverride = true; @@ -847,8 +866,12 @@ export class AgentManager { for (let fallbackIdx = 0; fallbackIdx < maxFallbackAttempts; fallbackIdx++) { const entry = fallbackSequence[fallbackIdx]; - currentKeyId = entry.key_id; - currentModelId = entry.model_id; + if (!entry) break; // unreachable: loop bound guarantees defined, satisfies TS + // Convert empty strings (used when caller omitted keyId/modelId in + // manual mode) to undefined so `getOrCreateAgentForTab` falls back + // to the tabAgent's stored defaults via the `?? tabAgent.keyId` chain. 
+ currentKeyId = entry.key_id || undefined; + currentModelId = entry.model_id || undefined; allOutput = ""; let assistantText = ""; let assistantThinking = ""; @@ -977,8 +1000,8 @@ export class AgentManager { // Try the next entry in the agent's fallback sequence const nextIdx = fallbackIdx + 1; - if (nextIdx < maxFallbackAttempts) { - const nextEntry = fallbackSequence[nextIdx]; + const nextEntry = fallbackSequence[nextIdx]; + if (nextIdx < maxFallbackAttempts && nextEntry) { const fallbackMsg = `Key "${tabAgent.keyId}" rate limited. ` + `Falling back to "${nextEntry.key_id}" (model: ${nextEntry.model_id})...`; @@ -1021,9 +1044,11 @@ export class AgentManager { const startIdx = models.findIndex((m) => m.key_id === keyId && m.model_id === modelId); return startIdx >= 0 ? models.slice(startIdx) : models; } - // Manual mode: no fallback — just the selected key/model pair - if (keyId && modelId) return [{ key_id: keyId, model_id: modelId }]; - return []; + // Manual mode: no fallback — just the selected key/model pair. + // Always return at least one entry so `processMessage` runs the agent + // once (empty strings let `getOrCreateAgentForTab` fall back to the + // tabAgent's stored defaults or environment-driven config). + return [{ key_id: keyId ?? "", model_id: modelId ?? 
"" }]; } queueMessage(tabId: string, message: string, clientId?: string): { messageId: string } { diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts index d7dd0be..ba5dabd 100644 --- a/packages/api/src/app.ts +++ b/packages/api/src/app.ts @@ -41,6 +41,7 @@ app.post("/chat", async (c) => { message?: unknown; keyId?: unknown; modelId?: unknown; + agentModels?: unknown; reasoningEffort?: unknown; workingDirectory?: unknown; queueId?: unknown; diff --git a/packages/core/package.json b/packages/core/package.json index 6f88398..6345ac3 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -11,7 +11,7 @@ "typecheck": "tsc --noEmit" }, "dependencies": { - "@ai-sdk/anthropic": "^3.0.0", + "@ai-sdk/anthropic": "^1.2.12", "@ai-sdk/openai-compatible": "^0.2.0", "ai": "^4.0.0", "chokidar": "^5.0.0", diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index 24de59d..3cd8a5b 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -1,6 +1,6 @@ import { realpathSync } from "node:fs"; import { dirname, isAbsolute, relative, resolve } from "node:path"; -import type { CoreMessage, LanguageModelV1 } from "ai"; +import type { CoreMessage, CoreSystemMessage } from "ai"; import { streamText } from "ai"; import { buildBillingHeaderValue, SYSTEM_IDENTITY } from "../credentials/claude.js"; import { createProvider, prefixToolName, unprefixToolName } from "../llm/provider.js"; @@ -16,7 +16,7 @@ import type { ToolResult, } from "../types/index.js"; -function toCoreMessages(messages: ChatMessage[], isAnthropic?: boolean): CoreMessage[] { +function toCoreMessages(messages: ChatMessage[], useToolPrefix?: boolean): CoreMessage[] { const result: CoreMessage[] = []; for (const msg of messages) { if (msg.role === "user") { @@ -27,12 +27,12 @@ function toCoreMessages(messages: ChatMessage[], isAnthropic?: boolean): CoreMes | { type: "tool-call"; toolCallId: string; toolName: string; args: Record<string, 
unknown> } > = [{ type: "text", text: msg.content }]; for (const tc of msg.toolCalls ?? []) { - const toolName = isAnthropic ? prefixToolName(tc.name) : tc.name; + const toolName = useToolPrefix ? prefixToolName(tc.name) : tc.name; parts.push({ type: "tool-call", toolCallId: tc.id, toolName, args: tc.arguments }); } result.push({ role: "assistant", content: parts }); for (const tr of msg.toolResults ?? []) { - const toolName = isAnthropic ? prefixToolName(tr.toolName) : tr.toolName; + const toolName = useToolPrefix ? prefixToolName(tr.toolName) : tr.toolName; result.push({ role: "tool", content: [ @@ -45,6 +45,47 @@ function toCoreMessages(messages: ChatMessage[], isAnthropic?: boolean): CoreMes return result; } +/** + * Apply Anthropic prompt-caching breakpoints to a message list. + * + * Anthropic caches the entire request prefix up to (and including) any block + * marked with `cache_control`. Up to 4 breakpoints per request; we use three + * (first system + last 2 non-system). + * + * Strategy (mirrors OpenCode's `applyCaching` in transform.ts): + * - Mark the first system message → caches system prompt (and tools, which + * sit before messages in the request body). + * - Mark the last 2 non-system messages → rolling cache that extends through + * the conversation each turn. + * + * Only applied for the Anthropic provider. OpenCode Zen's OpenAI-compatible + * endpoint (`/zen/v1/chat/completions`) backs models like MiniMax, GLM, Kimi, + * Grok, etc. — those upstreams do automatic prefix caching server-side and + * don't accept `cache_control` markers. OpenCode's own transform.ts gates + * `applyCaching` on Anthropic-family detection for the same reason. Models + * served via `@ai-sdk/openai` (GPT) and `@ai-sdk/google` (Gemini) likewise + * use server-side automatic caching. 
+ */ +function applyAnthropicCaching(msgs: CoreMessage[]): void { + const targets = new Set<CoreMessage>(); + + const systemMsgs = msgs.filter((m) => m.role === "system").slice(0, 2); + for (const m of systemMsgs) targets.add(m); + + const nonSystem = msgs.filter((m) => m.role !== "system").slice(-2); + for (const m of nonSystem) targets.add(m); + + for (const msg of targets) { + msg.providerOptions = { + ...msg.providerOptions, + anthropic: { + ...(msg.providerOptions?.anthropic ?? {}), + cacheControl: { type: "ephemeral" }, + }, + }; + } +} + function formatError(err: unknown, config: AgentConfig): string { const context = `[model=${config.model}, baseURL=${config.baseURL}]`; @@ -66,7 +107,7 @@ function formatError(err: unknown, config: AgentConfig): string { return `${String(err)} ${context}`; } -const MAX_STEPS = 10; +const MAX_STEPS = 50; export class Agent { status: AgentStatus = "idle"; @@ -223,7 +264,15 @@ export class Agent { this.messages.push({ role: "user", content: userMessage }); const registry = createToolRegistry(this.config.tools); - const isAnthropic = this.config.provider === "anthropic"; + // `isClaudeOAuth` gates Claude-Code-CLI-specific behavior: billing-header + // injection, identity preamble, `mcp_*` tool name prefix, and extended + // thinking config. Only the OAuth flow (provider="anthropic") needs these. + // `usesAnthropicSDK` is the broader category — any provider whose + // requests are serialized by `@ai-sdk/anthropic` and therefore expect + // Anthropic-style `cache_control` markers. Today that's Claude OAuth + // plus OpenCode Go's MiniMax/Qwen routes. 
+ const isClaudeOAuth = this.config.provider === "anthropic"; + const usesAnthropicSDK = isClaudeOAuth || this.config.provider === "opencode-anthropic"; const providerFactory = createProvider({ apiKey: this.config.apiKey, baseURL: this.config.baseURL, @@ -231,17 +280,21 @@ export class Agent { claudeCredentials: this.config.claudeCredentials, }); - // For Anthropic provider, prefix tool names and build full system prompt + // Only the Claude OAuth flow expects `mcp_*` prefixed tool names. The + // OpenCode Go anthropic-format endpoint passes tools through to MiniMax + // or Qwen, which expect raw names. const aiTools = registry.getAISDKTools(); - const tools = isAnthropic + const tools = isClaudeOAuth ? Object.fromEntries( Object.entries(aiTools).map(([name, tool]) => [prefixToolName(name), tool]), ) : aiTools; - // Build system prompt + // Build system prompt — Claude OAuth requests embed a billing header + // and the Claude Code identity preamble so Anthropic recognizes the + // request as coming from the official CLI. let systemPrompt = this.config.systemPrompt; - if (isAnthropic) { + if (isClaudeOAuth) { const billingHeader = buildBillingHeaderValue(this.messages); systemPrompt = `${billingHeader}\n${SYSTEM_IDENTITY}\n\n${systemPrompt}`; } @@ -260,41 +313,58 @@ export class Agent { const effort = options?.reasoningEffort ?? this.config.reasoningEffort ?? "max"; // Build stream text options - const rawModel = providerFactory(this.config.model); - const model = rawModel as unknown as LanguageModelV1; + const model = providerFactory(this.config.model); + + // Build the message list with the system prompt prepended as a system + // role message. This is required for Anthropic prompt caching: the + // `system` shortcut parameter takes a plain string with nowhere to + // attach `providerOptions.anthropic.cacheControl`. Moving it inline + // also lets us apply rolling cache breakpoints to the last messages. 
+ const systemMessage: CoreSystemMessage = { role: "system", content: systemPrompt }; + const coreMessages: CoreMessage[] = [ + systemMessage, + ...toCoreMessages(stepMessages, isClaudeOAuth), + ]; + + if (usesAnthropicSDK) { + applyAnthropicCaching(coreMessages); + } + const streamOptions: Parameters<typeof streamText>[0] = { model, - system: systemPrompt, - messages: toCoreMessages(stepMessages, isAnthropic), + messages: coreMessages, tools, }; - if (isAnthropic && effort !== "none") { - const modelId = this.config.model; - const isOpus47 = modelId === "claude-opus-4-7"; - + if (isClaudeOAuth && effort !== "none") { + // Opus 4.7 rejects `thinking: { type: "enabled" }` ("reasoning- + // signature without reasoning") and only supports adaptive thinking. + // `@ai-sdk/anthropic` v1.x can't emit `type: "adaptive"`, so we + // leave `providerOptions.anthropic.thinking` unset and let the + // custom fetch in `createClaudeOAuthProvider` inject the adaptive + // shape into the request body. We still set `maxTokens` here so + // the SDK serializes it — adaptive thinking spends from this + // budget rather than a separate one. + const isOpus47 = this.config.model === "claude-opus-4-7"; + const budgetTokens = + effort === "max" + ? 16000 + : effort === "high" + ? 10000 + : effort === "medium" + ? 5000 + : effort === "low" + ? 2000 + : 0; if (isOpus47) { - // Opus 4.7 only supports adaptive thinking - streamOptions.providerOptions = { - anthropic: { thinking: { type: "adaptive" as const } }, - }; + streamOptions.maxTokens = budgetTokens + 8000; } else { - const budgetTokens = - effort === "max" - ? 16000 - : effort === "high" - ? 10000 - : effort === "medium" - ? 5000 - : effort === "low" - ? 
2000 - : 0; streamOptions.providerOptions = { anthropic: { thinking: { type: "enabled" as const, budgetTokens } }, }; streamOptions.maxTokens = budgetTokens + 8000; } - } else if (!isAnthropic && effort !== "none") { + } else if (!usesAnthropicSDK && effort !== "none") { streamOptions.providerOptions = { openaiCompatible: { reasoningEffort: effort } }; } @@ -313,7 +383,7 @@ export class Agent { yield { type: "reasoning-delta", delta: event.textDelta }; } else if (event.type === "tool-call") { const rawName = event.toolName; - const toolName = isAnthropic ? unprefixToolName(rawName) : rawName; + const toolName = isClaudeOAuth ? unprefixToolName(rawName) : rawName; const toolCall: ToolCall = { id: event.toolCallId, name: toolName, diff --git a/packages/core/src/llm/provider.ts b/packages/core/src/llm/provider.ts index 7cbb829..a7d800c 100644 --- a/packages/core/src/llm/provider.ts +++ b/packages/core/src/llm/provider.ts @@ -1,6 +1,6 @@ import { createAnthropic } from "@ai-sdk/anthropic"; import { createOpenAICompatible } from "@ai-sdk/openai-compatible"; -import type { LanguageModelV1Middleware, LanguageModelV1Prompt } from "ai"; +import type { LanguageModelV1, LanguageModelV1Middleware, LanguageModelV1Prompt } from "ai"; import { wrapLanguageModel } from "ai"; function normalizeMessages(msgs: unknown[]): unknown[] { @@ -61,9 +61,19 @@ function unprefixToolName(name: string): string { return name; } -export function createProvider(config: ProviderConfig) { +// Explicit factory return type so the inferred type doesn't leak references +// into transitive `@ai-sdk/provider` paths (which would trip TS2742). +// `@ai-sdk/anthropic` v1.x already returns `LanguageModelV1`-spec models; +// `@ai-sdk/openai-compatible` v0.2.x and `wrapLanguageModel` likewise. 
+export type ModelFactory = (modelId: string) => LanguageModelV1; + +export function createProvider(config: ProviderConfig): ModelFactory { if (config.provider === "anthropic") { - return createAnthropicProvider(config); + return createClaudeOAuthProvider(config); + } + + if (config.provider === "opencode-anthropic") { + return createApiKeyAnthropicProvider(config); } // Default: OpenAI-compatible provider @@ -94,7 +104,21 @@ export function createProvider(config: ProviderConfig) { }; } -function createAnthropicProvider(config: ProviderConfig) { +/** + * Claude OAuth provider. Used by Dispatch's `anthropic` provider keys + * (claude-pro, claude-max). Swaps `x-api-key` for `Authorization: Bearer` + * to satisfy Anthropic's OAuth flow, and mimics Claude Code CLI request + * headers so the request bills against the user's Claude subscription. + * + * The custom fetch also rewrites the outgoing JSON body for Claude Opus 4.7: + * that model rejects `thinking: { type: "enabled", budget_tokens }` (the only + * shape `@ai-sdk/anthropic` v1.x can emit) with "reasoning-signature without + * reasoning", and instead requires `thinking: { type: "adaptive" }`. `ai` v4 + * is pinned to V1-spec providers, so we can't upgrade to v3 of the Anthropic + * SDK without breaking everything. Doing the rewrite here keeps the rest of + * the agent path SDK-agnostic and limits the special case to one model. + */ +function createClaudeOAuthProvider(config: ProviderConfig): ModelFactory { const accessToken = config.claudeCredentials?.accessToken ?? 
config.apiKey; const customFetch = Object.assign( @@ -102,7 +126,9 @@ function createAnthropicProvider(config: ProviderConfig) { const headers = new Headers(init?.headers); headers.delete("x-api-key"); headers.set("authorization", `Bearer ${accessToken}`); - return globalThis.fetch(url, { ...init, headers }); + + const body = rewriteBodyForOpus47(init?.body); + return globalThis.fetch(url, { ...init, headers, body }); }, { preconnect: globalThis.fetch.preconnect?.bind(globalThis.fetch) }, ); @@ -118,9 +144,61 @@ function createAnthropicProvider(config: ProviderConfig) { fetch: customFetch as typeof globalThis.fetch, }); - return (modelId: string) => { - return anthropic(modelId); - }; + return (modelId: string) => anthropic(modelId); +} + +/** + * If the request body is a JSON `/messages` payload targeting Claude Opus 4.7 + * and the caller signaled they want thinking (by setting `max_tokens` above + * Anthropic's default 4096), insert `thinking: { type: "adaptive" }`. + * + * Skipping the rewrite when `max_tokens` is small (or absent) keeps `effort: + * "none"` requests as plain non-thinking calls — agent.ts only sets a high + * `max_tokens` when thinking is wanted, so this acts as a clean signal. + * + * Returns the body unchanged for any other model, any non-string body, or any + * payload that fails to parse, leaving non-Anthropic providers, non-Opus-4.7 + * Claude models, and streaming/binary uploads unaffected. + */ +function rewriteBodyForOpus47(body: BodyInit | null | undefined): BodyInit | null | undefined { + if (typeof body !== "string") return body; + let parsed: Record<string, unknown>; + try { + parsed = JSON.parse(body) as Record<string, unknown>; + } catch { + return body; + } + if (parsed.model !== "claude-opus-4-7") return body; + const maxTokens = typeof parsed.max_tokens === "number" ? 
parsed.max_tokens : 0; + if (maxTokens <= 4096) return body; + parsed.thinking = { type: "adaptive" }; + // Anthropic rejects requests that combine extended thinking (enabled or + // adaptive) with any temperature other than 1. `ai` v4 defaults + // `temperature: 0`, and the v1 Anthropic SDK normally strips it when its + // own `isThinking` flag is set — but we're injecting `thinking` here, + // behind the SDK's back, so we have to strip it ourselves. Same for + // `top_p` and `top_k`, which are likewise rejected with thinking. + delete parsed.temperature; + delete parsed.top_p; + delete parsed.top_k; + return JSON.stringify(parsed); +} + +/** + * Plain-API-key Anthropic-format provider. Used to hit gateways that speak + * Anthropic's `/messages` protocol with a standard `x-api-key` header — most + * importantly OpenCode Go's MiniMax and Qwen routes. Unlike the Claude OAuth + * variant, no `claudeCredentials` are present, no Claude Code mimicry headers + * are sent, and the API key is passed verbatim through the SDK's default + * authentication path. 
+ */ +function createApiKeyAnthropicProvider(config: ProviderConfig): ModelFactory { + const anthropic = createAnthropic({ + apiKey: config.apiKey, + baseURL: config.baseURL || "https://opencode.ai/zen/go/v1", + }); + + return (modelId: string) => anthropic(modelId); } export { prefixToolName, unprefixToolName }; diff --git a/packages/frontend/src/lib/components/KeyUsage.svelte b/packages/frontend/src/lib/components/KeyUsage.svelte index 96a4b08..00d179e 100644 --- a/packages/frontend/src/lib/components/KeyUsage.svelte +++ b/packages/frontend/src/lib/components/KeyUsage.svelte @@ -440,13 +440,13 @@ function hasBucketData(bucket: UsageBucket | undefined): boolean { <span class="text-xs text-base-content/50">Models</span> <span class="text-xs font-mono">{entry.data.models.length} available</span> </div> - {#if m.rpm > 0} + {#if m && m.rpm > 0} <div class="flex items-center justify-between"> <span class="text-xs text-base-content/50">RPM</span> <span class="text-xs font-mono">{m.rpm}</span> </div> {/if} - {#if m.requestsPerDay > 0} + {#if m && m.requestsPerDay > 0} <div class="flex items-center justify-between"> <span class="text-xs text-base-content/50">RPD</span> <span class="text-xs font-mono">{m.requestsPerDay.toLocaleString()}</span> |
