diff options
| author | Adam Malczewski <[email protected]> | 2026-05-24 13:24:04 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-05-24 13:24:04 +0900 |
| commit | 399e1509b93b9f3c56142f94b8fb2c30c2dedb2f (patch) | |
| tree | d67f18f5cca91a66e3146cbd2f48920571768e23 /packages/api/src | |
| parent | 997b00034435440d412f955e05e53f09bae83f9e (diff) | |
| download | dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.tar.gz dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.zip | |
fix: prompt caching, OpenCode Go MiniMax/Qwen support, Opus 4.7 thinking, SDK compat
- Implement Anthropic prompt caching: first system message + last 2 non-system messages get cache_control: ephemeral, mirroring OpenCode's applyCaching strategy. Move system prompt inline into messages array so providerOptions can attach.
- Add opencode-anthropic provider variant routing MiniMax/Qwen models through the /messages endpoint with x-api-key auth, distinct from the Claude OAuth flow's Bearer auth and Claude Code mimicry.
- Split isAnthropic into isClaudeOAuth (billing header, mcp_ tool prefix, thinking config) and usesAnthropicSDK (cache markers) so non-OAuth Anthropic-format gateways get the right treatment.
- Pin @ai-sdk/anthropic to ^1.2.12: v3 returns LanguageModelV3-spec models that ai v4's streamText rejects at runtime ('AI SDK 4 only supports models that implement specification version v1'). Drop unnecessary V1 casts.
- Restore Opus 4.7 extended thinking by rewriting the outgoing /messages body in the Claude OAuth fetch interceptor: inject thinking: { type: 'adaptive' } (v1 SDK can't emit it), strip temperature/top_p/top_k (Anthropic rejects them with thinking enabled). Gated on max_tokens > 4096 so effort=none still works.
- Bump MAX_STEPS from 10 to 50 to reduce mid-task halts; the previous limit of 10 was lower than even AI SDK's stepCountIs(20) default.
- Fix pre-existing typecheck errors in agent-manager.ts (entry/nextEntry narrowing), app.ts (agentModels body field), KeyUsage.svelte (m guards), and a TS2742 in provider.ts via explicit ModelFactory return type.
- buildFallbackSequence now always returns at least one entry so processMessage runs the agent loop even without keyId/modelId (fixes 4 broken agent-manager tests).
Diffstat (limited to 'packages/api/src')
| -rw-r--r-- | packages/api/src/agent-manager.ts | 39 | ||||
| -rw-r--r-- | packages/api/src/app.ts | 1 |
2 files changed, 33 insertions, 7 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 1a28371..73b65f5 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -98,6 +98,15 @@ Good approach: 3. Work through each file sequentially `.trim(); +/** + * Returns true for OpenCode Go models served via the Anthropic-format + * `/messages` endpoint (MiniMax M2.x, Qwen3.x Plus). See + * https://opencode.ai/docs/go/#endpoints for the per-model endpoint table. + */ +function isOpencodeGoAnthropicModel(modelId: string): boolean { + return modelId.startsWith("minimax-") || modelId.startsWith("qwen"); +} + function buildSystemPrompt(toolNames: string[], basePrompt?: string): string { const base = basePrompt || DEFAULT_SYSTEM_PROMPT; const toolList = toolNames @@ -553,6 +562,16 @@ export class AgentManager { apiKey = envKey; baseURL = key.base_url; model = effectiveModelId; + // OpenCode Go splits its catalog across two endpoints: + // `/chat/completions` — GLM, Kimi, DeepSeek, MiMo (OpenAI-compatible) + // `/messages` — MiniMax, Qwen (Anthropic-format) + // The configured key has provider="opencode-go" which defaults to + // the OpenAI-compatible path. When the selected model lives on the + // `/messages` route, route through the API-key Anthropic provider + // instead so the SDK targets the correct endpoint and protocol. 
+ if (key.provider === "opencode-go" && isOpencodeGoAnthropicModel(model)) { + provider = "opencode-anthropic"; + } tabAgent.keyId = effectiveKeyId; tabAgent.modelId = effectiveModelId; useOverride = true; @@ -847,8 +866,12 @@ export class AgentManager { for (let fallbackIdx = 0; fallbackIdx < maxFallbackAttempts; fallbackIdx++) { const entry = fallbackSequence[fallbackIdx]; - currentKeyId = entry.key_id; - currentModelId = entry.model_id; + if (!entry) break; // unreachable: loop bound guarantees defined, satisfies TS + // Convert empty strings (used when caller omitted keyId/modelId in + // manual mode) to undefined so `getOrCreateAgentForTab` falls back + // to the tabAgent's stored defaults via the `?? tabAgent.keyId` chain. + currentKeyId = entry.key_id || undefined; + currentModelId = entry.model_id || undefined; allOutput = ""; let assistantText = ""; let assistantThinking = ""; @@ -977,8 +1000,8 @@ export class AgentManager { // Try the next entry in the agent's fallback sequence const nextIdx = fallbackIdx + 1; - if (nextIdx < maxFallbackAttempts) { - const nextEntry = fallbackSequence[nextIdx]; + const nextEntry = fallbackSequence[nextIdx]; + if (nextIdx < maxFallbackAttempts && nextEntry) { const fallbackMsg = `Key "${tabAgent.keyId}" rate limited. ` + `Falling back to "${nextEntry.key_id}" (model: ${nextEntry.model_id})...`; @@ -1021,9 +1044,11 @@ export class AgentManager { const startIdx = models.findIndex((m) => m.key_id === keyId && m.model_id === modelId); return startIdx >= 0 ? models.slice(startIdx) : models; } - // Manual mode: no fallback — just the selected key/model pair - if (keyId && modelId) return [{ key_id: keyId, model_id: modelId }]; - return []; + // Manual mode: no fallback — just the selected key/model pair. + // Always return at least one entry so `processMessage` runs the agent + // once (empty strings let `getOrCreateAgentForTab` fall back to the + // tabAgent's stored defaults or environment-driven config). 
+ return [{ key_id: keyId ?? "", model_id: modelId ?? "" }]; } queueMessage(tabId: string, message: string, clientId?: string): { messageId: string } { diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts index d7dd0be..ba5dabd 100644 --- a/packages/api/src/app.ts +++ b/packages/api/src/app.ts @@ -41,6 +41,7 @@ app.post("/chat", async (c) => { message?: unknown; keyId?: unknown; modelId?: unknown; + agentModels?: unknown; reasoningEffort?: unknown; workingDirectory?: unknown; queueId?: unknown; |
