fix: prompt caching, OpenCode Go MiniMax/Qwen support, Opus 4.7 thinking, SDK compat

- Implement Anthropic prompt caching: first system message + last 2 non-system messages get cache_control: ephemeral, mirroring OpenCode's applyCaching strategy. Move system prompt inline into messages array so providerOptions can attach.
- Add opencode-anthropic provider variant routing MiniMax/Qwen models through the /messages endpoint with x-api-key auth, distinct from the Claude OAuth flow's Bearer auth and Claude Code mimicry.
- Split isAnthropic into isClaudeOAuth (billing header, mcp_ tool prefix, thinking config) and usesAnthropicSDK (cache markers) so non-OAuth Anthropic-format gateways get the right treatment.
- Pin @ai-sdk/anthropic to ^1.2.12: v3 returns LanguageModelV3-spec models that ai v4's streamText rejects at runtime ('AI SDK 4 only supports models that implement specification version v1'). Drop unnecessary V1 casts.
- Restore Opus 4.7 extended thinking by rewriting the outgoing /messages body in the Claude OAuth fetch interceptor: inject thinking: { type: 'adaptive' } (v1 SDK can't emit it), strip temperature/top_p/top_k (Anthropic rejects them with thinking enabled). Gated on max_tokens > 4096 so effort=none still works.
- Bump MAX_STEPS from 10 to 50 to align with AI SDK's stepCountIs(20) default and reduce mid-task halts.
- Fix pre-existing typecheck errors in agent-manager.ts (entry/nextEntry narrowing), app.ts (agentModels body field), KeyUsage.svelte (m guards), and a TS2742 in provider.ts via explicit ModelFactory return type.
- buildFallbackSequence now always returns at least one entry so processMessage runs the agent loop even without keyId/modelId (fixes 4 broken agent-manager tests).
author: Adam Malczewski <[email protected]> 2026-05-24 13:24:04 +0900
committer: Adam Malczewski <[email protected]> 2026-05-24 13:24:04 +0900
commit: 399e1509b93b9f3c56142f94b8fb2c30c2dedb2f (patch)
tree: d67f18f5cca91a66e3146cbd2f48920571768e23 /packages/api/src
parent: 997b00034435440d412f955e05e53f09bae83f9e (diff)
download: dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.tar.gz
dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.zip
2 files changed, 33 insertions, 7 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts
index 1a28371..73b65f5 100644
--- a/packages/api/src/agent-manager.ts
+++ b/packages/api/src/agent-manager.ts
@@ -98,6 +98,15 @@ Good approach:
 3. Work through each file sequentially
 `.trim();
 
+/**
+ * Returns true for OpenCode Go models served via the Anthropic-format
+ * `/messages` endpoint (MiniMax M2.x, Qwen3.x Plus). See
+ * https://opencode.ai/docs/go/#endpoints for the per-model endpoint table.
+ */
+function isOpencodeGoAnthropicModel(modelId: string): boolean {
+	return modelId.startsWith("minimax-") || modelId.startsWith("qwen");
+}
+
 function buildSystemPrompt(toolNames: string[], basePrompt?: string): string {
 	const base = basePrompt || DEFAULT_SYSTEM_PROMPT;
 	const toolList = toolNames
@@ -553,6 +562,16 @@ export class AgentManager {
 							apiKey = envKey;
 							baseURL = key.base_url;
 							model = effectiveModelId;
+							// OpenCode Go splits its catalog across two endpoints:
+							//   `/chat/completions` — GLM, Kimi, DeepSeek, MiMo (OpenAI-compatible)
+							//   `/messages`        — MiniMax, Qwen (Anthropic-format)
+							// The configured key has provider="opencode-go" which defaults to
+							// the OpenAI-compatible path. When the selected model lives on the
+							// `/messages` route, route through the API-key Anthropic provider
+							// instead so the SDK targets the correct endpoint and protocol.
+							if (key.provider === "opencode-go" && isOpencodeGoAnthropicModel(model)) {
+								provider = "opencode-anthropic";
+							}
 							tabAgent.keyId = effectiveKeyId;
 							tabAgent.modelId = effectiveModelId;
 							useOverride = true;
@@ -847,8 +866,12 @@ export class AgentManager {
 
 		for (let fallbackIdx = 0; fallbackIdx < maxFallbackAttempts; fallbackIdx++) {
 			const entry = fallbackSequence[fallbackIdx];
-			currentKeyId = entry.key_id;
-			currentModelId = entry.model_id;
+			if (!entry) break; // unreachable: loop bound guarantees defined, satisfies TS
+			// Convert empty strings (used when caller omitted keyId/modelId in
+			// manual mode) to undefined so `getOrCreateAgentForTab` falls back
+			// to the tabAgent's stored defaults via the `?? tabAgent.keyId` chain.
+			currentKeyId = entry.key_id || undefined;
+			currentModelId = entry.model_id || undefined;
 			allOutput = "";
 			let assistantText = "";
 			let assistantThinking = "";
@@ -977,8 +1000,8 @@ export class AgentManager {
 
 				// Try the next entry in the agent's fallback sequence
 				const nextIdx = fallbackIdx + 1;
-				if (nextIdx < maxFallbackAttempts) {
-					const nextEntry = fallbackSequence[nextIdx];
+				const nextEntry = fallbackSequence[nextIdx];
+				if (nextIdx < maxFallbackAttempts && nextEntry) {
 					const fallbackMsg =
 						`Key "${tabAgent.keyId}" rate limited. ` +
 						`Falling back to "${nextEntry.key_id}" (model: ${nextEntry.model_id})...`;
@@ -1021,9 +1044,11 @@ export class AgentManager {
 			const startIdx = models.findIndex((m) => m.key_id === keyId && m.model_id === modelId);
 			return startIdx >= 0 ? models.slice(startIdx) : models;
 		}
-		// Manual mode: no fallback — just the selected key/model pair
-		if (keyId && modelId) return [{ key_id: keyId, model_id: modelId }];
-		return [];
+		// Manual mode: no fallback — just the selected key/model pair.
+		// Always return at least one entry so `processMessage` runs the agent
+		// once (empty strings let `getOrCreateAgentForTab` fall back to the
+		// tabAgent's stored defaults or environment-driven config).
+		return [{ key_id: keyId ?? "", model_id: modelId ?? "" }];
 	}
 
 	queueMessage(tabId: string, message: string, clientId?: string): { messageId: string } {
diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts
index d7dd0be..ba5dabd 100644
--- a/packages/api/src/app.ts
+++ b/packages/api/src/app.ts
@@ -41,6 +41,7 @@ app.post("/chat", async (c) => {
 		message?: unknown;
 		keyId?: unknown;
 		modelId?: unknown;
+		agentModels?: unknown;
 		reasoningEffort?: unknown;
 		workingDirectory?: unknown;
 		queueId?: unknown;
author	Adam Malczewski <[email protected]>	2026-05-24 13:24:04 +0900
committer	Adam Malczewski <[email protected]>	2026-05-24 13:24:04 +0900
commit	399e1509b93b9f3c56142f94b8fb2c30c2dedb2f (patch)
tree	d67f18f5cca91a66e3146cbd2f48920571768e23 /packages/api/src
parent	997b00034435440d412f955e05e53f09bae83f9e (diff)
download	dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.tar.gz dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.zip