summary refs log tree commit diff homepage
path: root/packages/api/src
diff options
context:
space:
mode:
author Adam Malczewski <[email protected]> 2026-05-24 13:24:04 +0900
committer Adam Malczewski <[email protected]> 2026-05-24 13:24:04 +0900
commit 399e1509b93b9f3c56142f94b8fb2c30c2dedb2f (patch)
tree d67f18f5cca91a66e3146cbd2f48920571768e23 /packages/api/src
parent 997b00034435440d412f955e05e53f09bae83f9e (diff)
download dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.tar.gz
dispatch-399e1509b93b9f3c56142f94b8fb2c30c2dedb2f.zip
fix: prompt caching, OpenCode Go MiniMax/Qwen support, Opus 4.7 thinking, SDK compat
- Implement Anthropic prompt caching: first system message + last 2 non-system messages get cache_control: ephemeral, mirroring OpenCode's applyCaching strategy. Move system prompt inline into messages array so providerOptions can attach.
- Add opencode-anthropic provider variant routing MiniMax/Qwen models through the /messages endpoint with x-api-key auth, distinct from the Claude OAuth flow's Bearer auth and Claude Code mimicry.
- Split isAnthropic into isClaudeOAuth (billing header, mcp_ tool prefix, thinking config) and usesAnthropicSDK (cache markers) so non-OAuth Anthropic-format gateways get the right treatment.
- Pin @ai-sdk/anthropic to ^1.2.12: v3 returns LanguageModelV3-spec models that ai v4's streamText rejects at runtime ('AI SDK 4 only supports models that implement specification version v1'). Drop unnecessary V1 casts.
- Restore Opus 4.7 extended thinking by rewriting the outgoing /messages body in the Claude OAuth fetch interceptor: inject thinking: { type: 'adaptive' } (v1 SDK can't emit it), strip temperature/top_p/top_k (Anthropic rejects them with thinking enabled). Gated on max_tokens > 4096 so effort=none still works.
- Bump MAX_STEPS from 10 to 50 to align with AI SDK's stepCountIs(20) default and reduce mid-task halts.
- Fix pre-existing typecheck errors in agent-manager.ts (entry/nextEntry narrowing), app.ts (agentModels body field), KeyUsage.svelte (m guards), and a TS2742 in provider.ts via explicit ModelFactory return type.
- buildFallbackSequence now always returns at least one entry so processMessage runs the agent loop even without keyId/modelId (fixes 4 broken agent-manager tests).
Diffstat (limited to 'packages/api/src')
-rw-r--r-- packages/api/src/agent-manager.ts 39
-rw-r--r-- packages/api/src/app.ts 1
2 files changed, 33 insertions, 7 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts
index 1a28371..73b65f5 100644
--- a/packages/api/src/agent-manager.ts
+++ b/packages/api/src/agent-manager.ts
@@ -98,6 +98,15 @@ Good approach:
3. Work through each file sequentially
`.trim();
+/**
+ * True if `modelId` starts with "minimax-" or "qwen" (case-sensitive): the
+ * prefixes of OpenCode Go models on the Anthropic-format `/messages` endpoint
+ * (MiniMax M2.x, Qwen3.x Plus). See https://opencode.ai/docs/go/#endpoints.
+ */
+function isOpencodeGoAnthropicModel(modelId: string): boolean {
+ return modelId.startsWith("minimax-") || modelId.startsWith("qwen");
+}
+
function buildSystemPrompt(toolNames: string[], basePrompt?: string): string {
const base = basePrompt || DEFAULT_SYSTEM_PROMPT;
const toolList = toolNames
@@ -553,6 +562,16 @@ export class AgentManager {
apiKey = envKey;
baseURL = key.base_url;
model = effectiveModelId;
+ // OpenCode Go splits its catalog across two endpoints:
+ // `/chat/completions` — GLM, Kimi, DeepSeek, MiMo (OpenAI-compatible)
+ // `/messages` — MiniMax, Qwen (Anthropic-format)
+ // The configured key has provider="opencode-go" which defaults to
+ // the OpenAI-compatible path. When the selected model lives on the
+ // `/messages` route, route through the API-key Anthropic provider
+ // instead so the SDK targets the correct endpoint and protocol.
+ if (key.provider === "opencode-go" && isOpencodeGoAnthropicModel(model)) {
+ provider = "opencode-anthropic";
+ }
tabAgent.keyId = effectiveKeyId;
tabAgent.modelId = effectiveModelId;
useOverride = true;
@@ -847,8 +866,12 @@ export class AgentManager {
for (let fallbackIdx = 0; fallbackIdx < maxFallbackAttempts; fallbackIdx++) {
const entry = fallbackSequence[fallbackIdx];
- currentKeyId = entry.key_id;
- currentModelId = entry.model_id;
+ if (!entry) break; // unreachable: loop bound guarantees defined, satisfies TS
+ // Convert empty strings (used when caller omitted keyId/modelId in
+ // manual mode) to undefined so `getOrCreateAgentForTab` falls back
+ // to the tabAgent's stored defaults via the `?? tabAgent.keyId` chain.
+ currentKeyId = entry.key_id || undefined;
+ currentModelId = entry.model_id || undefined;
allOutput = "";
let assistantText = "";
let assistantThinking = "";
@@ -977,8 +1000,8 @@ export class AgentManager {
// Try the next entry in the agent's fallback sequence
const nextIdx = fallbackIdx + 1;
- if (nextIdx < maxFallbackAttempts) {
- const nextEntry = fallbackSequence[nextIdx];
+ const nextEntry = fallbackSequence[nextIdx];
+ if (nextIdx < maxFallbackAttempts && nextEntry) {
const fallbackMsg =
`Key "${tabAgent.keyId}" rate limited. ` +
`Falling back to "${nextEntry.key_id}" (model: ${nextEntry.model_id})...`;
@@ -1021,9 +1044,11 @@ export class AgentManager {
const startIdx = models.findIndex((m) => m.key_id === keyId && m.model_id === modelId);
return startIdx >= 0 ? models.slice(startIdx) : models;
}
- // Manual mode: no fallback — just the selected key/model pair
- if (keyId && modelId) return [{ key_id: keyId, model_id: modelId }];
- return [];
+ // Manual mode: no fallback — just the selected key/model pair.
+ // Always return at least one entry so `processMessage` runs the agent
+ // once (empty strings let `getOrCreateAgentForTab` fall back to the
+ // tabAgent's stored defaults or environment-driven config).
+ return [{ key_id: keyId ?? "", model_id: modelId ?? "" }];
}
queueMessage(tabId: string, message: string, clientId?: string): { messageId: string } {
diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts
index d7dd0be..ba5dabd 100644
--- a/packages/api/src/app.ts
+++ b/packages/api/src/app.ts
@@ -41,6 +41,7 @@ app.post("/chat", async (c) => {
message?: unknown;
keyId?: unknown;
modelId?: unknown;
+ agentModels?: unknown;
reasoningEffort?: unknown;
workingDirectory?: unknown;
queueId?: unknown;