diff options
Diffstat (limited to 'packages/api/src')
| -rw-r--r-- | packages/api/src/agent-manager.ts | 18 |
| -rw-r--r-- | packages/api/src/app.ts | 7 |
2 files changed, 23 insertions, 2 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 109dd33..0a6f3c6 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -1040,7 +1040,12 @@ export class AgentManager { */ async warmCacheForTab( tabId: string, - opts: { keyId?: string; modelId?: string; agentModels?: AgentModelEntry[] } = {}, + opts: { + keyId?: string; + modelId?: string; + agentModels?: AgentModelEntry[]; + reasoningEffort?: ReasoningEffort; + } = {}, ): Promise<{ ok: true; usage: UsageData } | { ok: false; error: string }> { if (this.getTabStatus(tabId) === "running") { return { ok: false, error: "tab is generating" }; @@ -1060,6 +1065,13 @@ export class AgentManager { primary?.model_id || opts.modelId, ); + // Resolve the SAME reasoning effort the next real turn would use: + // per-model (agent definition) → per-tab selector → Agent default. + // This drives the thinking providerOptions, which is an Anthropic + // message-cache key — warming MUST match it or it warms a different + // cache bucket than the real turn reads (the 0%-on-switch bug). + const effort = primary?.effort ?? opts.reasoningEffort; + // Rebuild the genuine history exactly as `getOrCreateAgentForTab`'s // pre-population does, but keep the FULL history (no trailing-user // trim): warming replays the complete cached prefix as-is. @@ -1071,7 +1083,9 @@ export class AgentManager { history = [...agent.messages]; } - const usage = await agent.warmCache(history); + const usage = await agent.warmCache(history, { + ...(effort ? { reasoningEffort: effort } : {}), + }); return { ok: true, usage }; } catch (err) { return { ok: false, error: err instanceof Error ? 
err.message : String(err) }; diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts index a957da7..72188ff 100644 --- a/packages/api/src/app.ts +++ b/packages/api/src/app.ts @@ -239,6 +239,7 @@ app.post("/chat/warm", async (c) => { keyId?: unknown; modelId?: unknown; agentModels?: unknown; + reasoningEffort?: unknown; }>(); const { tabId } = body; if (typeof tabId !== "string" || tabId.trim() === "") { @@ -247,11 +248,17 @@ app.post("/chat/warm", async (c) => { const keyId = typeof body.keyId === "string" ? body.keyId : undefined; const modelId = typeof body.modelId === "string" ? body.modelId : undefined; const agentModels = sanitizeAgentModels(body.agentModels); + // Same effort the real turn would use — a message-cache key, so warming must + // match it to refresh the SAME bucket the next real message reads. + const reasoningEffort = isReasoningEffort(body.reasoningEffort) + ? body.reasoningEffort + : undefined; const result = await agentManager.warmCacheForTab(tabId, { ...(keyId ? { keyId } : {}), ...(modelId ? { modelId } : {}), ...(agentModels ? { agentModels } : {}), + ...(reasoningEffort ? { reasoningEffort } : {}), }); if (!result.ok) { // "tab is generating" is an expected race (not a server fault) → 409. |
