summary | refs | log | tree | commit | diff | homepage
path: root/packages/api/src
diff options
context:
space:
mode:
Diffstat (limited to 'packages/api/src')
-rw-r--r-- packages/api/src/agent-manager.ts | 18
-rw-r--r-- packages/api/src/app.ts | 7
2 files changed, 23 insertions, 2 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts
index 109dd33..0a6f3c6 100644
--- a/packages/api/src/agent-manager.ts
+++ b/packages/api/src/agent-manager.ts
@@ -1040,7 +1040,12 @@ export class AgentManager {
*/
async warmCacheForTab(
tabId: string,
- opts: { keyId?: string; modelId?: string; agentModels?: AgentModelEntry[] } = {},
+ opts: {
+ keyId?: string;
+ modelId?: string;
+ agentModels?: AgentModelEntry[];
+ reasoningEffort?: ReasoningEffort;
+ } = {},
): Promise<{ ok: true; usage: UsageData } | { ok: false; error: string }> {
if (this.getTabStatus(tabId) === "running") {
return { ok: false, error: "tab is generating" };
@@ -1060,6 +1065,13 @@ export class AgentManager {
primary?.model_id || opts.modelId,
);
+ // Resolve the SAME reasoning effort the next real turn would use:
+ // per-model (agent definition) → per-tab selector → Agent default.
+ // This drives the thinking providerOptions, which is an Anthropic
+ // message-cache key — warming MUST match it or it warms a different
+ // cache bucket than the real turn reads (the 0%-on-switch bug).
+ const effort = primary?.effort ?? opts.reasoningEffort;
+
// Rebuild the genuine history exactly as `getOrCreateAgentForTab`'s
// pre-population does, but keep the FULL history (no trailing-user
// trim): warming replays the complete cached prefix as-is.
@@ -1071,7 +1083,9 @@ export class AgentManager {
history = [...agent.messages];
}
- const usage = await agent.warmCache(history);
+ const usage = await agent.warmCache(history, {
+ ...(effort ? { reasoningEffort: effort } : {}),
+ });
return { ok: true, usage };
} catch (err) {
return { ok: false, error: err instanceof Error ? err.message : String(err) };
diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts
index a957da7..72188ff 100644
--- a/packages/api/src/app.ts
+++ b/packages/api/src/app.ts
@@ -239,6 +239,7 @@ app.post("/chat/warm", async (c) => {
keyId?: unknown;
modelId?: unknown;
agentModels?: unknown;
+ reasoningEffort?: unknown;
}>();
const { tabId } = body;
if (typeof tabId !== "string" || tabId.trim() === "") {
@@ -247,11 +248,17 @@ app.post("/chat/warm", async (c) => {
const keyId = typeof body.keyId === "string" ? body.keyId : undefined;
const modelId = typeof body.modelId === "string" ? body.modelId : undefined;
const agentModels = sanitizeAgentModels(body.agentModels);
+ // Same effort the real turn would use — a message-cache key, so warming must
+ // match it to refresh the SAME bucket the next real message reads.
+ const reasoningEffort = isReasoningEffort(body.reasoningEffort)
+ ? body.reasoningEffort
+ : undefined;
const result = await agentManager.warmCacheForTab(tabId, {
...(keyId ? { keyId } : {}),
...(modelId ? { modelId } : {}),
...(agentModels ? { agentModels } : {}),
+ ...(reasoningEffort ? { reasoningEffort } : {}),
});
if (!result.ok) {
// "tab is generating" is an expected race (not a server fault) → 409.