2 files changed, 23 insertions, 2 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts
index 109dd33..0a6f3c6 100644
--- a/packages/api/src/agent-manager.ts
+++ b/packages/api/src/agent-manager.ts
@@ -1040,7 +1040,12 @@ export class AgentManager {
 	 */
 	async warmCacheForTab(
 		tabId: string,
-		opts: { keyId?: string; modelId?: string; agentModels?: AgentModelEntry[] } = {},
+		opts: {
+			keyId?: string;
+			modelId?: string;
+			agentModels?: AgentModelEntry[];
+			reasoningEffort?: ReasoningEffort;
+		} = {},
 	): Promise<{ ok: true; usage: UsageData } | { ok: false; error: string }> {
 		if (this.getTabStatus(tabId) === "running") {
 			return { ok: false, error: "tab is generating" };
@@ -1060,6 +1065,13 @@ export class AgentManager {
 				primary?.model_id || opts.modelId,
 			);
 
+			// Resolve the SAME reasoning effort the next real turn would use:
+			// per-model (agent definition) → per-tab selector → Agent default.
+			// This drives the thinking providerOptions, which is an Anthropic
+			// message-cache key — warming MUST match it or it warms a different
+			// cache bucket than the real turn reads (the 0%-on-switch bug).
+			const effort = primary?.effort ?? opts.reasoningEffort;
+
 			// Rebuild the genuine history exactly as `getOrCreateAgentForTab`'s
 			// pre-population does, but keep the FULL history (no trailing-user
 			// trim): warming replays the complete cached prefix as-is.
@@ -1071,7 +1083,9 @@ export class AgentManager {
 				history = [...agent.messages];
 			}
 
-			const usage = await agent.warmCache(history);
+			const usage = await agent.warmCache(history, {
+				...(effort ? { reasoningEffort: effort } : {}),
+			});
 			return { ok: true, usage };
 		} catch (err) {
 			return { ok: false, error: err instanceof Error ? err.message : String(err) };
diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts
index a957da7..72188ff 100644
--- a/packages/api/src/app.ts
+++ b/packages/api/src/app.ts
@@ -239,6 +239,7 @@ app.post("/chat/warm", async (c) => {
 		keyId?: unknown;
 		modelId?: unknown;
 		agentModels?: unknown;
+		reasoningEffort?: unknown;
 	}>();
 	const { tabId } = body;
 	if (typeof tabId !== "string" || tabId.trim() === "") {
@@ -247,11 +248,17 @@ app.post("/chat/warm", async (c) => {
 	const keyId = typeof body.keyId === "string" ? body.keyId : undefined;
 	const modelId = typeof body.modelId === "string" ? body.modelId : undefined;
 	const agentModels = sanitizeAgentModels(body.agentModels);
+	// Same effort the real turn would use — a message-cache key, so warming must
+	// match it to refresh the SAME bucket the next real message reads.
+	const reasoningEffort = isReasoningEffort(body.reasoningEffort)
+		? body.reasoningEffort
+		: undefined;
 
 	const result = await agentManager.warmCacheForTab(tabId, {
 		...(keyId ? { keyId } : {}),
 		...(modelId ? { modelId } : {}),
 		...(agentModels ? { agentModels } : {}),
+		...(reasoningEffort ? { reasoningEffort } : {}),
 	});
 	if (!result.ok) {
 		// "tab is generating" is an expected race (not a server fault) → 409.