fix(cache-warming): accurate cache rate + expectedCacheRate (retention) metric

The Claude cache % read 100% whenever anything was cached, because the metric's denominator (inputTokens) excluded cached tokens on Anthropic. That is fixed upstream in ../claude/provider-anthropic (inputTokens = total prompt); this commit adds the companion retention metric and exposes it:

- transport-contract: WarmResponse += expectedCacheRate
- transport-http: POST /chat/warm returns expectedCacheRate = cacheRead/(cacheRead+cacheWrite)
- cache-warming: computeExpectedCacheRate + a per-conversation 'cache retention' surface stat
- handoff: documents the fix + cache-rate vs expected-cache (cross-turn) for the FE

Live-verified vs claude haiku: real turn cache rate 61% (was inflated 100%); warm within TTL expectedCacheRate=100%, after expiry=0%.
author: Adam Malczewski <[email protected]> 2026-06-11 14:11:13 +0900
committer: Adam Malczewski <[email protected]> 2026-06-11 14:11:13 +0900
commit: 7ffb6b28f5b6bdbfc53ebed94fc68af557612189 (patch)
tree: e66d9ea9d326ef771cc473d81ca5716ff78b08a8 /packages/cache-warming/src
parent: 763e5fb1c7fbfb4c7bbd43ffb935e42e5f5b5a42 (diff)
download: dispatch-7ffb6b28f5b6bdbfc53ebed94fc68af557612189.tar.gz dispatch-7ffb6b28f5b6bdbfc53ebed94fc68af557612189.zip
6 files changed, 121 insertions, 9 deletions
diff --git a/packages/cache-warming/src/extension.ts b/packages/cache-warming/src/extension.ts
index 26d429b..802618a 100644
--- a/packages/cache-warming/src/extension.ts
+++ b/packages/cache-warming/src/extension.ts
@@ -77,7 +77,12 @@ export function activate(host: HostAPI): void {
 			return buildDefaultSpec();
 		}
 		const state = warmer.getState(convId);
-		return buildConversationSpec(state.enabled, state.intervalMs, state.lastPct);
+		return buildConversationSpec(
+			state.enabled,
+			state.intervalMs,
+			state.lastPct,
+			state.lastExpectedPct,
+		);
 	}
 
 	async function invoke(
diff --git a/packages/cache-warming/src/index.ts b/packages/cache-warming/src/index.ts
index d77f4ec..88cab3b 100644
--- a/packages/cache-warming/src/index.ts
+++ b/packages/cache-warming/src/index.ts
@@ -5,6 +5,7 @@ export {
 	type ConversationSettings,
 	type ConversationState,
 	computeCachePct,
+	computeExpectedCacheRate,
 	DEFAULT_INTERVAL_MS,
 	isTokenCurrent,
 	MIN_INTERVAL_MS,
diff --git a/packages/cache-warming/src/pure.test.ts b/packages/cache-warming/src/pure.test.ts
index 1c912f2..f5e2f1d 100644
--- a/packages/cache-warming/src/pure.test.ts
+++ b/packages/cache-warming/src/pure.test.ts
@@ -4,6 +4,7 @@ import {
 	buildConversationSpec,
 	buildDefaultSpec,
 	computeCachePct,
+	computeExpectedCacheRate,
 	isTokenCurrent,
 	MIN_INTERVAL_MS,
 	msToSeconds,
@@ -29,6 +30,20 @@ describe("computeCachePct", () => {
 	});
 });
 
+describe("computeExpectedCacheRate", () => {
+	it("cacheRead/(cacheRead+cacheWrite) rounded", () => {
+		expect(computeExpectedCacheRate(800, 200)).toBe(80);
+		expect(computeExpectedCacheRate(500, 500)).toBe(50);
+		expect(computeExpectedCacheRate(1000, 0)).toBe(100);
+		expect(computeExpectedCacheRate(0, 1000)).toBe(0);
+		expect(computeExpectedCacheRate(333, 667)).toBe(33);
+	});
+
+	it("0 when cacheRead+cacheWrite is 0", () => {
+		expect(computeExpectedCacheRate(0, 0)).toBe(0);
+	});
+});
+
 describe("shouldWarm", () => {
 	it("returns true when enabled, idle, and token matches", () => {
 		const state: ConversationState = {
@@ -36,6 +51,7 @@ describe("shouldWarm", () => {
 			intervalMs: 240_000,
 			active: false,
 			lastPct: null,
+			lastExpectedPct: null,
 			token: 5,
 		};
 		expect(shouldWarm(state, 5)).toBe(true);
@@ -47,6 +63,7 @@ describe("shouldWarm", () => {
 			intervalMs: 240_000,
 			active: false,
 			lastPct: null,
+			lastExpectedPct: null,
 			token: 5,
 		};
 		expect(shouldWarm(state, 5)).toBe(false);
@@ -58,6 +75,7 @@ describe("shouldWarm", () => {
 			intervalMs: 240_000,
 			active: true,
 			lastPct: null,
+			lastExpectedPct: null,
 			token: 5,
 		};
 		expect(shouldWarm(state, 5)).toBe(false);
@@ -69,6 +87,7 @@ describe("shouldWarm", () => {
 			intervalMs: 240_000,
 			active: false,
 			lastPct: null,
+			lastExpectedPct: null,
 			token: 5,
 		};
 		expect(shouldWarm(state, 6)).toBe(false);
@@ -162,12 +181,12 @@ describe("parseIntervalPayload", () => {
 });
 
 describe("buildConversationSpec", () => {
-	it("builds a per-conversation spec with toggle + number(interval) + last-% fields", () => {
-		const spec = buildConversationSpec(true, 240_000, 80);
+	it("builds a per-conversation spec with toggle + number(interval) + last-% + retention fields", () => {
+		const spec = buildConversationSpec(true, 240_000, 80, 95);
 		expect(spec.id).toBe("cache-warming");
 		expect(spec.region).toBe("side");
 		expect(spec.title).toBe("Cache Warming");
-		expect(spec.fields).toHaveLength(3);
+		expect(spec.fields).toHaveLength(4);
 
 		const toggle = spec.fields[0];
 		expect(toggle).toEqual({
@@ -194,20 +213,33 @@ describe("buildConversationSpec", () => {
 			label: "Last Cache %",
 			value: "80%",
 		});
+
+		const retention = spec.fields[3];
+		expect(retention).toEqual({
+			kind: "stat",
+			label: "Cache retention",
+			value: "95%",
+		});
 	});
 
-	it("shows — when lastPct is null", () => {
-		const spec = buildConversationSpec(true, 240_000, null);
+	it("shows — when lastPct and lastExpectedPct are null", () => {
+		const spec = buildConversationSpec(true, 240_000, null, null);
 		const stat = spec.fields[2];
 		expect(stat).toEqual({
 			kind: "stat",
 			label: "Last Cache %",
 			value: "—",
 		});
+		const retention = spec.fields[3];
+		expect(retention).toEqual({
+			kind: "stat",
+			label: "Cache retention",
+			value: "—",
+		});
 	});
 
 	it("reflects disabled state", () => {
-		const spec = buildConversationSpec(false, 120_000, 50);
+		const spec = buildConversationSpec(false, 120_000, 50, 75);
 		const toggle = spec.fields[0];
 		expect(toggle).toEqual({
 			kind: "toggle",
diff --git a/packages/cache-warming/src/pure.ts b/packages/cache-warming/src/pure.ts
index 7b91b11..ab6fc79 100644
--- a/packages/cache-warming/src/pure.ts
+++ b/packages/cache-warming/src/pure.ts
@@ -17,6 +17,7 @@ export interface ConversationSettings {
 export interface ConversationState extends ConversationSettings {
 	readonly active: boolean;
 	readonly lastPct: number | null;
+	readonly lastExpectedPct: number | null;
 	readonly token: number;
 }
 
@@ -43,6 +44,21 @@ export function computeCachePct(inputTokens: number, cacheReadTokens: number): n
 }
 
 /**
+ * Compute expected cache retention rate from token counts.
+ * Of the cacheable prefix the warm touched, how much was still warm (read back)
+ * vs. had to be (re)written.
+ * Returns an integer in [0, 100]. cacheRead + cacheWrite ≤ 0 → 0.
+ */
+export function computeExpectedCacheRate(
+	cacheReadTokens: number,
+	cacheWriteTokens: number,
+): number {
+	const total = cacheReadTokens + cacheWriteTokens;
+	if (total <= 0) return 0;
+	return Math.round((cacheReadTokens / total) * 100);
+}
+
+/**
  * Decide whether a conversation should be warmed right now.
  * Requires: enabled, idle (not active), and the token is current (not superseded).
  */
@@ -120,8 +136,10 @@ export function buildConversationSpec(
 	enabled: boolean,
 	intervalMs: number,
 	lastPct: number | null,
+	lastExpectedPct: number | null,
 ): SurfaceSpec {
 	const pctDisplay = lastPct === null ? "—" : `${lastPct}%`;
+	const retentionDisplay = lastExpectedPct === null ? "—" : `${lastExpectedPct}%`;
 	const toggle: ToggleField = {
 		kind: "toggle",
 		label: "Enabled",
@@ -142,11 +160,16 @@ export function buildConversationSpec(
 		label: "Last Cache %",
 		value: pctDisplay,
 	};
+	const retentionStat: StatField = {
+		kind: "stat",
+		label: "Cache retention",
+		value: retentionDisplay,
+	};
 	return {
 		id: "cache-warming",
 		region: "side",
 		title: "Cache Warming",
-		fields: [toggle, interval, stat],
+		fields: [toggle, interval, stat, retentionStat],
 	};
 }
 
diff --git a/packages/cache-warming/src/warmer.test.ts b/packages/cache-warming/src/warmer.test.ts
index 9865877..86908a2 100644
--- a/packages/cache-warming/src/warmer.test.ts
+++ b/packages/cache-warming/src/warmer.test.ts
@@ -182,6 +182,30 @@ describe("CacheWarmer", () => {
 		expect(state.lastPct).toBe(80);
 	});
 
+	it("a completed warm stores both lastPct (rate) and lastExpectedPct (retention)", async () => {
+		const timers = fakeTimers();
+		const warmer = createCacheWarmer({
+			warm: async () => ({
+				inputTokens: 1000,
+				outputTokens: 10,
+				cacheReadTokens: 700,
+				cacheWriteTokens: 300,
+			}),
+			storage: memStorage(),
+			logger: makeLogger(),
+			timers,
+			onSurfaceChange: () => {},
+		});
+
+		warmer.onTurnSettled("conv-1", {});
+		timers.flush();
+
+		await new Promise((r) => setTimeout(r, 10));
+		const state = warmer.getState("conv-1");
+		expect(state.lastPct).toBe(70);
+		expect(state.lastExpectedPct).toBe(70);
+	});
+
 	it("re-arms timer after warm completes", async () => {
 		const timers = fakeTimers();
 		let warmCount = 0;
@@ -316,4 +340,27 @@ describe("CacheWarmer", () => {
 		await warmer.setIntervalMs("conv-1", 30_000);
 		expect(changeCount).toBe(2);
 	});
+
+	it("the per-conversation spec includes a cache-retention stat", async () => {
+		const timers = fakeTimers();
+		const warmer = createCacheWarmer({
+			warm: async () => ({
+				inputTokens: 1000,
+				outputTokens: 10,
+				cacheReadTokens: 900,
+				cacheWriteTokens: 100,
+			}),
+			storage: memStorage(),
+			logger: makeLogger(),
+			timers,
+			onSurfaceChange: () => {},
+		});
+
+		warmer.onTurnSettled("conv-1", {});
+		timers.flush();
+		await new Promise((r) => setTimeout(r, 10));
+
+		const state = warmer.getState("conv-1");
+		expect(state.lastExpectedPct).toBe(90);
+	});
 });
diff --git a/packages/cache-warming/src/warmer.ts b/packages/cache-warming/src/warmer.ts
index 31dd41e..f50f346 100644
--- a/packages/cache-warming/src/warmer.ts
+++ b/packages/cache-warming/src/warmer.ts
@@ -5,6 +5,7 @@ import {
 	type ConversationSettings,
 	type ConversationState,
 	computeCachePct,
+	computeExpectedCacheRate,
 	DEFAULT_INTERVAL_MS,
 	isTokenCurrent,
 	MIN_INTERVAL_MS,
@@ -63,6 +64,7 @@ const DEFAULT_STATE: ConversationState = {
 	intervalMs: DEFAULT_INTERVAL_MS,
 	active: false,
 	lastPct: null,
+	lastExpectedPct: null,
 	token: 0,
 };
 
@@ -145,11 +147,13 @@ export function createCacheWarmer(deps: CacheWarmerDeps): CacheWarmer {
 			});
 		} else {
 			const pct = computeCachePct(result.inputTokens, result.cacheReadTokens);
-			setState(conversationId, { ...currentState, lastPct: pct });
+			const expectedPct = computeExpectedCacheRate(result.cacheReadTokens, result.cacheWriteTokens);
+			setState(conversationId, { ...currentState, lastPct: pct, lastExpectedPct: expectedPct });
 			deps.onSurfaceChange();
 			deps.logger.debug("cache-warming: warm complete", {
 				conversationId,
 				pct,
+				expectedPct,
 			});
 		}
author	Adam Malczewski <[email protected]>	2026-06-11 14:11:13 +0900
committer	Adam Malczewski <[email protected]>	2026-06-11 14:11:13 +0900
commit	7ffb6b28f5b6bdbfc53ebed94fc68af557612189 (patch)
tree	e66d9ea9d326ef771cc473d81ca5716ff78b08a8 /packages/cache-warming/src
parent	763e5fb1c7fbfb4c7bbd43ffb935e42e5f5b5a42 (diff)
download	dispatch-7ffb6b28f5b6bdbfc53ebed94fc68af557612189.tar.gz dispatch-7ffb6b28f5b6bdbfc53ebed94fc68af557612189.zip