summary | refs | log | tree | commit | diff | homepage
path: root/packages/cache-warming/src
diff options
context:
space:
mode:
author: Adam Malczewski <[email protected]> 2026-06-11 14:11:13 +0900
committer: Adam Malczewski <[email protected]> 2026-06-11 14:11:13 +0900
commit: 7ffb6b28f5b6bdbfc53ebed94fc68af557612189 (patch)
tree: e66d9ea9d326ef771cc473d81ca5716ff78b08a8 /packages/cache-warming/src
parent: 763e5fb1c7fbfb4c7bbd43ffb935e42e5f5b5a42 (diff)
download: dispatch-7ffb6b28f5b6bdbfc53ebed94fc68af557612189.tar.gz
          dispatch-7ffb6b28f5b6bdbfc53ebed94fc68af557612189.zip
fix(cache-warming): accurate cache rate + expectedCacheRate (retention) metric
The Claude cache % read 100% whenever anything was cached, because the metric's denominator (inputTokens) excluded cached tokens on Anthropic. Fixed upstream in ../claude/provider-anthropic (inputTokens = total prompt); this commit adds the companion retention metric and exposes it:

- transport-contract: WarmResponse += expectedCacheRate
- transport-http: POST /chat/warm returns expectedCacheRate = cacheRead/(cacheRead+cacheWrite)
- cache-warming: computeExpectedCacheRate + a per-conversation 'cache retention' surface stat
- handoff: documents the fix + cache-rate vs expected-cache (cross-turn) for the FE

Live-verified vs claude haiku: real turn cache rate 61% (was inflated 100%); warm within TTL expectedCacheRate=100%, after expiry=0%.
Diffstat (limited to 'packages/cache-warming/src')
-rw-r--r--  packages/cache-warming/src/extension.ts    7
-rw-r--r--  packages/cache-warming/src/index.ts        1
-rw-r--r--  packages/cache-warming/src/pure.test.ts   44
-rw-r--r--  packages/cache-warming/src/pure.ts        25
-rw-r--r--  packages/cache-warming/src/warmer.test.ts 47
-rw-r--r--  packages/cache-warming/src/warmer.ts       6
6 files changed, 121 insertions, 9 deletions
diff --git a/packages/cache-warming/src/extension.ts b/packages/cache-warming/src/extension.ts
index 26d429b..802618a 100644
--- a/packages/cache-warming/src/extension.ts
+++ b/packages/cache-warming/src/extension.ts
@@ -77,7 +77,12 @@ export function activate(host: HostAPI): void {
return buildDefaultSpec();
}
const state = warmer.getState(convId);
- return buildConversationSpec(state.enabled, state.intervalMs, state.lastPct);
+ return buildConversationSpec(
+ state.enabled,
+ state.intervalMs,
+ state.lastPct,
+ state.lastExpectedPct,
+ );
}
async function invoke(
diff --git a/packages/cache-warming/src/index.ts b/packages/cache-warming/src/index.ts
index d77f4ec..88cab3b 100644
--- a/packages/cache-warming/src/index.ts
+++ b/packages/cache-warming/src/index.ts
@@ -5,6 +5,7 @@ export {
type ConversationSettings,
type ConversationState,
computeCachePct,
+ computeExpectedCacheRate,
DEFAULT_INTERVAL_MS,
isTokenCurrent,
MIN_INTERVAL_MS,
diff --git a/packages/cache-warming/src/pure.test.ts b/packages/cache-warming/src/pure.test.ts
index 1c912f2..f5e2f1d 100644
--- a/packages/cache-warming/src/pure.test.ts
+++ b/packages/cache-warming/src/pure.test.ts
@@ -4,6 +4,7 @@ import {
buildConversationSpec,
buildDefaultSpec,
computeCachePct,
+ computeExpectedCacheRate,
isTokenCurrent,
MIN_INTERVAL_MS,
msToSeconds,
@@ -29,6 +30,20 @@ describe("computeCachePct", () => {
});
});
+describe("computeExpectedCacheRate", () => {
+ it("cacheRead/(cacheRead+cacheWrite) rounded", () => {
+ expect(computeExpectedCacheRate(800, 200)).toBe(80);
+ expect(computeExpectedCacheRate(500, 500)).toBe(50);
+ expect(computeExpectedCacheRate(1000, 0)).toBe(100);
+ expect(computeExpectedCacheRate(0, 1000)).toBe(0);
+ expect(computeExpectedCacheRate(333, 667)).toBe(33);
+ });
+
+ it("0 when cacheRead+cacheWrite is 0", () => {
+ expect(computeExpectedCacheRate(0, 0)).toBe(0);
+ });
+});
+
describe("shouldWarm", () => {
it("returns true when enabled, idle, and token matches", () => {
const state: ConversationState = {
@@ -36,6 +51,7 @@ describe("shouldWarm", () => {
intervalMs: 240_000,
active: false,
lastPct: null,
+ lastExpectedPct: null,
token: 5,
};
expect(shouldWarm(state, 5)).toBe(true);
@@ -47,6 +63,7 @@ describe("shouldWarm", () => {
intervalMs: 240_000,
active: false,
lastPct: null,
+ lastExpectedPct: null,
token: 5,
};
expect(shouldWarm(state, 5)).toBe(false);
@@ -58,6 +75,7 @@ describe("shouldWarm", () => {
intervalMs: 240_000,
active: true,
lastPct: null,
+ lastExpectedPct: null,
token: 5,
};
expect(shouldWarm(state, 5)).toBe(false);
@@ -69,6 +87,7 @@ describe("shouldWarm", () => {
intervalMs: 240_000,
active: false,
lastPct: null,
+ lastExpectedPct: null,
token: 5,
};
expect(shouldWarm(state, 6)).toBe(false);
@@ -162,12 +181,12 @@ describe("parseIntervalPayload", () => {
});
describe("buildConversationSpec", () => {
- it("builds a per-conversation spec with toggle + number(interval) + last-% fields", () => {
- const spec = buildConversationSpec(true, 240_000, 80);
+ it("builds a per-conversation spec with toggle + number(interval) + last-% + retention fields", () => {
+ const spec = buildConversationSpec(true, 240_000, 80, 95);
expect(spec.id).toBe("cache-warming");
expect(spec.region).toBe("side");
expect(spec.title).toBe("Cache Warming");
- expect(spec.fields).toHaveLength(3);
+ expect(spec.fields).toHaveLength(4);
const toggle = spec.fields[0];
expect(toggle).toEqual({
@@ -194,20 +213,33 @@ describe("buildConversationSpec", () => {
label: "Last Cache %",
value: "80%",
});
+
+ const retention = spec.fields[3];
+ expect(retention).toEqual({
+ kind: "stat",
+ label: "Cache retention",
+ value: "95%",
+ });
});
- it("shows — when lastPct is null", () => {
- const spec = buildConversationSpec(true, 240_000, null);
+ it("shows — when lastPct and lastExpectedPct are null", () => {
+ const spec = buildConversationSpec(true, 240_000, null, null);
const stat = spec.fields[2];
expect(stat).toEqual({
kind: "stat",
label: "Last Cache %",
value: "—",
});
+ const retention = spec.fields[3];
+ expect(retention).toEqual({
+ kind: "stat",
+ label: "Cache retention",
+ value: "—",
+ });
});
it("reflects disabled state", () => {
- const spec = buildConversationSpec(false, 120_000, 50);
+ const spec = buildConversationSpec(false, 120_000, 50, 75);
const toggle = spec.fields[0];
expect(toggle).toEqual({
kind: "toggle",
diff --git a/packages/cache-warming/src/pure.ts b/packages/cache-warming/src/pure.ts
index 7b91b11..ab6fc79 100644
--- a/packages/cache-warming/src/pure.ts
+++ b/packages/cache-warming/src/pure.ts
@@ -17,6 +17,7 @@ export interface ConversationSettings {
export interface ConversationState extends ConversationSettings {
readonly active: boolean;
readonly lastPct: number | null;
+ readonly lastExpectedPct: number | null;
readonly token: number;
}
@@ -43,6 +44,21 @@ export function computeCachePct(inputTokens: number, cacheReadTokens: number): n
}
/**
+ * Compute expected cache retention rate from token counts.
+ * Of the cacheable prefix the warm touched, how much was still warm (read back)
+ * vs. had to be (re)written.
+ * Returns an integer in [0, 100]. cacheRead + cacheWrite ≤ 0 → 0.
+ */
+export function computeExpectedCacheRate(
+ cacheReadTokens: number,
+ cacheWriteTokens: number,
+): number {
+ const total = cacheReadTokens + cacheWriteTokens;
+ if (total <= 0) return 0;
+ return Math.round((cacheReadTokens / total) * 100);
+}
+
+/**
* Decide whether a conversation should be warmed right now.
* Requires: enabled, idle (not active), and the token is current (not superseded).
*/
@@ -120,8 +136,10 @@ export function buildConversationSpec(
enabled: boolean,
intervalMs: number,
lastPct: number | null,
+ lastExpectedPct: number | null,
): SurfaceSpec {
const pctDisplay = lastPct === null ? "—" : `${lastPct}%`;
+ const retentionDisplay = lastExpectedPct === null ? "—" : `${lastExpectedPct}%`;
const toggle: ToggleField = {
kind: "toggle",
label: "Enabled",
@@ -142,11 +160,16 @@ export function buildConversationSpec(
label: "Last Cache %",
value: pctDisplay,
};
+ const retentionStat: StatField = {
+ kind: "stat",
+ label: "Cache retention",
+ value: retentionDisplay,
+ };
return {
id: "cache-warming",
region: "side",
title: "Cache Warming",
- fields: [toggle, interval, stat],
+ fields: [toggle, interval, stat, retentionStat],
};
}
diff --git a/packages/cache-warming/src/warmer.test.ts b/packages/cache-warming/src/warmer.test.ts
index 9865877..86908a2 100644
--- a/packages/cache-warming/src/warmer.test.ts
+++ b/packages/cache-warming/src/warmer.test.ts
@@ -182,6 +182,30 @@ describe("CacheWarmer", () => {
expect(state.lastPct).toBe(80);
});
+ it("a completed warm stores both lastPct (rate) and lastExpectedPct (retention)", async () => {
+ const timers = fakeTimers();
+ const warmer = createCacheWarmer({
+ warm: async () => ({
+ inputTokens: 1000,
+ outputTokens: 10,
+ cacheReadTokens: 700,
+ cacheWriteTokens: 300,
+ }),
+ storage: memStorage(),
+ logger: makeLogger(),
+ timers,
+ onSurfaceChange: () => {},
+ });
+
+ warmer.onTurnSettled("conv-1", {});
+ timers.flush();
+
+ await new Promise((r) => setTimeout(r, 10));
+ const state = warmer.getState("conv-1");
+ expect(state.lastPct).toBe(70);
+ expect(state.lastExpectedPct).toBe(70);
+ });
+
it("re-arms timer after warm completes", async () => {
const timers = fakeTimers();
let warmCount = 0;
@@ -316,4 +340,27 @@ describe("CacheWarmer", () => {
await warmer.setIntervalMs("conv-1", 30_000);
expect(changeCount).toBe(2);
});
+
+ it("the per-conversation spec includes a cache-retention stat", async () => {
+ const timers = fakeTimers();
+ const warmer = createCacheWarmer({
+ warm: async () => ({
+ inputTokens: 1000,
+ outputTokens: 10,
+ cacheReadTokens: 900,
+ cacheWriteTokens: 100,
+ }),
+ storage: memStorage(),
+ logger: makeLogger(),
+ timers,
+ onSurfaceChange: () => {},
+ });
+
+ warmer.onTurnSettled("conv-1", {});
+ timers.flush();
+ await new Promise((r) => setTimeout(r, 10));
+
+ const state = warmer.getState("conv-1");
+ expect(state.lastExpectedPct).toBe(90);
+ });
});
diff --git a/packages/cache-warming/src/warmer.ts b/packages/cache-warming/src/warmer.ts
index 31dd41e..f50f346 100644
--- a/packages/cache-warming/src/warmer.ts
+++ b/packages/cache-warming/src/warmer.ts
@@ -5,6 +5,7 @@ import {
type ConversationSettings,
type ConversationState,
computeCachePct,
+ computeExpectedCacheRate,
DEFAULT_INTERVAL_MS,
isTokenCurrent,
MIN_INTERVAL_MS,
@@ -63,6 +64,7 @@ const DEFAULT_STATE: ConversationState = {
intervalMs: DEFAULT_INTERVAL_MS,
active: false,
lastPct: null,
+ lastExpectedPct: null,
token: 0,
};
@@ -145,11 +147,13 @@ export function createCacheWarmer(deps: CacheWarmerDeps): CacheWarmer {
});
} else {
const pct = computeCachePct(result.inputTokens, result.cacheReadTokens);
- setState(conversationId, { ...currentState, lastPct: pct });
+ const expectedPct = computeExpectedCacheRate(result.cacheReadTokens, result.cacheWriteTokens);
+ setState(conversationId, { ...currentState, lastPct: pct, lastExpectedPct: expectedPct });
deps.onSurfaceChange();
deps.logger.debug("cache-warming: warm complete", {
conversationId,
pct,
+ expectedPct,
});
}