diff options
| author | Adam Malczewski <[email protected]> | 2026-06-11 14:11:13 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-11 14:11:13 +0900 |
| commit | 7ffb6b28f5b6bdbfc53ebed94fc68af557612189 (patch) | |
| tree | e66d9ea9d326ef771cc473d81ca5716ff78b08a8 /packages/cache-warming/src | |
| parent | 763e5fb1c7fbfb4c7bbd43ffb935e42e5f5b5a42 (diff) | |
| download | dispatch-7ffb6b28f5b6bdbfc53ebed94fc68af557612189.tar.gz dispatch-7ffb6b28f5b6bdbfc53ebed94fc68af557612189.zip | |
fix(cache-warming): accurate cache rate + expectedCacheRate (retention) metric
The Claude cache % read 100% whenever anything was cached, because the metric's
denominator (inputTokens) excluded cached tokens on Anthropic. Fixed upstream in
../claude/provider-anthropic (inputTokens = total prompt); this commit adds the
companion retention metric and exposes it:
- transport-contract: WarmResponse += expectedCacheRate
- transport-http: POST /chat/warm returns expectedCacheRate = cacheRead/(cacheRead+cacheWrite)
- cache-warming: computeExpectedCacheRate + a per-conversation 'cache retention' surface stat
- handoff: documents the fix + cache-rate vs expected-cache (cross-turn) for the FE
Live-verified against Claude Haiku: a real turn's cache rate now reads 61% (previously inflated to 100%);
a warm within the TTL reports expectedCacheRate=100%, and a warm after expiry reports expectedCacheRate=0%.
Diffstat (limited to 'packages/cache-warming/src')
| -rw-r--r-- | packages/cache-warming/src/extension.ts | 7 | ||||
| -rw-r--r-- | packages/cache-warming/src/index.ts | 1 | ||||
| -rw-r--r-- | packages/cache-warming/src/pure.test.ts | 44 | ||||
| -rw-r--r-- | packages/cache-warming/src/pure.ts | 25 | ||||
| -rw-r--r-- | packages/cache-warming/src/warmer.test.ts | 47 | ||||
| -rw-r--r-- | packages/cache-warming/src/warmer.ts | 6 |
6 files changed, 121 insertions, 9 deletions
diff --git a/packages/cache-warming/src/extension.ts b/packages/cache-warming/src/extension.ts index 26d429b..802618a 100644 --- a/packages/cache-warming/src/extension.ts +++ b/packages/cache-warming/src/extension.ts @@ -77,7 +77,12 @@ export function activate(host: HostAPI): void { return buildDefaultSpec(); } const state = warmer.getState(convId); - return buildConversationSpec(state.enabled, state.intervalMs, state.lastPct); + return buildConversationSpec( + state.enabled, + state.intervalMs, + state.lastPct, + state.lastExpectedPct, + ); } async function invoke( diff --git a/packages/cache-warming/src/index.ts b/packages/cache-warming/src/index.ts index d77f4ec..88cab3b 100644 --- a/packages/cache-warming/src/index.ts +++ b/packages/cache-warming/src/index.ts @@ -5,6 +5,7 @@ export { type ConversationSettings, type ConversationState, computeCachePct, + computeExpectedCacheRate, DEFAULT_INTERVAL_MS, isTokenCurrent, MIN_INTERVAL_MS, diff --git a/packages/cache-warming/src/pure.test.ts b/packages/cache-warming/src/pure.test.ts index 1c912f2..f5e2f1d 100644 --- a/packages/cache-warming/src/pure.test.ts +++ b/packages/cache-warming/src/pure.test.ts @@ -4,6 +4,7 @@ import { buildConversationSpec, buildDefaultSpec, computeCachePct, + computeExpectedCacheRate, isTokenCurrent, MIN_INTERVAL_MS, msToSeconds, @@ -29,6 +30,20 @@ describe("computeCachePct", () => { }); }); +describe("computeExpectedCacheRate", () => { + it("cacheRead/(cacheRead+cacheWrite) rounded", () => { + expect(computeExpectedCacheRate(800, 200)).toBe(80); + expect(computeExpectedCacheRate(500, 500)).toBe(50); + expect(computeExpectedCacheRate(1000, 0)).toBe(100); + expect(computeExpectedCacheRate(0, 1000)).toBe(0); + expect(computeExpectedCacheRate(333, 667)).toBe(33); + }); + + it("0 when cacheRead+cacheWrite is 0", () => { + expect(computeExpectedCacheRate(0, 0)).toBe(0); + }); +}); + describe("shouldWarm", () => { it("returns true when enabled, idle, and token matches", () => { const state: 
ConversationState = { @@ -36,6 +51,7 @@ describe("shouldWarm", () => { intervalMs: 240_000, active: false, lastPct: null, + lastExpectedPct: null, token: 5, }; expect(shouldWarm(state, 5)).toBe(true); @@ -47,6 +63,7 @@ describe("shouldWarm", () => { intervalMs: 240_000, active: false, lastPct: null, + lastExpectedPct: null, token: 5, }; expect(shouldWarm(state, 5)).toBe(false); @@ -58,6 +75,7 @@ describe("shouldWarm", () => { intervalMs: 240_000, active: true, lastPct: null, + lastExpectedPct: null, token: 5, }; expect(shouldWarm(state, 5)).toBe(false); @@ -69,6 +87,7 @@ describe("shouldWarm", () => { intervalMs: 240_000, active: false, lastPct: null, + lastExpectedPct: null, token: 5, }; expect(shouldWarm(state, 6)).toBe(false); @@ -162,12 +181,12 @@ describe("parseIntervalPayload", () => { }); describe("buildConversationSpec", () => { - it("builds a per-conversation spec with toggle + number(interval) + last-% fields", () => { - const spec = buildConversationSpec(true, 240_000, 80); + it("builds a per-conversation spec with toggle + number(interval) + last-% + retention fields", () => { + const spec = buildConversationSpec(true, 240_000, 80, 95); expect(spec.id).toBe("cache-warming"); expect(spec.region).toBe("side"); expect(spec.title).toBe("Cache Warming"); - expect(spec.fields).toHaveLength(3); + expect(spec.fields).toHaveLength(4); const toggle = spec.fields[0]; expect(toggle).toEqual({ @@ -194,20 +213,33 @@ describe("buildConversationSpec", () => { label: "Last Cache %", value: "80%", }); + + const retention = spec.fields[3]; + expect(retention).toEqual({ + kind: "stat", + label: "Cache retention", + value: "95%", + }); }); - it("shows — when lastPct is null", () => { - const spec = buildConversationSpec(true, 240_000, null); + it("shows — when lastPct and lastExpectedPct are null", () => { + const spec = buildConversationSpec(true, 240_000, null, null); const stat = spec.fields[2]; expect(stat).toEqual({ kind: "stat", label: "Last Cache %", value: "—", }); 
+ const retention = spec.fields[3]; + expect(retention).toEqual({ + kind: "stat", + label: "Cache retention", + value: "—", + }); }); it("reflects disabled state", () => { - const spec = buildConversationSpec(false, 120_000, 50); + const spec = buildConversationSpec(false, 120_000, 50, 75); const toggle = spec.fields[0]; expect(toggle).toEqual({ kind: "toggle", diff --git a/packages/cache-warming/src/pure.ts b/packages/cache-warming/src/pure.ts index 7b91b11..ab6fc79 100644 --- a/packages/cache-warming/src/pure.ts +++ b/packages/cache-warming/src/pure.ts @@ -17,6 +17,7 @@ export interface ConversationSettings { export interface ConversationState extends ConversationSettings { readonly active: boolean; readonly lastPct: number | null; + readonly lastExpectedPct: number | null; readonly token: number; } @@ -43,6 +44,21 @@ export function computeCachePct(inputTokens: number, cacheReadTokens: number): n } /** + * Compute expected cache retention rate from token counts. + * Of the cacheable prefix the warm touched, how much was still warm (read back) + * vs. had to be (re)written. + * Returns an integer in [0, 100]. cacheRead + cacheWrite ≤ 0 → 0. + */ +export function computeExpectedCacheRate( + cacheReadTokens: number, + cacheWriteTokens: number, +): number { + const total = cacheReadTokens + cacheWriteTokens; + if (total <= 0) return 0; + return Math.round((cacheReadTokens / total) * 100); +} + +/** * Decide whether a conversation should be warmed right now. * Requires: enabled, idle (not active), and the token is current (not superseded). */ @@ -120,8 +136,10 @@ export function buildConversationSpec( enabled: boolean, intervalMs: number, lastPct: number | null, + lastExpectedPct: number | null, ): SurfaceSpec { const pctDisplay = lastPct === null ? "—" : `${lastPct}%`; + const retentionDisplay = lastExpectedPct === null ? 
"—" : `${lastExpectedPct}%`; const toggle: ToggleField = { kind: "toggle", label: "Enabled", @@ -142,11 +160,16 @@ export function buildConversationSpec( label: "Last Cache %", value: pctDisplay, }; + const retentionStat: StatField = { + kind: "stat", + label: "Cache retention", + value: retentionDisplay, + }; return { id: "cache-warming", region: "side", title: "Cache Warming", - fields: [toggle, interval, stat], + fields: [toggle, interval, stat, retentionStat], }; } diff --git a/packages/cache-warming/src/warmer.test.ts b/packages/cache-warming/src/warmer.test.ts index 9865877..86908a2 100644 --- a/packages/cache-warming/src/warmer.test.ts +++ b/packages/cache-warming/src/warmer.test.ts @@ -182,6 +182,30 @@ describe("CacheWarmer", () => { expect(state.lastPct).toBe(80); }); + it("a completed warm stores both lastPct (rate) and lastExpectedPct (retention)", async () => { + const timers = fakeTimers(); + const warmer = createCacheWarmer({ + warm: async () => ({ + inputTokens: 1000, + outputTokens: 10, + cacheReadTokens: 700, + cacheWriteTokens: 300, + }), + storage: memStorage(), + logger: makeLogger(), + timers, + onSurfaceChange: () => {}, + }); + + warmer.onTurnSettled("conv-1", {}); + timers.flush(); + + await new Promise((r) => setTimeout(r, 10)); + const state = warmer.getState("conv-1"); + expect(state.lastPct).toBe(70); + expect(state.lastExpectedPct).toBe(70); + }); + it("re-arms timer after warm completes", async () => { const timers = fakeTimers(); let warmCount = 0; @@ -316,4 +340,27 @@ describe("CacheWarmer", () => { await warmer.setIntervalMs("conv-1", 30_000); expect(changeCount).toBe(2); }); + + it("the per-conversation spec includes a cache-retention stat", async () => { + const timers = fakeTimers(); + const warmer = createCacheWarmer({ + warm: async () => ({ + inputTokens: 1000, + outputTokens: 10, + cacheReadTokens: 900, + cacheWriteTokens: 100, + }), + storage: memStorage(), + logger: makeLogger(), + timers, + onSurfaceChange: () => {}, + }); 
+ + warmer.onTurnSettled("conv-1", {}); + timers.flush(); + await new Promise((r) => setTimeout(r, 10)); + + const state = warmer.getState("conv-1"); + expect(state.lastExpectedPct).toBe(90); + }); }); diff --git a/packages/cache-warming/src/warmer.ts b/packages/cache-warming/src/warmer.ts index 31dd41e..f50f346 100644 --- a/packages/cache-warming/src/warmer.ts +++ b/packages/cache-warming/src/warmer.ts @@ -5,6 +5,7 @@ import { type ConversationSettings, type ConversationState, computeCachePct, + computeExpectedCacheRate, DEFAULT_INTERVAL_MS, isTokenCurrent, MIN_INTERVAL_MS, @@ -63,6 +64,7 @@ const DEFAULT_STATE: ConversationState = { intervalMs: DEFAULT_INTERVAL_MS, active: false, lastPct: null, + lastExpectedPct: null, token: 0, }; @@ -145,11 +147,13 @@ export function createCacheWarmer(deps: CacheWarmerDeps): CacheWarmer { }); } else { const pct = computeCachePct(result.inputTokens, result.cacheReadTokens); - setState(conversationId, { ...currentState, lastPct: pct }); + const expectedPct = computeExpectedCacheRate(result.cacheReadTokens, result.cacheWriteTokens); + setState(conversationId, { ...currentState, lastPct: pct, lastExpectedPct: expectedPct }); deps.onSurfaceChange(); deps.logger.debug("cache-warming: warm complete", { conversationId, pct, + expectedPct, }); } |
