diff options
Diffstat (limited to 'src/core/metrics')
| -rw-r--r-- | src/core/metrics/format.test.ts | 59 | ||||
| -rw-r--r-- | src/core/metrics/format.ts | 29 | ||||
| -rw-r--r-- | src/core/metrics/index.ts | 2 | ||||
| -rw-r--r-- | src/core/metrics/place.test.ts | 13 | ||||
| -rw-r--r-- | src/core/metrics/place.ts | 13 | ||||
| -rw-r--r-- | src/core/metrics/types.ts | 5 |
6 files changed, 119 insertions, 2 deletions
diff --git a/src/core/metrics/format.test.ts b/src/core/metrics/format.test.ts index 77c5204..3eec93d 100644 --- a/src/core/metrics/format.test.ts +++ b/src/core/metrics/format.test.ts @@ -2,8 +2,10 @@ import type { StepId, StepMetrics, TurnMetrics } from "@dispatch/wire"; import { describe, expect, it } from "vitest"; import { computeCachePct, + computeExpectedCachePct, computeTps, viewCacheRate, + viewExpectedCache, viewStepMetrics, viewTurnMetrics, } from "./format"; @@ -249,3 +251,60 @@ describe("viewCacheRate", () => { expect(miss.isHit).toBe(false); }); }); + +describe("computeExpectedCachePct", () => { + it("null when there is no prior turn (first turn has no baseline)", () => { + expect(computeExpectedCachePct({ inputTokens: 100, outputTokens: 0 }, null)).toBeNull(); + }); + + it("null when the prior turn cached nothing (denominator 0)", () => { + const prev = { inputTokens: 100, outputTokens: 0 }; + const current = { inputTokens: 200, outputTokens: 0, cacheReadTokens: 50 }; + expect(computeExpectedCachePct(current, prev)).toBeNull(); + }); + + it("100% when the whole prior cached prefix was read back (backend worked example)", () => { + // turn 1: cacheRead 0, cacheWrite 5146 → prefix 5146; turn 2 reads 5146 back. + const prev = { inputTokens: 5149, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 5146 }; + const current = { + inputTokens: 8462, + outputTokens: 0, + cacheReadTokens: 5146, + cacheWriteTokens: 3313, + }; + expect(computeExpectedCachePct(current, prev)).toBe(100); + }); + + it("drops below 100% when the cache busted (read < prior prefix)", () => { + const prev = { + inputTokens: 1000, + outputTokens: 0, + cacheReadTokens: 100, + cacheWriteTokens: 900, + }; + const current = { inputTokens: 1000, outputTokens: 0, cacheReadTokens: 500 }; + // 500 / (100 + 900) = 50% + expect(computeExpectedCachePct(current, prev)).toBe(50); + }); + + it("clamps to 100 if read somehow exceeds the prior prefix", () => { + const prev = { inputTokens: 100, outputTokens: 0, cacheWriteTokens: 100 }; + const current = { inputTokens: 100, outputTokens: 0, cacheReadTokens: 250 }; + expect(computeExpectedCachePct(current, prev)).toBe(100); + }); +}); + +describe("viewExpectedCache", () => { + it("null view when it cannot be derived (no prior turn)", () => { + expect(viewExpectedCache({ inputTokens: 100, outputTokens: 0 }, null)).toBeNull(); + }); + + it("success level + hit flag for full retention", () => { + const prev = { inputTokens: 5149, outputTokens: 0, cacheWriteTokens: 5146 }; + const current = { inputTokens: 8462, outputTokens: 0, cacheReadTokens: 5146 }; + const v = viewExpectedCache(current, prev); + expect(v?.pct).toBe(100); + expect(v?.level).toBe("success"); + expect(v?.isHit).toBe(true); + }); +}); diff --git a/src/core/metrics/format.ts b/src/core/metrics/format.ts index cc86976..ee8db60 100644 --- a/src/core/metrics/format.ts +++ b/src/core/metrics/format.ts @@ -75,6 +75,35 @@ export function viewCacheRate(u: Usage): CacheRateView { return { pct, level: cacheLevel(pct), isHit: (u.cacheReadTokens ?? 0) > 0 }; } +/** + * Expected cache (retention): of the cache that existed going INTO this turn, how + * much was read back — `clamp01(cacheRead_N / (cacheRead_{N-1} + cacheWrite_{N-1}))`. + * The denominator is the PRIOR turn's cached prefix (what it read + what it wrote). + * Ideally ~100% on every turn after the first; <100% = the cache busted/expired. + * + * Returns `null` when it cannot be derived: no prior turn (`prev === null`) or the + * prior turn cached nothing (denominator <= 0) — distinct from a real 0%. + */ +export function computeExpectedCachePct(current: Usage, prev: Usage | null): number | null { + if (prev === null) return null; + const denom = (prev.cacheReadTokens ?? 0) + (prev.cacheWriteTokens ?? 0); + if (denom <= 0) return null; + const read = current.cacheReadTokens ?? 0; + const rate = read / denom; + const clamped = rate < 0 ? 0 : rate > 1 ? 1 : rate; + return Math.round(clamped * 100); +} + +/** + * Build a view of the cross-turn retention (percentage + colour level + hit flag), + * or `null` when it can't be derived (see `computeExpectedCachePct`). + */ +export function viewExpectedCache(current: Usage, prev: Usage | null): CacheRateView | null { + const pct = computeExpectedCachePct(current, prev); + if (pct === null) return null; + return { pct, level: cacheLevel(pct), isHit: (current.cacheReadTokens ?? 0) > 0 }; +} + /** Build a formatted view of a turn's aggregate metrics. */ export function viewTurnMetrics(turn: TurnMetrics): TurnMetricsView { const total = totalTokens(turn.usage); diff --git a/src/core/metrics/index.ts b/src/core/metrics/index.ts index 6997ab9..8822159 100644 --- a/src/core/metrics/index.ts +++ b/src/core/metrics/index.ts @@ -1,7 +1,9 @@ export { computeCachePct, + computeExpectedCachePct, computeTps, viewCacheRate, + viewExpectedCache, viewStepMetrics, viewTurnMetrics, } from "./format"; diff --git a/src/core/metrics/place.test.ts b/src/core/metrics/place.test.ts index d94882d..0b9c0ec 100644 --- a/src/core/metrics/place.test.ts +++ b/src/core/metrics/place.test.ts @@ -526,4 +526,17 @@ describe("interleaveTurnMetrics — cumulative usage (cache total)", () => { expect(tm[0]?.cumulativeUsage.inputTokens).toBe(1000); expect(tm[0]?.cumulativeUsage.cacheReadTokens).toBe(500); }); + + it("carries the prior finalized turn's usage as the retention baseline", () => { + const rows = interleaveTurnMetrics( + [userGroup(1, "q1"), assistantGroup(2, "a1"), userGroup(3, "q2"), assistantGroup(4, "a2")], + [cacheEntry("t1", 2669, 10, 384), cacheEntry("t2", 2737, 10, 2560)], + ); + const tm = turnMetricsRows(rows); + // first finalized turn has no earlier baseline + expect(tm[0]?.prevTurnUsage).toBeNull(); + // second turn's baseline is the first turn's usage + expect(tm[1]?.prevTurnUsage?.inputTokens).toBe(2669); + expect(tm[1]?.prevTurnUsage?.cacheReadTokens).toBe(384); + }); }); diff --git a/src/core/metrics/place.ts b/src/core/metrics/place.ts index fc30df0..afeb84b 100644 --- a/src/core/metrics/place.ts +++ b/src/core/metrics/place.ts @@ -79,11 +79,19 @@ export function interleaveTurnMetrics( } // Running cumulative usage across finalized turns (conversation total at each - // entry index), for the per-turn "chat total" cache rate. + // entry index), for the per-turn "chat total" cache rate. Alongside it, the + // previous finalized turn's usage at each index — the baseline for cross-turn + // retention (expected cache). const cumulativeByEntry: Usage[] = []; + const prevUsageByEntry: (Usage | null)[] = []; let runningUsage: Usage = { inputTokens: 0, outputTokens: 0 }; + let lastFinalizedUsage: Usage | null = null; for (const e of entries) { - if (e.total !== null) runningUsage = addUsage(runningUsage, e.total.usage); + prevUsageByEntry.push(lastFinalizedUsage); + if (e.total !== null) { + runningUsage = addUsage(runningUsage, e.total.usage); + lastFinalizedUsage = e.total.usage; + } cumulativeByEntry.push(runningUsage); } @@ -170,6 +178,7 @@ export function interleaveTurnMetrics( kind: "turn-metrics", turn: entry.total, cumulativeUsage: cumulativeByEntry[seg] ?? entry.total.usage, + prevTurnUsage: prevUsageByEntry[seg] ?? null, }); } } diff --git a/src/core/metrics/types.ts b/src/core/metrics/types.ts index cf2511c..f5557f7 100644 --- a/src/core/metrics/types.ts +++ b/src/core/metrics/types.ts @@ -52,6 +52,11 @@ export type MetricsRow = readonly turn: TurnMetrics; /** Cumulative usage across all finalized turns up to and including this one. */ readonly cumulativeUsage: Usage; + /** + * Usage of the most recent EARLIER finalized turn, or `null` when this is the + * first finalized turn. The baseline for cross-turn retention (expected cache). + */ + readonly prevTurnUsage: Usage | null; }; /** Formatted cache hit-rate view: percentage + colour severity + hit flag. */ |
