summaryrefslogtreecommitdiffhomepage
path: root/src/core/metrics
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/metrics')
-rw-r--r--src/core/metrics/format.test.ts59
-rw-r--r--src/core/metrics/format.ts29
-rw-r--r--src/core/metrics/index.ts2
-rw-r--r--src/core/metrics/place.test.ts13
-rw-r--r--src/core/metrics/place.ts13
-rw-r--r--src/core/metrics/types.ts5
6 files changed, 119 insertions, 2 deletions
diff --git a/src/core/metrics/format.test.ts b/src/core/metrics/format.test.ts
index 77c5204..3eec93d 100644
--- a/src/core/metrics/format.test.ts
+++ b/src/core/metrics/format.test.ts
@@ -2,8 +2,10 @@ import type { StepId, StepMetrics, TurnMetrics } from "@dispatch/wire";
import { describe, expect, it } from "vitest";
import {
computeCachePct,
+ computeExpectedCachePct,
computeTps,
viewCacheRate,
+ viewExpectedCache,
viewStepMetrics,
viewTurnMetrics,
} from "./format";
@@ -249,3 +251,60 @@ describe("viewCacheRate", () => {
expect(miss.isHit).toBe(false);
});
});
+
+describe("computeExpectedCachePct", () => {
+ it("null when there is no prior turn (first turn has no baseline)", () => {
+ expect(computeExpectedCachePct({ inputTokens: 100, outputTokens: 0 }, null)).toBeNull();
+ });
+
+ it("null when the prior turn cached nothing (denominator 0)", () => {
+ const prev = { inputTokens: 100, outputTokens: 0 };
+ const current = { inputTokens: 200, outputTokens: 0, cacheReadTokens: 50 };
+ expect(computeExpectedCachePct(current, prev)).toBeNull();
+ });
+
+ it("100% when the whole prior cached prefix was read back (backend worked example)", () => {
+ // turn 1: cacheRead 0, cacheWrite 5146 → prefix 5146; turn 2 reads 5146 back.
+ const prev = { inputTokens: 5149, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 5146 };
+ const current = {
+ inputTokens: 8462,
+ outputTokens: 0,
+ cacheReadTokens: 5146,
+ cacheWriteTokens: 3313,
+ };
+ expect(computeExpectedCachePct(current, prev)).toBe(100);
+ });
+
+ it("drops below 100% when the cache busted (read < prior prefix)", () => {
+ const prev = {
+ inputTokens: 1000,
+ outputTokens: 0,
+ cacheReadTokens: 100,
+ cacheWriteTokens: 900,
+ };
+ const current = { inputTokens: 1000, outputTokens: 0, cacheReadTokens: 500 };
+ // 500 / (100 + 900) = 50%
+ expect(computeExpectedCachePct(current, prev)).toBe(50);
+ });
+
+ it("clamps to 100 if read somehow exceeds the prior prefix", () => {
+ const prev = { inputTokens: 100, outputTokens: 0, cacheWriteTokens: 100 };
+ const current = { inputTokens: 100, outputTokens: 0, cacheReadTokens: 250 };
+ expect(computeExpectedCachePct(current, prev)).toBe(100);
+ });
+});
+
+describe("viewExpectedCache", () => {
+ it("null view when it cannot be derived (no prior turn)", () => {
+ expect(viewExpectedCache({ inputTokens: 100, outputTokens: 0 }, null)).toBeNull();
+ });
+
+ it("success level + hit flag for full retention", () => {
+ const prev = { inputTokens: 5149, outputTokens: 0, cacheWriteTokens: 5146 };
+ const current = { inputTokens: 8462, outputTokens: 0, cacheReadTokens: 5146 };
+ const v = viewExpectedCache(current, prev);
+ expect(v?.pct).toBe(100);
+ expect(v?.level).toBe("success");
+ expect(v?.isHit).toBe(true);
+ });
+});
diff --git a/src/core/metrics/format.ts b/src/core/metrics/format.ts
index cc86976..ee8db60 100644
--- a/src/core/metrics/format.ts
+++ b/src/core/metrics/format.ts
@@ -75,6 +75,35 @@ export function viewCacheRate(u: Usage): CacheRateView {
return { pct, level: cacheLevel(pct), isHit: (u.cacheReadTokens ?? 0) > 0 };
}
+/**
+ * Expected cache (retention): of the cache that existed going INTO this turn, how
+ * much was read back — `clamp01(cacheRead_N / (cacheRead_{N-1} + cacheWrite_{N-1}))`.
+ * The denominator is the PRIOR turn's cached prefix (what it read + what it wrote).
+ * Ideally ~100% on every turn after the first; <100% = the cache busted/expired.
+ *
+ * Returns `null` when it cannot be derived: no prior turn (`prev === null`) or the
+ * prior turn cached nothing (denominator <= 0) — distinct from a real 0%.
+ */
+export function computeExpectedCachePct(current: Usage, prev: Usage | null): number | null {
+ if (prev === null) return null;
+ const denom = (prev.cacheReadTokens ?? 0) + (prev.cacheWriteTokens ?? 0);
+ if (denom <= 0) return null;
+ const read = current.cacheReadTokens ?? 0;
+ const rate = read / denom;
+ const clamped = rate < 0 ? 0 : rate > 1 ? 1 : rate;
+ return Math.round(clamped * 100);
+}
+
+/**
+ * Build a view of the cross-turn retention (percentage + colour level + hit flag),
+ * or `null` when it can't be derived (see `computeExpectedCachePct`).
+ */
+export function viewExpectedCache(current: Usage, prev: Usage | null): CacheRateView | null {
+ const pct = computeExpectedCachePct(current, prev);
+ if (pct === null) return null;
+ return { pct, level: cacheLevel(pct), isHit: (current.cacheReadTokens ?? 0) > 0 };
+}
+
/** Build a formatted view of a turn's aggregate metrics. */
export function viewTurnMetrics(turn: TurnMetrics): TurnMetricsView {
const total = totalTokens(turn.usage);
diff --git a/src/core/metrics/index.ts b/src/core/metrics/index.ts
index 6997ab9..8822159 100644
--- a/src/core/metrics/index.ts
+++ b/src/core/metrics/index.ts
@@ -1,7 +1,9 @@
export {
computeCachePct,
+ computeExpectedCachePct,
computeTps,
viewCacheRate,
+ viewExpectedCache,
viewStepMetrics,
viewTurnMetrics,
} from "./format";
diff --git a/src/core/metrics/place.test.ts b/src/core/metrics/place.test.ts
index d94882d..0b9c0ec 100644
--- a/src/core/metrics/place.test.ts
+++ b/src/core/metrics/place.test.ts
@@ -526,4 +526,17 @@ describe("interleaveTurnMetrics — cumulative usage (cache total)", () => {
expect(tm[0]?.cumulativeUsage.inputTokens).toBe(1000);
expect(tm[0]?.cumulativeUsage.cacheReadTokens).toBe(500);
});
+
+ it("carries the prior finalized turn's usage as the retention baseline", () => {
+ const rows = interleaveTurnMetrics(
+ [userGroup(1, "q1"), assistantGroup(2, "a1"), userGroup(3, "q2"), assistantGroup(4, "a2")],
+ [cacheEntry("t1", 2669, 10, 384), cacheEntry("t2", 2737, 10, 2560)],
+ );
+ const tm = turnMetricsRows(rows);
+ // first finalized turn has no earlier baseline
+ expect(tm[0]?.prevTurnUsage).toBeNull();
+ // second turn's baseline is the first turn's usage
+ expect(tm[1]?.prevTurnUsage?.inputTokens).toBe(2669);
+ expect(tm[1]?.prevTurnUsage?.cacheReadTokens).toBe(384);
+ });
});
diff --git a/src/core/metrics/place.ts b/src/core/metrics/place.ts
index fc30df0..afeb84b 100644
--- a/src/core/metrics/place.ts
+++ b/src/core/metrics/place.ts
@@ -79,11 +79,19 @@ export function interleaveTurnMetrics(
}
// Running cumulative usage across finalized turns (conversation total at each
- // entry index), for the per-turn "chat total" cache rate.
+ // entry index), for the per-turn "chat total" cache rate. Alongside it, the
+ // previous finalized turn's usage at each index — the baseline for cross-turn
+ // retention (expected cache).
const cumulativeByEntry: Usage[] = [];
+ const prevUsageByEntry: (Usage | null)[] = [];
let runningUsage: Usage = { inputTokens: 0, outputTokens: 0 };
+ let lastFinalizedUsage: Usage | null = null;
for (const e of entries) {
- if (e.total !== null) runningUsage = addUsage(runningUsage, e.total.usage);
+ prevUsageByEntry.push(lastFinalizedUsage);
+ if (e.total !== null) {
+ runningUsage = addUsage(runningUsage, e.total.usage);
+ lastFinalizedUsage = e.total.usage;
+ }
cumulativeByEntry.push(runningUsage);
}
@@ -170,6 +178,7 @@ export function interleaveTurnMetrics(
kind: "turn-metrics",
turn: entry.total,
cumulativeUsage: cumulativeByEntry[seg] ?? entry.total.usage,
+ prevTurnUsage: prevUsageByEntry[seg] ?? null,
});
}
}
diff --git a/src/core/metrics/types.ts b/src/core/metrics/types.ts
index cf2511c..f5557f7 100644
--- a/src/core/metrics/types.ts
+++ b/src/core/metrics/types.ts
@@ -52,6 +52,11 @@ export type MetricsRow =
readonly turn: TurnMetrics;
/** Cumulative usage across all finalized turns up to and including this one. */
readonly cumulativeUsage: Usage;
+ /**
+ * Usage of the most recent EARLIER finalized turn, or `null` when this is the
+ * first finalized turn. The baseline for cross-turn retention (expected cache).
+ */
+ readonly prevTurnUsage: Usage | null;
};
/** Formatted cache hit-rate view: percentage + colour severity + hit flag. */