diff options
| author | Adam Malczewski <[email protected]> | 2026-06-12 01:01:32 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-12 01:01:32 +0900 |
| commit | 6bd7b39f6f53dd8f3743347a1cb72c2f74424dd8 (patch) | |
| tree | b41911099883e8386ea8edbd88d42911de401d27 /src/core/metrics | |
| parent | fd565a6555e8bc9f37f21cf9d900523ef3be531b (diff) | |
| download | dispatch-web-6bd7b39f6f53dd8f3743347a1cb72c2f74424dd8.tar.gz dispatch-web-6bd7b39f6f53dd8f3743347a1cb72c2f74424dd8.zip | |
feat(metrics): consume contextSize — current context-usage readout
Backend context-size handoff: re-pin [email protected] / [email protected]
(+ re-mirror .dispatch reference snapshots). Thread the optional contextSize
through core/metrics (done fold + durable + selectCurrentContextSize: the value
from the most recent turn that has one defined; undefined means "unknown", never
0; durable wins over live for a shared turnId).
Chat store exposes currentContextSize; ContextSizeBadge renders
"N tokens in context" / "context size unknown" above the composer.
GLOSSARY: add context size / context window. 533 tests green.
Diffstat (limited to 'src/core/metrics')
| -rw-r--r-- | src/core/metrics/format.test.ts | 15 | ||||
| -rw-r--r-- | src/core/metrics/format.ts | 11 | ||||
| -rw-r--r-- | src/core/metrics/index.ts | 2 | ||||
| -rw-r--r-- | src/core/metrics/reducer.test.ts | 76 | ||||
| -rw-r--r-- | src/core/metrics/reducer.ts | 26 | ||||
| -rw-r--r-- | src/core/metrics/types.ts | 6 |
6 files changed, 134 insertions, 2 deletions
diff --git a/src/core/metrics/format.test.ts b/src/core/metrics/format.test.ts index 3eec93d..7c143d7 100644 --- a/src/core/metrics/format.test.ts +++ b/src/core/metrics/format.test.ts @@ -4,6 +4,7 @@ import { computeCachePct, computeExpectedCachePct, computeTps, + formatContextSize, viewCacheRate, viewExpectedCache, viewStepMetrics, @@ -308,3 +309,17 @@ describe("viewExpectedCache", () => { expect(v?.isHit).toBe(true); }); }); + +describe("formatContextSize", () => { + it("formats a defined count with thousands separators", () => { + expect(formatContextSize(34102)).toBe("34,102 tokens in context"); + }); + + it("renders a placeholder for undefined (never 0)", () => { + expect(formatContextSize(undefined)).toBe("context size unknown"); + }); + + it("renders an explicit 0 as zero tokens (a real reported value)", () => { + expect(formatContextSize(0)).toBe("0 tokens in context"); + }); +}); diff --git a/src/core/metrics/format.ts b/src/core/metrics/format.ts index ee8db60..d8dd2cc 100644 --- a/src/core/metrics/format.ts +++ b/src/core/metrics/format.ts @@ -17,6 +17,17 @@ function formatTps(tps: number | null): string | null { return `${Math.round(tps)} tok/s`; } +/** + * Format the current context size for display. A defined count renders as + * `"<n> tokens in context"` (thousands-separated); `undefined` ("unknown" — no + * per-step usage reported yet) renders the placeholder `"context size unknown"`. + * Never renders `0` for the unknown case. + */ +export function formatContextSize(n: number | undefined): string { + if (n === undefined) return "context size unknown"; + return `${formatTokens(n)} tokens in context`; +} + /** Compute tokens-per-second. Returns null when elapsed time is absent or zero. 
*/ export function computeTps(outputTokens: number, elapsedMs: number | undefined): number | null { if (elapsedMs === undefined || elapsedMs <= 0) return null; diff --git a/src/core/metrics/index.ts b/src/core/metrics/index.ts index 8822159..773d697 100644 --- a/src/core/metrics/index.ts +++ b/src/core/metrics/index.ts @@ -2,6 +2,7 @@ export { computeCachePct, computeExpectedCachePct, computeTps, + formatContextSize, viewCacheRate, viewExpectedCache, viewStepMetrics, @@ -12,6 +13,7 @@ export { applyDurableMetrics, foldMetricsEvent, initialMetricsState, + selectCurrentContextSize, selectOrderedTurnMetrics, } from "./reducer"; export type { diff --git a/src/core/metrics/reducer.test.ts b/src/core/metrics/reducer.test.ts index 16c88b3..cd9f673 100644 --- a/src/core/metrics/reducer.test.ts +++ b/src/core/metrics/reducer.test.ts @@ -4,6 +4,7 @@ import { applyDurableMetrics, foldMetricsEvent, initialMetricsState, + selectCurrentContextSize, selectOrderedTurnMetrics, } from "./reducer"; @@ -39,7 +40,11 @@ const stepCompleteEvent = ( const doneEvent = ( turnId: string, - extra: { durationMs?: number; usage?: { inputTokens: number; outputTokens: number } } = {}, + extra: { + durationMs?: number; + usage?: { inputTokens: number; outputTokens: number }; + contextSize?: number; + } = {}, ): TurnDoneEvent => ({ type: "done", conversationId: "c1", @@ -366,3 +371,72 @@ describe("applyDurableMetrics", () => { expect(s.durable.get("t1")?.usage.inputTokens).toBe(99); }); }); + +describe("contextSize / selectCurrentContextSize", () => { + it("live done carries contextSize onto the turn total", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 1234 })); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.total?.contextSize).toBe(1234); + expect(selectCurrentContextSize(s)).toBe(1234); + }); + + 
it("contextSize is NOT the aggregate usage sum (multi-step turn)", () => { + let s = initialMetricsState(); + // Two steps: usage sums to 300 in / 130 out = 430, but contextSize is the + // backend-stamped final-step occupancy, independent of the sum. + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2")); + s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 250 })); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.total?.usage).toEqual({ inputTokens: 300, outputTokens: 130 }); + expect(ordered[0]?.total?.contextSize).toBe(250); + expect(selectCurrentContextSize(s)).toBe(250); + }); + + it("persisted (durable) contextSize is preserved and selected", () => { + let s = initialMetricsState(); + s = applyDurableMetrics(s, [ + { turnId: "t1", usage: { inputTokens: 10, outputTokens: 5 }, steps: [], contextSize: 4096 }, + ]); + expect(s.durable.get("t1")?.contextSize).toBe(4096); + expect(selectCurrentContextSize(s)).toBe(4096); + }); + + it("selectCurrentContextSize returns the LATEST turn's value", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 100 })); + s = foldMetricsEvent(s, doneEvent("t2", { contextSize: 900 })); + expect(selectCurrentContextSize(s)).toBe(900); + }); + + it("selectCurrentContextSize skips a later turn that lacks contextSize", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 700 })); + // t2 finishes but the provider reported no per-step usage → no contextSize. 
+ s = foldMetricsEvent(s, doneEvent("t2")); + expect(selectCurrentContextSize(s)).toBe(700); + }); + + it("selectCurrentContextSize is undefined (not 0) when nothing reported", () => { + let s = initialMetricsState(); + expect(selectCurrentContextSize(s)).toBeUndefined(); + s = foldMetricsEvent(s, doneEvent("t1")); + expect(selectCurrentContextSize(s)).toBeUndefined(); + }); + + it("durable contextSize wins over live for a shared turnId", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 111 })); + s = applyDurableMetrics(s, [ + { turnId: "t1", usage: { inputTokens: 1, outputTokens: 1 }, steps: [], contextSize: 222 }, + ]); + expect(selectCurrentContextSize(s)).toBe(222); + }); +}); diff --git a/src/core/metrics/reducer.ts b/src/core/metrics/reducer.ts index d36dba1..1e66cc8 100644 --- a/src/core/metrics/reducer.ts +++ b/src/core/metrics/reducer.ts @@ -62,6 +62,9 @@ function liveTurnToMetrics(lt: LiveTurn): TurnMetrics { if (lt.durationMs !== undefined) { (base as { durationMs?: number }).durationMs = lt.durationMs; } + if (lt.doneContextSize !== undefined) { + (base as { contextSize?: number }).contextSize = lt.doneContextSize; + } return base; } @@ -74,6 +77,7 @@ function ensureLiveTurn(state: MetricsState, turnId: string): [MetricsState, Liv done: false, durationMs: undefined, doneUsage: undefined, + doneContextSize: undefined, stepMap: new Map(), stepOrder: [], }; @@ -127,7 +131,7 @@ export function initialMetricsState(): MetricsState { * - `usage` with `stepId`: upsert that step's usage. * - `usage` without `stepId`: ignored. * - `step-complete`: upsert that step's timing; default usage to zeros if absent. - * - `done`: set turn's `durationMs` and optional aggregate `usage`. + * - `done`: set turn's `durationMs`, optional aggregate `usage`, and optional `contextSize`. * - All other event types: return state unchanged. 
*/ export function foldMetricsEvent(state: MetricsState, event: AgentEvent): MetricsState { @@ -161,6 +165,7 @@ export function foldMetricsEvent(state: MetricsState, event: AgentEvent): Metric done: true, durationMs: event.durationMs ?? lt.durationMs, doneUsage: event.usage ?? lt.doneUsage, + doneContextSize: event.contextSize ?? lt.doneContextSize, }; const newLive = new Map(s1.live); newLive.set(event.turnId, updated); @@ -237,3 +242,22 @@ export function selectOrderedTurnMetrics(state: MetricsState): readonly TurnMetr return result; } + +/** + * Select the conversation's CURRENT context size — the tokens it occupies right + * now. Per the wire contract a client reads the LATEST turn's `contextSize`; we + * scan the merged ordered turns NEWEST → OLDEST and return the first DEFINED + * `contextSize` (a finalized turn whose provider reported per-step usage). + * + * Returns `undefined` ("unknown") when no finalized turn carries a context size — + * the caller renders a placeholder, NEVER `0`. Durable (sealed) data wins over + * live for a shared `turnId` (it is the persisted, authoritative value). + */ +export function selectCurrentContextSize(state: MetricsState): number | undefined { + const ordered = selectOrderedTurnMetrics(state); + for (let i = ordered.length - 1; i >= 0; i--) { + const total = ordered[i]?.total; + if (total?.contextSize !== undefined) return total.contextSize; + } + return undefined; +} diff --git a/src/core/metrics/types.ts b/src/core/metrics/types.ts index f5557f7..c22fd9f 100644 --- a/src/core/metrics/types.ts +++ b/src/core/metrics/types.ts @@ -19,6 +19,12 @@ export interface LiveTurn { readonly done: boolean; readonly durationMs: number | undefined; readonly doneUsage: Usage | undefined; + /** + * Context size carried on the turn's `done` event (the turn's FINAL step + * `inputTokens + outputTokens` — current context occupancy). `undefined` when + * the provider reported no per-step usage; never coerced to `0`. 
+ */ + readonly doneContextSize: number | undefined; readonly stepMap: ReadonlyMap<string, BuildingStep>; readonly stepOrder: readonly string[]; } |
