diff options
| author | Adam Malczewski <[email protected]> | 2026-06-10 10:06:27 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-10 10:06:27 +0900 |
| commit | f8bf715abc8a89ec0c6370b40403c509b1ce2870 (patch) | |
| tree | 915600a766e042a8491ac57423542cde1dda1eb6 /src/core/metrics/reducer.test.ts | |
| parent | ccfd2f4157c1cbbb3d8aeceee94d9e963a82ab03 (diff) | |
| download | dispatch-web-f8bf715abc8a89ec0c6370b40403c509b1ce2870.tar.gz dispatch-web-f8bf715abc8a89ec0c6370b40403c509b1ce2870.zip | |
feat(metrics): per-turn + per-step token/timing metrics bubbles
Consume wire@0.4.0 / transport@0.4.0 metrics: usage.stepId,
step-complete (ttft/decode/genTotal), done.durationMs/usage, and the
durable GET /conversations/:id/metrics endpoint.
- core/metrics: pure live-fold + durable-merge reducer; decode-rate TPS;
head-aligned, stable placement; progressive per-step rows (each shown as
its step ends) with the turn-total row gated on the done event.
- features/chat: store folds metric events + hydrates durable TurnMetrics;
ChatView renders inline step bubbles + a turn-total bubble.
- app: MetricsSync HTTP effect (tolerates 404) injected into chat stores.
- scripts/live-probe: drives the metrics path; live-verified 17/17 vs bin/up.
- docs: regenerate .dispatch wire/transport mirrors to 0.4.0; glossary terms
(turn/step metrics, TTFT, decode time, TPS, metrics bubble); trim handoff.
Diffstat (limited to 'src/core/metrics/reducer.test.ts')
| -rw-r--r-- | src/core/metrics/reducer.test.ts | 368 |
1 file changed, 368 insertions, 0 deletions
diff --git a/src/core/metrics/reducer.test.ts b/src/core/metrics/reducer.test.ts new file mode 100644 index 0000000..16c88b3 --- /dev/null +++ b/src/core/metrics/reducer.test.ts @@ -0,0 +1,368 @@ +import type { StepId, TurnDoneEvent, TurnStepCompleteEvent, TurnUsageEvent } from "@dispatch/wire"; +import { describe, expect, it } from "vitest"; +import { + applyDurableMetrics, + foldMetricsEvent, + initialMetricsState, + selectOrderedTurnMetrics, +} from "./reducer"; + +const usageEvent = ( + turnId: string, + inputTokens: number, + outputTokens: number, + stepId?: string, +): TurnUsageEvent => { + const base = { + type: "usage" as const, + conversationId: "c1", + turnId, + usage: { inputTokens, outputTokens }, + }; + if (stepId !== undefined) { + return { ...base, stepId: stepId as StepId }; + } + return base; +}; + +const stepCompleteEvent = ( + turnId: string, + stepId: string, + timing: { ttftMs?: number; decodeMs?: number; genTotalMs?: number } = {}, +): TurnStepCompleteEvent => ({ + type: "step-complete", + conversationId: "c1", + turnId, + stepId: stepId as StepId, + ...timing, +}); + +const doneEvent = ( + turnId: string, + extra: { durationMs?: number; usage?: { inputTokens: number; outputTokens: number } } = {}, +): TurnDoneEvent => ({ + type: "done", + conversationId: "c1", + turnId, + reason: "stop", + ...extra, +}); + +describe("initialMetricsState", () => { + it("starts empty", () => { + const s = initialMetricsState(); + expect(s.live.size).toBe(0); + expect(s.liveOrder).toEqual([]); + expect(s.durable.size).toBe(0); + expect(s.durableOrder).toEqual([]); + }); +}); + +describe("foldMetricsEvent", () => { + it("folds per-step usage by stepId into a turn", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2")); + s = foldMetricsEvent(s, 
doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(1); + expect(ordered[0]?.turnId).toBe("t1"); + expect(ordered[0]?.steps).toHaveLength(2); + expect(ordered[0]?.steps[0]?.stepId).toBe("s1"); + expect(ordered[0]?.steps[0]?.usage).toEqual({ inputTokens: 100, outputTokens: 50 }); + expect(ordered[0]?.steps[1]?.stepId).toBe("s2"); + expect(ordered[0]?.steps[1]?.usage).toEqual({ inputTokens: 200, outputTokens: 80 }); + }); + + it("folds step-complete timing and merges with same-step usage", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent( + s, + stepCompleteEvent("t1", "s1", { ttftMs: 200, decodeMs: 800, genTotalMs: 1000 }), + ); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(1); + const step = ordered[0]?.steps[0]; + expect(step?.usage).toEqual({ inputTokens: 100, outputTokens: 50 }); + expect(step?.ttftMs).toBe(200); + expect(step?.decodeMs).toBe(800); + expect(step?.genTotalMs).toBe(1000); + }); + + it("step-complete before usage defaults usage to zeros", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 500 })); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + const step = ordered[0]?.steps[0]; + expect(step?.usage).toEqual({ inputTokens: 0, outputTokens: 0 }); + expect(step?.genTotalMs).toBe(500); + }); + + it("done sets durationMs and aggregate usage", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent( + s, + doneEvent("t1", { + durationMs: 5000, + usage: { inputTokens: 300, outputTokens: 150 }, + }), + ); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.total?.durationMs).toBe(5000); + 
expect(ordered[0]?.total?.usage).toEqual({ inputTokens: 300, outputTokens: 150 }); + }); + + it("aggregate usage sums steps when done.usage absent", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2")); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.total?.usage).toEqual({ inputTokens: 300, outputTokens: 130 }); + }); + + it("aggregate usage includes cache only when a step had cache", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, { + type: "usage", + conversationId: "c1", + turnId: "t1", + stepId: "s1" as StepId, + usage: { inputTokens: 100, outputTokens: 50, cacheReadTokens: 30 }, + }); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2")); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.total?.usage.cacheReadTokens).toBe(30); + expect(ordered[0]?.total?.usage.cacheWriteTokens).toBeUndefined(); + }); + + it("tolerates missing clock (no genTotalMs/ttft/decode)", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + const step = ordered[0]?.steps[0]; + expect(step?.ttftMs).toBeUndefined(); + expect(step?.decodeMs).toBeUndefined(); + expect(step?.genTotalMs).toBeUndefined(); + expect(ordered[0]?.total?.durationMs).toBeUndefined(); + }); + + it("usage without stepId does not create a turn", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50)); 
+ + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(0); + }); + + it("ignores non-metrics events", () => { + const s = initialMetricsState(); + const next = foldMetricsEvent(s, { + type: "status", + conversationId: "c1", + status: "running", + }); + expect(next).toBe(s); + }); + + it("preserves first-seen order of steps", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 10, 5, "s2")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2")); + s = foldMetricsEvent(s, usageEvent("t1", 20, 8, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.steps[0]?.stepId).toBe("s2"); + expect(ordered[0]?.steps[1]?.stepId).toBe("s1"); + }); + + it("preserves first-seen order of turns", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t2", 10, 5, "s1")); + s = foldMetricsEvent(s, usageEvent("t1", 20, 8, "s1")); + s = foldMetricsEvent(s, doneEvent("t2")); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.turnId).toBe("t2"); + expect(ordered[1]?.turnId).toBe("t1"); + }); +}); + +describe("selectOrderedTurnMetrics", () => { + it("durable wins over live by turnId, live-done appended last", () => { + let s = initialMetricsState(); + + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, usageEvent("t2", 200, 80, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t2", "s1")); + s = foldMetricsEvent(s, doneEvent("t2")); + + s = applyDurableMetrics(s, [ + { + turnId: "t1", + usage: { inputTokens: 999, outputTokens: 999 }, + durationMs: 3000, + steps: [ + { + stepId: "s1" as StepId, + usage: { inputTokens: 999, outputTokens: 999 }, + genTotalMs: 3000, + }, + ], + }, + ]); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(2); + 
expect(ordered[0]?.turnId).toBe("t1"); + expect(ordered[0]?.total?.usage.inputTokens).toBe(999); + expect(ordered[0]?.total?.durationMs).toBe(3000); + expect(ordered[1]?.turnId).toBe("t2"); + expect(ordered[1]?.total?.durationMs).toBeUndefined(); + }); + + it("empty state returns empty", () => { + const s = initialMetricsState(); + expect(selectOrderedTurnMetrics(s)).toEqual([]); + }); + + it("selectOrderedTurnMetrics: in-flight turn exposes only completed steps and total=null", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 1000 })); + s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(1); + expect(ordered[0]?.turnId).toBe("t1"); + expect(ordered[0]?.steps).toHaveLength(1); + expect(ordered[0]?.steps[0]?.stepId).toBe("s1"); + expect(ordered[0]?.total).toBeNull(); + }); + + it("selectOrderedTurnMetrics: a turn with no complete step and not done is omitted", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(0); + }); + + it("selectOrderedTurnMetrics: after done, total is present", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 1000 })); + s = foldMetricsEvent(s, doneEvent("t1", { durationMs: 2000 })); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(1); + expect(ordered[0]?.turnId).toBe("t1"); + expect(ordered[0]?.total?.durationMs).toBe(2000); + expect(ordered[0]?.steps).toHaveLength(1); + }); + + it("step-complete marks the step complete", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, 
usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 500 })); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(1); + expect(ordered[0]?.steps).toHaveLength(1); + expect(ordered[0]?.steps[0]?.stepId).toBe("s1"); + expect(ordered[0]?.steps[0]?.genTotalMs).toBe(500); + }); + + it("selectOrderedTurnMetrics: durable turn → steps + total present", () => { + let s = initialMetricsState(); + s = applyDurableMetrics(s, [ + { + turnId: "t1", + usage: { inputTokens: 300, outputTokens: 150 }, + durationMs: 5000, + steps: [ + { + stepId: "s1" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + genTotalMs: 1000, + }, + { + stepId: "s2" as StepId, + usage: { inputTokens: 200, outputTokens: 100 }, + genTotalMs: 2000, + }, + ], + }, + ]); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(1); + expect(ordered[0]?.turnId).toBe("t1"); + expect(ordered[0]?.steps).toHaveLength(2); + expect(ordered[0]?.steps[0]?.stepId).toBe("s1"); + expect(ordered[0]?.steps[1]?.stepId).toBe("s2"); + expect(ordered[0]?.total?.usage.inputTokens).toBe(300); + expect(ordered[0]?.total?.durationMs).toBe(5000); + }); +}); + +describe("applyDurableMetrics", () => { + it("stores durable turns in order", () => { + let s = initialMetricsState(); + s = applyDurableMetrics(s, [ + { turnId: "t1", usage: { inputTokens: 10, outputTokens: 5 }, steps: [] }, + { turnId: "t2", usage: { inputTokens: 20, outputTokens: 8 }, steps: [] }, + ]); + expect(s.durableOrder).toEqual(["t1", "t2"]); + expect(s.durable.size).toBe(2); + }); + + it("is idempotent for same turnId", () => { + let s = initialMetricsState(); + const turn = { + turnId: "t1", + usage: { inputTokens: 10, outputTokens: 5 }, + steps: [], + }; + s = applyDurableMetrics(s, [turn]); + s = applyDurableMetrics(s, [turn]); + expect(s.durableOrder).toEqual(["t1"]); + expect(s.durable.size).toBe(1); + }); + + it("overwrites durable turn data for same 
turnId", () => { + let s = initialMetricsState(); + s = applyDurableMetrics(s, [ + { turnId: "t1", usage: { inputTokens: 10, outputTokens: 5 }, steps: [] }, + ]); + s = applyDurableMetrics(s, [ + { turnId: "t1", usage: { inputTokens: 99, outputTokens: 99 }, steps: [] }, + ]); + expect(s.durable.get("t1")?.usage.inputTokens).toBe(99); + }); +}); |
