summaryrefslogtreecommitdiffhomepage
path: root/src/core/metrics/reducer.test.ts
diff options
context:
space:
mode:
authorAdam Malczewski <[email protected]>2026-06-10 10:06:27 +0900
committerAdam Malczewski <[email protected]>2026-06-10 10:06:27 +0900
commitf8bf715abc8a89ec0c6370b40403c509b1ce2870 (patch)
tree915600a766e042a8491ac57423542cde1dda1eb6 /src/core/metrics/reducer.test.ts
parentccfd2f4157c1cbbb3d8aeceee94d9e963a82ab03 (diff)
downloaddispatch-web-f8bf715abc8a89ec0c6370b40403c509b1ce2870.tar.gz
dispatch-web-f8bf715abc8a89ec0c6370b40403c509b1ce2870.zip
feat(metrics): per-turn + per-step token/timing metrics bubbles
Consume wire 0.4.0 / transport 0.4.0 metrics: usage.stepId, step-complete (ttft/decode/genTotal), done.durationMs/usage, and the durable GET /conversations/:id/metrics endpoint. - core/metrics: pure live-fold + durable-merge reducer; decode-rate TPS; head-aligned, stable placement; progressive per-step rows (each shown as its step ends) with the turn-total row gated on the done event. - features/chat: store folds metric events + hydrates durable TurnMetrics; ChatView renders inline step bubbles + a turn-total bubble. - app: MetricsSync HTTP effect (tolerates 404) injected into chat stores. - scripts/live-probe: drives the metrics path; live-verified 17/17 vs bin/up. - docs: regenerate .dispatch wire/transport mirrors to 0.4.0; glossary terms (turn/step metrics, TTFT, decode time, TPS, metrics bubble); trim handoff.
Diffstat (limited to 'src/core/metrics/reducer.test.ts')
-rw-r--r-- src/core/metrics/reducer.test.ts 368
1 files changed, 368 insertions, 0 deletions
diff --git a/src/core/metrics/reducer.test.ts b/src/core/metrics/reducer.test.ts
new file mode 100644
index 0000000..16c88b3
--- /dev/null
+++ b/src/core/metrics/reducer.test.ts
@@ -0,0 +1,368 @@
+import type { StepId, TurnDoneEvent, TurnStepCompleteEvent, TurnUsageEvent } from "@dispatch/wire";
+import { describe, expect, it } from "vitest";
+import {
+ applyDurableMetrics,
+ foldMetricsEvent,
+ initialMetricsState,
+ selectOrderedTurnMetrics,
+} from "./reducer";
+
+const usageEvent = ( // Test fixture: builds a "usage" event for conversation "c1".
+ turnId: string,
+ inputTokens: number,
+ outputTokens: number,
+ stepId?: string, // optional; when omitted the returned event has no stepId key
+): TurnUsageEvent => {
+ const base = {
+ type: "usage" as const,
+ conversationId: "c1",
+ turnId,
+ usage: { inputTokens, outputTokens },
+ };
+ if (stepId !== undefined) {
+ return { ...base, stepId: stepId as StepId }; // cast the plain string to the StepId type imported from @dispatch/wire
+ }
+ return base; // returned without a stepId property, rather than with stepId: undefined
+};
+
+const stepCompleteEvent = ( // Test fixture: builds a "step-complete" event for conversation "c1".
+ turnId: string,
+ stepId: string,
+ timing: { ttftMs?: number; decodeMs?: number; genTotalMs?: number } = {}, // defaults to no timing fields
+): TurnStepCompleteEvent => ({
+ type: "step-complete",
+ conversationId: "c1",
+ turnId,
+ stepId: stepId as StepId,
+ ...timing, // only the timing keys the caller supplied are copied onto the event
+});
+
+const doneEvent = ( // Test fixture: builds a "done" event with reason "stop" for conversation "c1".
+ turnId: string,
+ extra: { durationMs?: number; usage?: { inputTokens: number; outputTokens: number } } = {}, // defaults to no extra fields
+): TurnDoneEvent => ({
+ type: "done",
+ conversationId: "c1",
+ turnId,
+ reason: "stop",
+ ...extra, // spreads durationMs and/or usage onto the event when the caller passes them
+});
+
+describe("initialMetricsState", () => { // A freshly created state holds no live and no durable entries.
+ it("starts empty", () => {
+ const s = initialMetricsState();
+ expect(s.live.size).toBe(0);
+ expect(s.liveOrder).toEqual([]);
+ expect(s.durable.size).toBe(0);
+ expect(s.durableOrder).toEqual([]);
+ });
+});
+
+describe("foldMetricsEvent", () => { // Each test folds events into a fresh state and inspects it through selectOrderedTurnMetrics.
+ it("folds per-step usage by stepId into a turn", () => { // two steps of one turn, each usage then step-complete, then done
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2"));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(1);
+ expect(ordered[0]?.turnId).toBe("t1");
+ expect(ordered[0]?.steps).toHaveLength(2);
+ expect(ordered[0]?.steps[0]?.stepId).toBe("s1");
+ expect(ordered[0]?.steps[0]?.usage).toEqual({ inputTokens: 100, outputTokens: 50 });
+ expect(ordered[0]?.steps[1]?.stepId).toBe("s2");
+ expect(ordered[0]?.steps[1]?.usage).toEqual({ inputTokens: 200, outputTokens: 80 });
+ });
+
+ it("folds step-complete timing and merges with same-step usage", () => { // usage is folded first; the later step-complete carries all three timings
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(
+ s,
+ stepCompleteEvent("t1", "s1", { ttftMs: 200, decodeMs: 800, genTotalMs: 1000 }),
+ );
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(1);
+ const step = ordered[0]?.steps[0];
+ expect(step?.usage).toEqual({ inputTokens: 100, outputTokens: 50 });
+ expect(step?.ttftMs).toBe(200);
+ expect(step?.decodeMs).toBe(800);
+ expect(step?.genTotalMs).toBe(1000);
+ });
+
+ it("step-complete before usage defaults usage to zeros", () => { // no usage event is ever folded for s1 in this test
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 500 }));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ const step = ordered[0]?.steps[0];
+ expect(step?.usage).toEqual({ inputTokens: 0, outputTokens: 0 });
+ expect(step?.genTotalMs).toBe(500);
+ });
+
+ it("done sets durationMs and aggregate usage", () => { // done carries explicit durationMs and usage
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(
+ s,
+ doneEvent("t1", {
+ durationMs: 5000,
+ usage: { inputTokens: 300, outputTokens: 150 },
+ }),
+ );
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered[0]?.total?.durationMs).toBe(5000);
+ expect(ordered[0]?.total?.usage).toEqual({ inputTokens: 300, outputTokens: 150 }); // the done.usage values, not the 100/50 reported for the step
+ });
+
+ it("aggregate usage sums steps when done.usage absent", () => { // done carries no usage here
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2"));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered[0]?.total?.usage).toEqual({ inputTokens: 300, outputTokens: 130 }); // 100 + 200 input, 50 + 80 output
+ });
+
+ it("aggregate usage includes cache only when a step had cache", () => { // only s1 reports cacheReadTokens; no step reports cacheWriteTokens
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, { // built inline: the usageEvent helper takes no cache token arguments
+ type: "usage",
+ conversationId: "c1",
+ turnId: "t1",
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50, cacheReadTokens: 30 },
+ });
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2"));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered[0]?.total?.usage.cacheReadTokens).toBe(30);
+ expect(ordered[0]?.total?.usage.cacheWriteTokens).toBeUndefined();
+ });
+
+ it("tolerates missing clock (no genTotalMs/ttft/decode)", () => { // step-complete without timing fields and done without durationMs
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ const step = ordered[0]?.steps[0];
+ expect(step?.ttftMs).toBeUndefined();
+ expect(step?.decodeMs).toBeUndefined();
+ expect(step?.genTotalMs).toBeUndefined();
+ expect(ordered[0]?.total?.durationMs).toBeUndefined();
+ });
+
+ it("usage without stepId does not create a turn", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50)); // helper called without its optional stepId argument
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(0);
+ });
+
+ it("ignores non-metrics events", () => { // a "status" event is passed instead of usage/step-complete/done
+ const s = initialMetricsState();
+ const next = foldMetricsEvent(s, {
+ type: "status",
+ conversationId: "c1",
+ status: "running",
+ });
+ expect(next).toBe(s); // toBe: the very same state object must come back
+ });
+
+ it("preserves first-seen order of steps", () => { // s2 is folded before s1
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 10, 5, "s2"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2"));
+ s = foldMetricsEvent(s, usageEvent("t1", 20, 8, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered[0]?.steps[0]?.stepId).toBe("s2");
+ expect(ordered[0]?.steps[1]?.stepId).toBe("s1");
+ });
+
+ it("preserves first-seen order of turns", () => { // t2 is folded before t1
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t2", 10, 5, "s1"));
+ s = foldMetricsEvent(s, usageEvent("t1", 20, 8, "s1"));
+ s = foldMetricsEvent(s, doneEvent("t2"));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered[0]?.turnId).toBe("t2");
+ expect(ordered[1]?.turnId).toBe("t1");
+ });
+});
+
+describe("selectOrderedTurnMetrics", () => { // Covers what the selector exposes for live, in-flight, done and durable turns.
+ it("durable wins over live by turnId, live-done appended last", () => {
+ let s = initialMetricsState();
+
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); // t1 live: usage only, no step-complete and no done
+ s = foldMetricsEvent(s, usageEvent("t2", 200, 80, "s1")); // t2 live: usage, step-complete and done
+ s = foldMetricsEvent(s, stepCompleteEvent("t2", "s1"));
+ s = foldMetricsEvent(s, doneEvent("t2"));
+
+ s = applyDurableMetrics(s, [ // durable record for t1 with values that differ from the live ones
+ {
+ turnId: "t1",
+ usage: { inputTokens: 999, outputTokens: 999 },
+ durationMs: 3000,
+ steps: [
+ {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 999, outputTokens: 999 },
+ genTotalMs: 3000,
+ },
+ ],
+ },
+ ]);
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(2);
+ expect(ordered[0]?.turnId).toBe("t1");
+ expect(ordered[0]?.total?.usage.inputTokens).toBe(999); // value from the durable record, not the live 100
+ expect(ordered[0]?.total?.durationMs).toBe(3000);
+ expect(ordered[1]?.turnId).toBe("t2");
+ expect(ordered[1]?.total?.durationMs).toBeUndefined(); // t2's done event was built without durationMs
+ });
+
+ it("empty state returns empty", () => {
+ const s = initialMetricsState();
+ expect(selectOrderedTurnMetrics(s)).toEqual([]);
+ });
+
+ it("selectOrderedTurnMetrics: in-flight turn exposes only completed steps and total=null", () => { // s1 is complete, s2 has usage only, no done event
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 1000 }));
+ s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(1);
+ expect(ordered[0]?.turnId).toBe("t1");
+ expect(ordered[0]?.steps).toHaveLength(1);
+ expect(ordered[0]?.steps[0]?.stepId).toBe("s1");
+ expect(ordered[0]?.total).toBeNull();
+ });
+
+ it("selectOrderedTurnMetrics: a turn with no complete step and not done is omitted", () => { // both steps have usage only
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(0);
+ });
+
+ it("selectOrderedTurnMetrics: after done, total is present", () => { // done carries durationMs 2000
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 1000 }));
+ s = foldMetricsEvent(s, doneEvent("t1", { durationMs: 2000 }));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(1);
+ expect(ordered[0]?.turnId).toBe("t1");
+ expect(ordered[0]?.total?.durationMs).toBe(2000);
+ expect(ordered[0]?.steps).toHaveLength(1);
+ });
+
+ it("step-complete marks the step complete", () => { // no done event is folded; the completed step must still be listed
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 500 }));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(1);
+ expect(ordered[0]?.steps).toHaveLength(1);
+ expect(ordered[0]?.steps[0]?.stepId).toBe("s1");
+ expect(ordered[0]?.steps[0]?.genTotalMs).toBe(500);
+ });
+
+ it("selectOrderedTurnMetrics: durable turn → steps + total present", () => { // state is built only through applyDurableMetrics, no live events
+ let s = initialMetricsState();
+ s = applyDurableMetrics(s, [
+ {
+ turnId: "t1",
+ usage: { inputTokens: 300, outputTokens: 150 },
+ durationMs: 5000,
+ steps: [
+ {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ genTotalMs: 1000,
+ },
+ {
+ stepId: "s2" as StepId,
+ usage: { inputTokens: 200, outputTokens: 100 },
+ genTotalMs: 2000,
+ },
+ ],
+ },
+ ]);
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(1);
+ expect(ordered[0]?.turnId).toBe("t1");
+ expect(ordered[0]?.steps).toHaveLength(2);
+ expect(ordered[0]?.steps[0]?.stepId).toBe("s1");
+ expect(ordered[0]?.steps[1]?.stepId).toBe("s2");
+ expect(ordered[0]?.total?.usage.inputTokens).toBe(300);
+ expect(ordered[0]?.total?.durationMs).toBe(5000);
+ });
+});
+
+describe("applyDurableMetrics", () => { // Inspects state.durable and state.durableOrder directly after applying durable turns.
+ it("stores durable turns in order", () => { // order asserted matches the order of the input array
+ let s = initialMetricsState();
+ s = applyDurableMetrics(s, [
+ { turnId: "t1", usage: { inputTokens: 10, outputTokens: 5 }, steps: [] },
+ { turnId: "t2", usage: { inputTokens: 20, outputTokens: 8 }, steps: [] },
+ ]);
+ expect(s.durableOrder).toEqual(["t1", "t2"]);
+ expect(s.durable.size).toBe(2);
+ });
+
+ it("is idempotent for same turnId", () => { // the same turn object is applied twice
+ let s = initialMetricsState();
+ const turn = {
+ turnId: "t1",
+ usage: { inputTokens: 10, outputTokens: 5 },
+ steps: [],
+ };
+ s = applyDurableMetrics(s, [turn]);
+ s = applyDurableMetrics(s, [turn]);
+ expect(s.durableOrder).toEqual(["t1"]); // t1 must not be listed a second time
+ expect(s.durable.size).toBe(1);
+ });
+
+ it("overwrites durable turn data for same turnId", () => { // second apply reuses turnId t1 with different usage
+ let s = initialMetricsState();
+ s = applyDurableMetrics(s, [
+ { turnId: "t1", usage: { inputTokens: 10, outputTokens: 5 }, steps: [] },
+ ]);
+ s = applyDurableMetrics(s, [
+ { turnId: "t1", usage: { inputTokens: 99, outputTokens: 99 }, steps: [] },
+ ]);
+ expect(s.durable.get("t1")?.usage.inputTokens).toBe(99); // the later value replaces the earlier 10
+ });
+});