diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/app/App.svelte | 2 | ||||
| -rw-r--r-- | src/app/store.svelte.ts | 14 | ||||
| -rw-r--r-- | src/core/metrics/format.test.ts | 199 | ||||
| -rw-r--r-- | src/core/metrics/format.ts | 69 | ||||
| -rw-r--r-- | src/core/metrics/index.ts | 17 | ||||
| -rw-r--r-- | src/core/metrics/place.test.ts | 469 | ||||
| -rw-r--r-- | src/core/metrics/place.ts | 151 | ||||
| -rw-r--r-- | src/core/metrics/reducer.test.ts | 368 | ||||
| -rw-r--r-- | src/core/metrics/reducer.ts | 239 | ||||
| -rw-r--r-- | src/core/metrics/types.ts | 68 | ||||
| -rw-r--r-- | src/features/chat/index.ts | 3 | ||||
| -rw-r--r-- | src/features/chat/ports.ts | 9 | ||||
| -rw-r--r-- | src/features/chat/store.svelte.ts | 29 | ||||
| -rw-r--r-- | src/features/chat/store.test.ts | 308 | ||||
| -rw-r--r-- | src/features/chat/test-helpers.ts | 40 | ||||
| -rw-r--r-- | src/features/chat/ui.test.ts | 219 | ||||
| -rw-r--r-- | src/features/chat/ui/ChatView.svelte | 54 |
17 files changed, 2243 insertions, 15 deletions
diff --git a/src/app/App.svelte b/src/app/App.svelte index 61b4cb9..857a1e5 100644 --- a/src/app/App.svelte +++ b/src/app/App.svelte @@ -62,7 +62,7 @@ <div class="flex-1 overflow-y-auto"> {#key store.activeConversationId} - <ChatView chunks={store.activeChat.chunks} /> + <ChatView chunks={store.activeChat.chunks} turnMetrics={store.activeChat.turnMetrics} /> {/key} </div> diff --git a/src/app/store.svelte.ts b/src/app/store.svelte.ts index 760c390..fe3c55c 100644 --- a/src/app/store.svelte.ts +++ b/src/app/store.svelte.ts @@ -2,6 +2,7 @@ import type { ChatDeltaMessage, ChatErrorMessage, ConversationHistoryResponse, + ConversationMetricsResponse, ModelsResponse, } from "@dispatch/transport-contract"; import type { SurfaceServerMessage, SurfaceSpec } from "@dispatch/ui-contract"; @@ -17,7 +18,7 @@ import { subscribe as protocolSubscribe, unsubscribe as protocolUnsubscribe, } from "../core/protocol"; -import type { ChatStore } from "../features/chat"; +import type { ChatStore, MetricsSync } from "../features/chat"; import { createChatStore } from "../features/chat"; import type { ConversationCache } from "../features/conversation-cache"; import { createConversationCache } from "../features/conversation-cache"; @@ -73,6 +74,15 @@ function createHistorySync( }; } +function createMetricsSync(httpBase: string, fetchImpl: typeof fetch): MetricsSync { + return async (conversationId: string) => { + const url = `${httpBase}/conversations/${encodeURIComponent(conversationId)}/metrics`; + const res = await fetchImpl(url); + if (!res.ok) return { turns: [] }; + return (await res.json()) as ConversationMetricsResponse; + }; +} + export function createAppStore(opts?: CreateAppStoreOptions): AppStore { let protocol = $state<ProtocolState>(protocolInitialState()); let selectedId = $state<string | null>(null); @@ -112,6 +122,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { ); const historySync = createHistorySync(httpBase, fetchImpl); + const metricsSync = createMetricsSync(httpBase, fetchImpl); const chatStores = new Map<string, ChatStore>(); @@ -125,6 +136,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { }, }, historySync, + metricsSync, cache, }); } diff --git a/src/core/metrics/format.test.ts b/src/core/metrics/format.test.ts new file mode 100644 index 0000000..9881e50 --- /dev/null +++ b/src/core/metrics/format.test.ts @@ -0,0 +1,199 @@ +import type { StepId, StepMetrics, TurnMetrics } from "@dispatch/wire"; +import { describe, expect, it } from "vitest"; +import { computeTps, viewStepMetrics, viewTurnMetrics } from "./format"; + +describe("computeTps", () => { + it("null when elapsed missing", () => { + expect(computeTps(100, undefined)).toBeNull(); + }); + + it("null when elapsed is zero", () => { + expect(computeTps(100, 0)).toBeNull(); + }); + + it("null when elapsed is negative", () => { + expect(computeTps(100, -100)).toBeNull(); + }); + + it("computes tokens per second", () => { + expect(computeTps(1000, 2000)).toBe(500); + }); + + it("computes fractional tps", () => { + expect(computeTps(100, 3000)).toBeCloseTo(33.33, 1); + }); +}); + +describe("viewStepMetrics", () => { + it("formats tokens with thousands separator, tps, and durations", () => { + const step: StepMetrics = { + stepId: "s1" as StepId, + usage: { inputTokens: 1234, outputTokens: 567 }, + ttftMs: 820, + decodeMs: 1200, + genTotalMs: 2020, + }; + const view = viewStepMetrics(step, 0); + expect(view.label).toBe("step 1"); + expect(view.tokensLabel).toBe("1,801 tok"); + expect(view.tps).toBe("473 tok/s"); + expect(view.ttft).toBe("820ms"); + expect(view.decode).toBe("1.2s"); + expect(view.genTotal).toBe("2.0s"); + }); + + it("handles missing timing fields", () => { + const step: StepMetrics = { + stepId: "s1" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + }; + const view = viewStepMetrics(step, 0); + expect(view.tps).toBeNull(); + expect(view.ttft).toBeNull(); + expect(view.decode).toBeNull(); + expect(view.genTotal).toBeNull(); + }); + + it("formats duration < 1s as ms", () => { + const step: StepMetrics = { + stepId: "s1" as StepId, + usage: { inputTokens: 10, outputTokens: 5 }, + ttftMs: 42, + }; + const view = viewStepMetrics(step, 0); + expect(view.ttft).toBe("42ms"); + }); + + it("formats duration >= 1s as seconds", () => { + const step: StepMetrics = { + stepId: "s1" as StepId, + usage: { inputTokens: 10, outputTokens: 5 }, + genTotalMs: 3200, + }; + const view = viewStepMetrics(step, 0); + expect(view.genTotal).toBe("3.2s"); + }); + + it("uses step index for label", () => { + const step: StepMetrics = { + stepId: "s1" as StepId, + usage: { inputTokens: 10, outputTokens: 5 }, + }; + expect(viewStepMetrics(step, 2).label).toBe("step 3"); + }); + + it("tps uses decodeMs (not genTotalMs)", () => { + const step: StepMetrics = { + stepId: "s1" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + decodeMs: 500, + genTotalMs: 800, + }; + const view = viewStepMetrics(step, 0); + // 50 / (500/1000) = 100 tok/s, NOT 50/(800/1000)=62.5 + expect(view.tps).toBe("100 tok/s"); + }); + + it("tps falls back to genTotalMs when decodeMs absent", () => { + const step: StepMetrics = { + stepId: "s1" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + genTotalMs: 800, + }; + const view = viewStepMetrics(step, 0); + // 50 / (800/1000) = 62.5 → rounds to 63 + expect(view.tps).toBe("63 tok/s"); + }); +}); + +describe("viewTurnMetrics", () => { + it("formats total tokens and breakdown", () => { + const turn: TurnMetrics = { + turnId: "t1", + usage: { inputTokens: 1000, outputTokens: 234 }, + durationMs: 5000, + steps: [ + { + stepId: "s1" as StepId, + usage: { inputTokens: 1000, outputTokens: 234 }, + decodeMs: 3000, + genTotalMs: 4000, + }, + ], + }; + const view = viewTurnMetrics(turn); + expect(view.tokensLabel).toBe("1,234 tok"); + expect(view.breakdown).toBe("1,000 in / 234 out"); + expect(view.tps).toBe("78 tok/s"); + expect(view.duration).toBe("5.0s"); + }); + + it("breakdown includes cache only when present", () => { + const turn: TurnMetrics = { + turnId: "t1", + usage: { inputTokens: 1000, outputTokens: 234, cacheReadTokens: 500 }, + steps: [], + }; + const view = viewTurnMetrics(turn); + expect(view.breakdown).toBe("1,000 in / 234 out / 500 cache"); + }); + + it("breakdown omits cache when not present", () => { + const turn: TurnMetrics = { + turnId: "t1", + usage: { inputTokens: 100, outputTokens: 50 }, + steps: [], + }; + const view = viewTurnMetrics(turn); + expect(view.breakdown).toBe("100 in / 50 out"); + }); + + it("tps is null when no step has decodeMs or genTotalMs", () => { + const turn: TurnMetrics = { + turnId: "t1", + usage: { inputTokens: 100, outputTokens: 50 }, + steps: [ + { + stepId: "s1" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + }, + ], + }; + const view = viewTurnMetrics(turn); + expect(view.tps).toBeNull(); + }); + + it("duration is null when durationMs absent", () => { + const turn: TurnMetrics = { + turnId: "t1", + usage: { inputTokens: 100, outputTokens: 50 }, + steps: [], + }; + const view = viewTurnMetrics(turn); + expect(view.duration).toBeNull(); + }); + + it("sums decodeMs across steps (fallback genTotalMs per step) for tps", () => { + const turn: TurnMetrics = { + turnId: "t1", + usage: { inputTokens: 300, outputTokens: 150 }, + steps: [ + { + stepId: "s1" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + decodeMs: 800, + genTotalMs: 1000, + }, + { + stepId: "s2" as StepId, + usage: { inputTokens: 200, outputTokens: 100 }, + genTotalMs: 2000, + }, + ], + }; + const view = viewTurnMetrics(turn); + // step1 uses decodeMs=800, step2 falls back to genTotalMs=2000 → total=2800ms + // 150 / (2800/1000) = 53.57 → rounds to 54 + expect(view.tps).toBe("54 tok/s"); + }); +}); diff --git a/src/core/metrics/format.ts b/src/core/metrics/format.ts new file mode 100644 index 0000000..3a4078c --- /dev/null +++ b/src/core/metrics/format.ts @@ -0,0 +1,69 @@ +import type { StepMetrics, TurnMetrics, Usage } from "@dispatch/wire"; +import type { StepMetricsView, TurnMetricsView } from "./types"; + +function formatTokens(n: number): string { + return n.toLocaleString("en-US"); +} + +function formatDuration(ms: number | undefined): string | null { + if (ms === undefined || ms <= 0) return null; + if (ms < 1000) return `${Math.round(ms)}ms`; + return `${(ms / 1000).toFixed(1)}s`; +} + +function formatTps(tps: number | null): string | null { + if (tps === null) return null; + if (tps < 10) return `${tps.toFixed(1)} tok/s`; + return `${Math.round(tps)} tok/s`; +} + +/** Compute tokens-per-second. Returns null when elapsed time is absent or zero. */ +export function computeTps(outputTokens: number, elapsedMs: number | undefined): number | null { + if (elapsedMs === undefined || elapsedMs <= 0) return null; + return outputTokens / (elapsedMs / 1000); +} + +function totalTokens(u: Usage): number { + return u.inputTokens + u.outputTokens; +} + +function formatBreakdown(u: Usage): string { + let s = `${formatTokens(u.inputTokens)} in / ${formatTokens(u.outputTokens)} out`; + if (u.cacheReadTokens !== undefined && u.cacheReadTokens > 0) { + s += ` / ${formatTokens(u.cacheReadTokens)} cache`; + } + return s; +} + +/** Build a formatted view of a single step's metrics. */ +export function viewStepMetrics(step: StepMetrics, index: number): StepMetricsView { + const total = totalTokens(step.usage); + const tps = computeTps(step.usage.outputTokens, step.decodeMs ?? step.genTotalMs); + return { + label: `step ${index + 1}`, + tokensLabel: `${formatTokens(total)} tok`, + tps: formatTps(tps), + ttft: formatDuration(step.ttftMs), + decode: formatDuration(step.decodeMs), + genTotal: formatDuration(step.genTotalMs), + }; +} + +/** Build a formatted view of a turn's aggregate metrics. */ +export function viewTurnMetrics(turn: TurnMetrics): TurnMetricsView { + const total = totalTokens(turn.usage); + let totalGenMs: number | undefined; + for (const step of turn.steps) { + const stepMs = step.decodeMs ?? step.genTotalMs; + if (stepMs !== undefined) { + totalGenMs = (totalGenMs ?? 0) + stepMs; + } + } + const tps = computeTps(turn.usage.outputTokens, totalGenMs); + return { + tokensLabel: `${formatTokens(total)} tok`, + breakdown: formatBreakdown(turn.usage), + tps: formatTps(tps), + duration: formatDuration(turn.durationMs), + }; +} diff --git a/src/core/metrics/index.ts b/src/core/metrics/index.ts new file mode 100644 index 0000000..72d825d --- /dev/null +++ b/src/core/metrics/index.ts @@ -0,0 +1,17 @@ +export { computeTps, viewStepMetrics, viewTurnMetrics } from "./format"; +export { interleaveTurnMetrics } from "./place"; +export { + applyDurableMetrics, + foldMetricsEvent, + initialMetricsState, + selectOrderedTurnMetrics, +} from "./reducer"; +export type { + MetricsRow, + MetricsState, + StepMetrics, + StepMetricsView, + TurnMetrics, + TurnMetricsEntry, + TurnMetricsView, +} from "./types"; diff --git a/src/core/metrics/place.test.ts b/src/core/metrics/place.test.ts new file mode 100644 index 0000000..b6cb877 --- /dev/null +++ b/src/core/metrics/place.test.ts @@ -0,0 +1,469 @@ +import type { StepId, StepMetrics, TurnMetrics } from "@dispatch/wire"; +import { describe, expect, it } from "vitest"; +import type { RenderGroup } from "../chunks"; +import { interleaveTurnMetrics } from "./place"; +import type { TurnMetricsEntry } from "./types"; + +function userGroup(seq: number, text: string): RenderGroup { + return { + kind: "single", + chunk: { + seq, + role: "user", + chunk: { type: "text", text }, + provisional: false, + }, + }; +} + +function assistantGroup(seq: number, text: string): RenderGroup { + return { + kind: "single", + chunk: { + seq, + role: "assistant", + chunk: { type: "text", text }, + provisional: false, + }, + }; +} + +function toolCallGroup(seq: number, stepId: string, toolCallId: string): RenderGroup { + return { + kind: "single", + chunk: { + seq, + role: "assistant", + chunk: { + type: "tool-call", + toolCallId, + toolName: "test", + input: {}, + stepId: stepId as StepId, + }, + provisional: false, + }, + }; +} + +function toolResultGroup(seq: number, stepId: string, toolCallId: string): RenderGroup { + return { + kind: "single", + chunk: { + seq, + role: "tool", + chunk: { + type: "tool-result", + toolCallId, + toolName: "test", + content: "", + isError: false, + stepId: stepId as StepId, + }, + provisional: false, + }, + }; +} + +function toolBatchGroup(stepId: string, toolCallIds: string[]): RenderGroup { + return { + kind: "tool-batch", + stepId, + entries: toolCallIds.map((id) => ({ + call: { + type: "tool-call" as const, + toolCallId: id, + toolName: "test", + input: {}, + stepId: stepId as StepId, + }, + result: null, + })), + provisional: false, + }; +} + +function makeStep(stepId: string, inputTokens: number, outputTokens: number): StepMetrics { + return { + stepId: stepId as StepId, + usage: { inputTokens, outputTokens }, + }; +} + +function makeTurn( + turnId: string, + inputTokens: number, + outputTokens: number, + steps: StepMetrics[] = [], +): TurnMetrics { + return { + turnId, + usage: { inputTokens, outputTokens }, + steps, + }; +} + +function makeEntry( + turnId: string, + inputTokens: number, + outputTokens: number, + steps: StepMetrics[] = [], +): TurnMetricsEntry { + return { + turnId, + steps, + total: makeTurn(turnId, inputTokens, outputTokens, steps), + }; +} + +function makeProgressiveEntry(turnId: string, steps: StepMetrics[]): TurnMetricsEntry { + return { + turnId, + steps, + total: null, + }; +} + +function expectGroupAt( + rows: readonly { readonly kind: string }[], + index: number, + expected: RenderGroup, +): void { + const row = rows[index]; + expect(row?.kind).toBe("group"); + expect((row as { readonly group: RenderGroup } | undefined)?.group).toBe(expected); +} + +function expectStepMetricsAt( + rows: readonly { readonly kind: string }[], + index: number, + expectedStepId: string, + expectedIndex: number, +): void { + const row = rows[index]; + expect(row?.kind).toBe("step-metrics"); + const sm = row as { readonly step: StepMetrics; readonly index: number } | undefined; + expect(sm?.step.stepId).toBe(expectedStepId); + expect(sm?.index).toBe(expectedIndex); +} + +function expectTurnMetricsAt( + rows: readonly { readonly kind: string }[], + index: number, + expectedTurnId: string, +): void { + const row = rows[index]; + expect(row?.kind).toBe("turn-metrics"); + expect((row as { readonly turn: TurnMetrics } | undefined)?.turn.turnId).toBe(expectedTurnId); +} + +describe("interleaveTurnMetrics", () => { + it("no metrics: rows are all groups, unchanged order", () => { + const g1 = userGroup(1, "q"); + const g2 = assistantGroup(2, "a"); + const rows = interleaveTurnMetrics([g1, g2], []); + expect(rows).toHaveLength(2); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + }); + + it("head-aligned: segment i gets entries[i]", () => { + const g1 = userGroup(1, "q1"); + const g2 = assistantGroup(2, "a1"); + const g3 = userGroup(3, "q2"); + const g4 = assistantGroup(4, "a2"); + const step1 = makeStep("s1", 100, 50); + const step2 = makeStep("s2", 200, 80); + const rows = interleaveTurnMetrics( + [g1, g2, g3, g4], + [makeEntry("t1", 100, 50, [step1]), makeEntry("t2", 200, 80, [step2])], + ); + + expect(rows).toHaveLength(8); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "s1", 0); + expectTurnMetricsAt(rows, 3, "t1"); + expectGroupAt(rows, 4, g3); + expectGroupAt(rows, 5, g4); + expectStepMetricsAt(rows, 6, "s2", 0); + expectTurnMetricsAt(rows, 7, "t2"); + }); + + it("a trailing segment with no entry (in-flight turn) renders no metrics", () => { + const g1 = userGroup(1, "q1"); + const g2 = assistantGroup(2, "a1"); + const g3 = userGroup(3, "q2"); + const g4 = assistantGroup(4, "a2"); + const step = makeStep("s1", 100, 50); + const rows = interleaveTurnMetrics([g1, g2, g3, g4], [makeEntry("t1", 100, 50, [step])]); + + expect(rows).toHaveLength(6); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "s1", 0); + expectTurnMetricsAt(rows, 3, "t1"); + expectGroupAt(rows, 4, g3); + expectGroupAt(rows, 5, g4); + }); + + it("single text-only turn: step row + turn-metrics both at tail", () => { + const g1 = userGroup(1, "q1"); + const g2 = assistantGroup(2, "a1"); + const step = makeStep("s1", 100, 50); + const turn = makeEntry("t1", 100, 50, [step]); + const rows = interleaveTurnMetrics([g1, g2], [turn]); + + expect(rows).toHaveLength(4); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "s1", 0); + expectTurnMetricsAt(rows, 3, "t1"); + }); + + it("tool step anchors inline after its tool-batch group", () => { + const g1 = userGroup(1, "q1"); + const g2 = toolBatchGroup("t#0", ["c1", "c2"]); + const g3 = assistantGroup(3, "a1"); + const step0 = makeStep("t#0", 100, 50); + const step1 = makeStep("t#1", 200, 80); + const turn = makeEntry("t1", 300, 130, [step0, step1]); + const rows = interleaveTurnMetrics([g1, g2, g3], [turn]); + + expect(rows).toHaveLength(6); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "t#0", 0); + expectGroupAt(rows, 3, g3); + expectStepMetricsAt(rows, 4, "t#1", 1); + expectTurnMetricsAt(rows, 5, "t1"); + }); + + it("single tool-call group anchors its step", () => { + const g1 = userGroup(1, "q1"); + const g2 = toolCallGroup(2, "s1", "c1"); + const g3 = assistantGroup(3, "a1"); + const step = makeStep("s1", 100, 50); + const turn = makeEntry("t1", 100, 50, [step]); + const rows = interleaveTurnMetrics([g1, g2, g3], [turn]); + + expect(rows).toHaveLength(5); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "s1", 0); + expectGroupAt(rows, 3, g3); + expectTurnMetricsAt(rows, 4, "t1"); + }); + + it("single tool-result group anchors its step", () => { + const g1 = userGroup(1, "q1"); + const g2 = toolResultGroup(2, "s1", "c1"); + const g3 = assistantGroup(3, "a1"); + const step = makeStep("s1", 100, 50); + const turn = makeEntry("t1", 100, 50, [step]); + const rows = interleaveTurnMetrics([g1, g2, g3], [turn]); + + expect(rows).toHaveLength(5); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "s1", 0); + expectGroupAt(rows, 3, g3); + expectTurnMetricsAt(rows, 4, "t1"); + }); + + it("multi-step: each tool step inline, final step + total at tail", () => { + const g1 = userGroup(1, "q1"); + const g2 = toolBatchGroup("t#0", ["c1"]); + const g3 = assistantGroup(2, "thinking"); + const g4 = toolBatchGroup("t#1", ["c2", "c3"]); + const g5 = assistantGroup(3, "a1"); + const step0 = makeStep("t#0", 100, 50); + const step1 = makeStep("t#1", 200, 80); + const step2 = makeStep("t#2", 50, 20); + const turn = makeEntry("t1", 350, 150, [step0, step1, step2]); + const rows = interleaveTurnMetrics([g1, g2, g3, g4, g5], [turn]); + + expect(rows).toHaveLength(9); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "t#0", 0); + expectGroupAt(rows, 3, g3); + expectGroupAt(rows, 4, g4); + expectStepMetricsAt(rows, 5, "t#1", 1); + expectGroupAt(rows, 6, g5); + expectStepMetricsAt(rows, 7, "t#2", 2); + expectTurnMetricsAt(rows, 8, "t1"); + }); + + it("multiple turns head-aligned with inline steps", () => { + const g1 = userGroup(1, "q1"); + const g2 = toolBatchGroup("s1", ["c1"]); + const g3 = assistantGroup(2, "a1"); + const g4 = userGroup(3, "q2"); + const g5 = assistantGroup(4, "a2"); + const step1 = makeStep("s1", 100, 50); + const step2 = makeStep("s2", 200, 80); + const rows = interleaveTurnMetrics( + [g1, g2, g3, g4, g5], + [makeEntry("t1", 100, 50, [step1]), makeEntry("t2", 200, 80, [step2])], + ); + + expect(rows).toHaveLength(9); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "s1", 0); + expectGroupAt(rows, 3, g3); + expectTurnMetricsAt(rows, 4, "t1"); + expectGroupAt(rows, 5, g4); + expectGroupAt(rows, 6, g5); + expectStepMetricsAt(rows, 7, "s2", 0); + expectTurnMetricsAt(rows, 8, "t2"); + }); + + it("unanchored step (stepId not in groups) falls back to tail before turn-metrics", () => { + const g1 = userGroup(1, "q1"); + const g2 = assistantGroup(2, "a1"); + const step0 = makeStep("orphan", 100, 50); + const turn = makeEntry("t1", 100, 50, [step0]); + const rows = interleaveTurnMetrics([g1, g2], [turn]); + + expect(rows).toHaveLength(4); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "orphan", 0); + expectTurnMetricsAt(rows, 3, "t1"); + }); + + it("fewer metrics than segments: trailing segments are bare", () => { + const g1 = userGroup(1, "q1"); + const g2 = assistantGroup(2, "a1"); + const g3 = userGroup(3, "q2"); + const g4 = assistantGroup(4, "a2"); + const g5 = userGroup(5, "q3"); + const g6 = assistantGroup(6, "a3"); + const step = makeStep("s1", 300, 120); + const rows = interleaveTurnMetrics( + [g1, g2, g3, g4, g5, g6], + [makeEntry("t1", 300, 120, [step])], + ); + + expect(rows).toHaveLength(8); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "s1", 0); + expectTurnMetricsAt(rows, 3, "t1"); + expectGroupAt(rows, 4, g3); + expectGroupAt(rows, 5, g4); + expectGroupAt(rows, 6, g5); + expectGroupAt(rows, 7, g6); + }); + + it("in-flight turn (no durationMs) still produces step + turn rows", () => { + const g1 = userGroup(1, "q1"); + const g2 = assistantGroup(2, "a1"); + const step = makeStep("s1", 100, 50); + const turn: TurnMetricsEntry = { + turnId: "t1", + steps: [step], + total: { + turnId: "t1", + usage: { inputTokens: 100, outputTokens: 50 }, + steps: [step], + }, + }; + const rows = interleaveTurnMetrics([g1, g2], [turn]); + + expect(rows).toHaveLength(4); + expectStepMetricsAt(rows, 2, "s1", 0); + expectTurnMetricsAt(rows, 3, "t1"); + const metricsRow = rows[3] as { readonly turn: TurnMetrics } | undefined; + expect(metricsRow?.turn.durationMs).toBeUndefined(); + }); + + it("leading non-turn groups emit as plain group rows", () => { + const g0 = assistantGroup(1, "system msg"); + const g1 = userGroup(2, "q1"); + const g2 = assistantGroup(3, "a1"); + const step = makeStep("s1", 100, 50); + const rows = interleaveTurnMetrics([g0, g1, g2], [makeEntry("t1", 100, 50, [step])]); + + expect(rows).toHaveLength(5); + expectGroupAt(rows, 0, g0); + expect(rows[1]?.kind).toBe("group"); + expect(rows[2]?.kind).toBe("group"); + expectStepMetricsAt(rows, 3, "s1", 0); + expectTurnMetricsAt(rows, 4, "t1"); + }); + + it("more metrics than segments: only T entries placed (extra ignored)", () => { + const g1 = userGroup(1, "q1"); + const g2 = assistantGroup(2, "a1"); + const step1 = makeStep("s1", 100, 50); + const step2 = makeStep("s2", 200, 80); + const rows = interleaveTurnMetrics( + [g1, g2], + [makeEntry("t1", 100, 50, [step1]), makeEntry("t2", 200, 80, [step2])], + ); + + expect(rows).toHaveLength(4); + expectStepMetricsAt(rows, 2, "s1", 0); + expectTurnMetricsAt(rows, 3, "t1"); + }); + + it("turn with no steps emits only turn-metrics (no step-metrics)", () => { + const g1 = userGroup(1, "q1"); + const g2 = assistantGroup(2, "a1"); + const rows = interleaveTurnMetrics([g1, g2], [makeEntry("t1", 100, 50)]); + + expect(rows).toHaveLength(3); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectTurnMetricsAt(rows, 2, "t1"); + }); + + it("progressive: entry with steps but total=null emits step rows and NO turn-metrics row", () => { + const g1 = userGroup(1, "q1"); + const g2 = toolBatchGroup("s1", ["c1"]); + const g3 = assistantGroup(2, "a1"); + const step1 = makeStep("s1", 100, 50); + const entry = makeProgressiveEntry("t1", [step1]); + const rows = interleaveTurnMetrics([g1, g2, g3], [entry]); + + expect(rows).toHaveLength(4); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "s1", 0); + expectGroupAt(rows, 3, g3); + }); + + it("entry with total emits step rows + a turn-metrics row", () => { + const g1 = userGroup(1, "q1"); + const g2 = toolBatchGroup("s1", ["c1"]); + const g3 = assistantGroup(2, "a1"); + const step1 = makeStep("s1", 100, 50); + const entry = makeEntry("t1", 100, 50, [step1]); + const rows = interleaveTurnMetrics([g1, g2, g3], [entry]); + + expect(rows).toHaveLength(5); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "s1", 0); + expectGroupAt(rows, 3, g3); + expectTurnMetricsAt(rows, 4, "t1"); + }); + + it("progressive multi-step: unanchored steps at tail, no turn-metrics", () => { + const g1 = userGroup(1, "q1"); + const g2 = assistantGroup(2, "a1"); + const step0 = makeStep("s1", 100, 50); + const step1 = makeStep("s2", 200, 80); + const entry = makeProgressiveEntry("t1", [step0, step1]); + const rows = interleaveTurnMetrics([g1, g2], [entry]); + + expect(rows).toHaveLength(4); + expectGroupAt(rows, 0, g1); + expectGroupAt(rows, 1, g2); + expectStepMetricsAt(rows, 2, "s1", 0); + expectStepMetricsAt(rows, 3, "s2", 1); + }); +}); diff --git a/src/core/metrics/place.ts b/src/core/metrics/place.ts new file mode 100644 index 0000000..2481a16 --- /dev/null +++ b/src/core/metrics/place.ts @@ -0,0 +1,151 @@ +import type { RenderGroup } from "../chunks"; +import type { MetricsRow, TurnMetricsEntry } from "./types"; + +function groupStepId(g: RenderGroup): string | undefined { + if (g.kind === "tool-batch") return g.stepId; + const c = g.chunk.chunk; + return c.type === "tool-call" || c.type === "tool-result" ? c.stepId : undefined; +} + +/** + * Interleave turn metrics into the rendered transcript. + * + * Splits groups into per-turn segments: a new segment begins at each `single` + * group with `group.chunk.role === "user"`. Head-aligns: segment `i` receives + * `entries[i]` (the first `min(K, T)` segments get the first `min(K, T)` entries). + * + * Within a segment that has an aligned turn entry, each completed step's metrics + * are placed INLINE right after the last group bearing that step's `stepId` (tool-call/ + * tool-result chunks and tool-batch groups carry `stepId`). Steps whose `stepId` does + * not appear in any group ("unanchored") fall back to the segment tail, before the + * turn-metrics row (if present). + * + * A `turn-metrics` row is emitted ONLY when `entry.total !== null` (i.e. the turn + * is finalized via `done` or durable data). A still-generating turn emits its + * completed step rows but NO turn-total row. + * + * Head-alignment is stable: the durable `/metrics` endpoint returns every + * SEALED turn in turn order (a contiguous prefix from turn 0), and we append + * only the just-finished live turn — so `entries[i]` is turn `i`, and existing + * turns never move when a new turn is appended. + */ +export function interleaveTurnMetrics( + groups: readonly RenderGroup[], + entries: readonly TurnMetricsEntry[], +): readonly MetricsRow[] { + if (entries.length === 0) { + return groups.map((g) => ({ kind: "group" as const, group: g })); + } + + const segmentStarts: number[] = []; + for (let i = 0; i < groups.length; i++) { + const g = groups[i]; + if (g !== undefined && g.kind === "single" && g.chunk.role === "user") { + segmentStarts.push(i); + } + } + + const T = segmentStarts.length; + + if (T === 0) { + return groups.map((g) => ({ kind: "group" as const, group: g })); + } + + const K = entries.length; + const matched = Math.min(K, T); + + // Head-alignment: segment i ↔ entries[i] for i in [0, matched). + // A trailing segment with no corresponding entry renders no metrics. + const segmentEntries = new Map<number, TurnMetricsEntry>(); + for (let i = 0; i < matched; i++) { + const entry = entries[i]; + if (entry !== undefined) { + segmentEntries.set(i, entry); + } + } + + const rows: MetricsRow[] = []; + + const firstUserIdx = segmentStarts[0] ?? 0; + for (let i = 0; i < firstUserIdx; i++) { + const g = groups[i]; + if (g !== undefined) { + rows.push({ kind: "group", group: g }); + } + } + + for (let seg = 0; seg < T; seg++) { + const start = segmentStarts[seg] ?? 0; + const end = seg + 1 < T ? (segmentStarts[seg + 1] ?? groups.length) : groups.length; + + const entry = segmentEntries.get(seg); + + if (entry === undefined) { + for (let i = start; i < end; i++) { + const g = groups[i]; + if (g !== undefined) { + rows.push({ kind: "group", group: g }); + } + } + continue; + } + + // Build anchor map: for each stepId, the LAST group index in this segment. + const anchorByStepId = new Map<string, number>(); + for (let i = start; i < end; i++) { + const g = groups[i]; + if (g === undefined) continue; + const sid = groupStepId(g); + if (sid !== undefined) { + anchorByStepId.set(sid, i); + } + } + + // Classify each step as anchored (at a group index) or unanchored. + const anchored: Map<number, { stepIndex: number; step: (typeof entry.steps)[number] }[]> = + new Map(); + const unanchored: { stepIndex: number; step: (typeof entry.steps)[number] }[] = []; + + for (let i = 0; i < entry.steps.length; i++) { + const step = entry.steps[i]; + if (step === undefined) continue; + const anchorGroupIdx = anchorByStepId.get(step.stepId); + if (anchorGroupIdx !== undefined) { + let arr = anchored.get(anchorGroupIdx); + if (arr === undefined) { + arr = []; + anchored.set(anchorGroupIdx, arr); + } + arr.push({ stepIndex: i, step }); + } else { + unanchored.push({ stepIndex: i, step }); + } + } + + // Emit groups; after each anchored group, emit its step-metrics rows. + for (let i = start; i < end; i++) { + const g = groups[i]; + if (g !== undefined) { + rows.push({ kind: "group", group: g }); + } + const stepsHere = anchored.get(i); + if (stepsHere !== undefined) { + stepsHere.sort((a, b) => a.stepIndex - b.stepIndex); + for (const { step, stepIndex } of stepsHere) { + rows.push({ kind: "step-metrics", step, index: stepIndex }); + } + } + } + + // Segment tail: unanchored steps, then turn-metrics (only when total is present). + unanchored.sort((a, b) => a.stepIndex - b.stepIndex); + for (const { step, stepIndex } of unanchored) { + rows.push({ kind: "step-metrics", step, index: stepIndex }); + } + if (entry.total !== null) { + rows.push({ kind: "turn-metrics", turn: entry.total }); + } + } + + return rows; +} diff --git a/src/core/metrics/reducer.test.ts b/src/core/metrics/reducer.test.ts new file mode 100644 index 0000000..16c88b3 --- /dev/null +++ b/src/core/metrics/reducer.test.ts @@ -0,0 +1,368 @@ +import type { StepId, TurnDoneEvent, TurnStepCompleteEvent, TurnUsageEvent } from "@dispatch/wire"; +import { describe, expect, it } from "vitest"; +import { + applyDurableMetrics, + foldMetricsEvent, + initialMetricsState, + selectOrderedTurnMetrics, +} from "./reducer"; + +const usageEvent = ( + turnId: string, + inputTokens: number, + outputTokens: number, + stepId?: string, +): TurnUsageEvent => { + const base = { + type: "usage" as const, + conversationId: "c1", + turnId, + usage: { inputTokens, outputTokens }, + }; + if (stepId !== undefined) { + return { ...base, stepId: stepId as StepId }; + } + return base; +}; + +const stepCompleteEvent = ( + turnId: string, + stepId: string, + timing: { ttftMs?: number; decodeMs?: number; genTotalMs?: number } = {}, +): TurnStepCompleteEvent => ({ + type: "step-complete", + conversationId: "c1", + turnId, + stepId: stepId as StepId, + ...timing, +}); + +const doneEvent = ( + turnId: string, + extra: { durationMs?: number; usage?: { inputTokens: number; outputTokens: number } } = {}, +): TurnDoneEvent => ({ + type: "done", + conversationId: "c1", + turnId, + reason: "stop", + ...extra, +}); + +describe("initialMetricsState", () => { + it("starts empty", () => { + const s = initialMetricsState(); + expect(s.live.size).toBe(0); + expect(s.liveOrder).toEqual([]); + expect(s.durable.size).toBe(0); + expect(s.durableOrder).toEqual([]); + }); +}); + +describe("foldMetricsEvent", () => { + it("folds per-step usage by stepId into a turn", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2")); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(1); + expect(ordered[0]?.turnId).toBe("t1"); + expect(ordered[0]?.steps).toHaveLength(2); + expect(ordered[0]?.steps[0]?.stepId).toBe("s1"); + expect(ordered[0]?.steps[0]?.usage).toEqual({ inputTokens: 100, outputTokens: 50 }); + expect(ordered[0]?.steps[1]?.stepId).toBe("s2"); + expect(ordered[0]?.steps[1]?.usage).toEqual({ inputTokens: 200, outputTokens: 80 }); + }); + + it("folds step-complete timing and merges with same-step usage", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent( + s, + stepCompleteEvent("t1", "s1", { ttftMs: 200, decodeMs: 800, genTotalMs: 1000 }), + ); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(1); + const step = ordered[0]?.steps[0]; + expect(step?.usage).toEqual({ inputTokens: 100, outputTokens: 50 }); + expect(step?.ttftMs).toBe(200); + expect(step?.decodeMs).toBe(800); + expect(step?.genTotalMs).toBe(1000); + }); + + it("step-complete before usage defaults usage to zeros", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 500 })); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + const step = ordered[0]?.steps[0]; + expect(step?.usage).toEqual({ inputTokens: 0, outputTokens: 0 }); + expect(step?.genTotalMs).toBe(500); + }); + + it("done sets durationMs and aggregate usage", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent( + s, + doneEvent("t1", { + durationMs: 5000, + usage: { inputTokens: 300, outputTokens: 150 }, + }), + ); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.total?.durationMs).toBe(5000); + expect(ordered[0]?.total?.usage).toEqual({ inputTokens: 300, outputTokens: 150 }); + }); + + it("aggregate usage sums steps when done.usage absent", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2")); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.total?.usage).toEqual({ inputTokens: 300, outputTokens: 130 }); + }); + + it("aggregate usage includes cache only when a step had cache", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, { + type: "usage", + conversationId: "c1", + turnId: "t1", + stepId: "s1" as StepId, + usage: { inputTokens: 100, outputTokens: 50, cacheReadTokens: 30 }, + }); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2")); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.total?.usage.cacheReadTokens).toBe(30); + expect(ordered[0]?.total?.usage.cacheWriteTokens).toBeUndefined(); + }); + + it("tolerates missing clock (no genTotalMs/ttft/decode)", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + const step = ordered[0]?.steps[0]; + expect(step?.ttftMs).toBeUndefined(); + expect(step?.decodeMs).toBeUndefined(); + expect(step?.genTotalMs).toBeUndefined(); + expect(ordered[0]?.total?.durationMs).toBeUndefined(); + }); + + it("usage without stepId does not create a turn", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50)); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(0); + }); + + it("ignores non-metrics events", () => { + const s = initialMetricsState(); + const next = foldMetricsEvent(s, { + type: "status", + conversationId: "c1", + status: "running", + }); + expect(next).toBe(s); + }); + + it("preserves first-seen order of steps", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 10, 5, "s2")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2")); + s = foldMetricsEvent(s, usageEvent("t1", 20, 8, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.steps[0]?.stepId).toBe("s2"); + expect(ordered[0]?.steps[1]?.stepId).toBe("s1"); + }); + + it("preserves first-seen order of turns", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t2", 10, 5, "s1")); + s = foldMetricsEvent(s, usageEvent("t1", 20, 8, "s1")); + s = foldMetricsEvent(s, doneEvent("t2")); + s = foldMetricsEvent(s, doneEvent("t1")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.turnId).toBe("t2"); + expect(ordered[1]?.turnId).toBe("t1"); + }); +}); + +describe("selectOrderedTurnMetrics", () => { + it("durable wins over live by turnId, live-done appended last", () => { + let s = initialMetricsState(); + + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, usageEvent("t2", 200, 80, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t2", "s1")); + s = foldMetricsEvent(s, doneEvent("t2")); + + s = applyDurableMetrics(s, [ + { + turnId: "t1", + usage: { inputTokens: 999, outputTokens: 999 }, + durationMs: 3000, + steps: [ + { + stepId: "s1" as StepId, + usage: { inputTokens: 999, outputTokens: 999 }, + genTotalMs: 3000, + }, + ], + }, + ]); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(2); + expect(ordered[0]?.turnId).toBe("t1"); + expect(ordered[0]?.total?.usage.inputTokens).toBe(999); + expect(ordered[0]?.total?.durationMs).toBe(3000); + expect(ordered[1]?.turnId).toBe("t2"); + expect(ordered[1]?.total?.durationMs).toBeUndefined(); + }); + + it("empty state returns empty", () => { + const s = initialMetricsState(); + expect(selectOrderedTurnMetrics(s)).toEqual([]); + }); + + it("selectOrderedTurnMetrics: in-flight turn exposes only completed steps and total=null", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 1000 })); + s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(1); + expect(ordered[0]?.turnId).toBe("t1"); + expect(ordered[0]?.steps).toHaveLength(1); + expect(ordered[0]?.steps[0]?.stepId).toBe("s1"); + expect(ordered[0]?.total).toBeNull(); + }); + + it("selectOrderedTurnMetrics: a turn with no complete step and not done is omitted", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2")); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(0); + }); + + it("selectOrderedTurnMetrics: after done, total is present", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 1000 })); + s = foldMetricsEvent(s, doneEvent("t1", { durationMs: 2000 })); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(1); + expect(ordered[0]?.turnId).toBe("t1"); + expect(ordered[0]?.total?.durationMs).toBe(2000); + expect(ordered[0]?.steps).toHaveLength(1); + }); + + it("step-complete marks the step complete", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 500 })); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(1); + expect(ordered[0]?.steps).toHaveLength(1); + expect(ordered[0]?.steps[0]?.stepId).toBe("s1"); + expect(ordered[0]?.steps[0]?.genTotalMs).toBe(500); + }); + + it("selectOrderedTurnMetrics: durable turn → steps + total present", () => { + let s = initialMetricsState(); + s = applyDurableMetrics(s, [ + { + turnId: "t1", + usage: { inputTokens: 300, outputTokens: 150 }, + durationMs: 5000, + steps: [ + { + stepId: "s1" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + genTotalMs: 1000, + }, + { + stepId: "s2" as StepId, + usage: { inputTokens: 200, outputTokens: 100 }, + genTotalMs: 2000, + }, + ], + }, + ]); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered).toHaveLength(1); + expect(ordered[0]?.turnId).toBe("t1"); + expect(ordered[0]?.steps).toHaveLength(2); + expect(ordered[0]?.steps[0]?.stepId).toBe("s1"); + expect(ordered[0]?.steps[1]?.stepId).toBe("s2"); + expect(ordered[0]?.total?.usage.inputTokens).toBe(300); + expect(ordered[0]?.total?.durationMs).toBe(5000); + }); +}); + +describe("applyDurableMetrics", () => { + it("stores durable turns in order", () => { + let s = initialMetricsState(); + s = applyDurableMetrics(s, [ + { turnId: "t1", usage: { inputTokens: 10, outputTokens: 5 }, steps: [] }, + { turnId: "t2", usage: { inputTokens: 20, outputTokens: 8 }, steps: [] }, + ]); + expect(s.durableOrder).toEqual(["t1", "t2"]); + expect(s.durable.size).toBe(2); + }); + + it("is idempotent for same turnId", () => { + let s = initialMetricsState(); + const turn = { + turnId: "t1", + usage: { inputTokens: 10, outputTokens: 5 }, + steps: [], + }; + s = applyDurableMetrics(s, [turn]); + s = applyDurableMetrics(s, [turn]); + expect(s.durableOrder).toEqual(["t1"]); + expect(s.durable.size).toBe(1); + }); + + it("overwrites durable turn data for same turnId", () => { + let s = initialMetricsState(); + s = applyDurableMetrics(s, [ + { turnId: "t1", usage: { inputTokens: 10, outputTokens: 5 }, steps: [] }, + ]); + s = applyDurableMetrics(s, [ + { turnId: "t1", usage: { inputTokens: 99, outputTokens: 99 }, steps: [] }, + ]); + expect(s.durable.get("t1")?.usage.inputTokens).toBe(99); + }); +}); diff --git a/src/core/metrics/reducer.ts b/src/core/metrics/reducer.ts new file mode 100644 index 0000000..d36dba1 --- /dev/null +++ b/src/core/metrics/reducer.ts @@ -0,0 +1,239 @@ +import type { AgentEvent, StepId, StepMetrics, TurnMetrics, Usage } from "@dispatch/wire"; +import type { BuildingStep, LiveTurn, MetricsState, TurnMetricsEntry } from "./types"; + +function sumStepUsages(steps: readonly BuildingStep[]): Usage { + let inputTokens = 0; + let outputTokens = 0; + let hasCacheRead = false; + let hasCacheWrite = false; + let cacheReadTokens = 0; + let cacheWriteTokens = 0; + + for (const step of steps) { + if (step.usage === undefined) continue; + inputTokens += step.usage.inputTokens; + outputTokens += step.usage.outputTokens; + if (step.usage.cacheReadTokens !== undefined && step.usage.cacheReadTokens > 0) { + hasCacheRead = true; + cacheReadTokens += step.usage.cacheReadTokens; + } + if (step.usage.cacheWriteTokens !== undefined && step.usage.cacheWriteTokens > 0) { + hasCacheWrite = true; + cacheWriteTokens += step.usage.cacheWriteTokens; + } + } + + const base: Usage = { inputTokens, outputTokens }; + if (hasCacheRead) { + (base as { cacheReadTokens?: number }).cacheReadTokens = cacheReadTokens; + } + if (hasCacheWrite) { + (base as { cacheWriteTokens?: number }).cacheWriteTokens = cacheWriteTokens; + } + return base; +} + +function buildingStepToMetrics(bs: BuildingStep): StepMetrics { + const usage: Usage = bs.usage ?? { inputTokens: 0, outputTokens: 0 }; + const base: StepMetrics = { stepId: bs.stepId as StepId, usage }; + if (bs.ttftMs !== undefined) { + (base as { ttftMs?: number }).ttftMs = bs.ttftMs; + } + if (bs.decodeMs !== undefined) { + (base as { decodeMs?: number }).decodeMs = bs.decodeMs; + } + if (bs.genTotalMs !== undefined) { + (base as { genTotalMs?: number }).genTotalMs = bs.genTotalMs; + } + return base; +} + +function getStep(lt: LiveTurn, id: string): BuildingStep { + const step = lt.stepMap.get(id); + if (step === undefined) throw new Error(`Missing step ${id} in live turn`); + return step; +} + +function liveTurnToMetrics(lt: LiveTurn): TurnMetrics { + const buildingSteps = lt.stepOrder.map((id) => getStep(lt, id)); + const steps = buildingSteps.map((bs) => buildingStepToMetrics(bs)); + const usage = lt.doneUsage ?? sumStepUsages(buildingSteps); + const base: TurnMetrics = { turnId: lt.turnId, usage, steps }; + if (lt.durationMs !== undefined) { + (base as { durationMs?: number }).durationMs = lt.durationMs; + } + return base; +} + +function ensureLiveTurn(state: MetricsState, turnId: string): [MetricsState, LiveTurn] { + const existing = state.live.get(turnId); + if (existing !== undefined) return [state, existing]; + + const newTurn: LiveTurn = { + turnId, + done: false, + durationMs: undefined, + doneUsage: undefined, + stepMap: new Map(), + stepOrder: [], + }; + const newLive = new Map(state.live); + newLive.set(turnId, newTurn); + return [{ ...state, live: newLive, liveOrder: [...state.liveOrder, turnId] }, newTurn]; +} + +function upsertStep(lt: LiveTurn, stepId: string, update: Partial<BuildingStep>): LiveTurn { + const existing = lt.stepMap.get(stepId); + if (existing !== undefined) { + const merged: BuildingStep = { + stepId, + usage: update.usage ?? existing.usage, + ttftMs: update.ttftMs ?? existing.ttftMs, + decodeMs: update.decodeMs ?? existing.decodeMs, + genTotalMs: update.genTotalMs ?? existing.genTotalMs, + complete: update.complete ?? existing.complete, + }; + const newMap = new Map(lt.stepMap); + newMap.set(stepId, merged); + return { ...lt, stepMap: newMap }; + } + + const fresh: BuildingStep = { + stepId, + usage: update.usage, + ttftMs: update.ttftMs, + decodeMs: update.decodeMs, + genTotalMs: update.genTotalMs, + complete: update.complete ?? false, + }; + const newMap = new Map(lt.stepMap); + newMap.set(stepId, fresh); + return { ...lt, stepMap: newMap, stepOrder: [...lt.stepOrder, stepId] }; +} + +/** The initial empty metrics state. */ +export function initialMetricsState(): MetricsState { + return { + live: new Map(), + liveOrder: [], + durable: new Map(), + durableOrder: [], + }; +} + +/** + * Fold one live AgentEvent into the metrics state. + * + * - `usage` with `stepId`: upsert that step's usage. + * - `usage` without `stepId`: ignored. + * - `step-complete`: upsert that step's timing; default usage to zeros if absent. + * - `done`: set turn's `durationMs` and optional aggregate `usage`. + * - All other event types: return state unchanged. + */ +export function foldMetricsEvent(state: MetricsState, event: AgentEvent): MetricsState { + switch (event.type) { + case "usage": { + if (event.stepId === undefined) return state; + const [s1, lt] = ensureLiveTurn(state, event.turnId); + const updated = upsertStep(lt, event.stepId, { usage: event.usage }); + const newLive = new Map(s1.live); + newLive.set(event.turnId, updated); + return { ...s1, live: newLive }; + } + + case "step-complete": { + const [s1, lt] = ensureLiveTurn(state, event.turnId); + const updated = upsertStep(lt, event.stepId, { + ttftMs: event.ttftMs, + decodeMs: event.decodeMs, + genTotalMs: event.genTotalMs, + complete: true, + }); + const newLive = new Map(s1.live); + newLive.set(event.turnId, updated); + return { ...s1, live: newLive }; + } + + case "done": { + const [s1, lt] = ensureLiveTurn(state, event.turnId); + const updated: LiveTurn = { + ...lt, + done: true, + durationMs: event.durationMs ?? lt.durationMs, + doneUsage: event.usage ?? lt.doneUsage, + }; + const newLive = new Map(s1.live); + newLive.set(event.turnId, updated); + return { ...s1, live: newLive }; + } + + default: + return state; + } +} + +/** + * Store durable (sealed) metrics from the backend. These win over live data + * for any shared `turnId`. + */ +export function applyDurableMetrics( + state: MetricsState, + turns: readonly TurnMetrics[], +): MetricsState { + const newDurable = new Map(state.durable); + const newDurableOrder = [...state.durableOrder]; + for (const turn of turns) { + if (!newDurable.has(turn.turnId)) { + newDurableOrder.push(turn.turnId); + } + newDurable.set(turn.turnId, turn); + } + return { + ...state, + durable: newDurable, + durableOrder: newDurableOrder, + }; +} + +/** + * Select the merged ordered list of turn metrics entries. + * Durable turns come first (in their order), then any live turns whose + * `turnId` is not in durable (in live first-seen order). + * + * Each entry contains the completed steps so far and an optional total + * (null until the turn is finalized via `done` or durable data). + * Live turns with no completed steps and not done are omitted. + */ +export function selectOrderedTurnMetrics(state: MetricsState): readonly TurnMetricsEntry[] { + const result: TurnMetricsEntry[] = []; + const seen = new Set<string>(); + + for (const turnId of state.durableOrder) { + const tm = state.durable.get(turnId); + if (tm !== undefined) { + result.push({ turnId, steps: tm.steps, total: tm }); + seen.add(turnId); + } + } + + for (const turnId of state.liveOrder) { + if (seen.has(turnId)) continue; + const lt = state.live.get(turnId); + if (lt === undefined) continue; + + const completeSteps = lt.stepOrder + .map((id) => lt.stepMap.get(id)) + .filter((s): s is BuildingStep => s?.complete === true) + .map((s) => buildingStepToMetrics(s)); + + if (completeSteps.length === 0 && !lt.done) continue; + + result.push({ + turnId, + steps: completeSteps, + total: lt.done ? liveTurnToMetrics(lt) : null, + }); + } + + return result; +} diff --git a/src/core/metrics/types.ts b/src/core/metrics/types.ts new file mode 100644 index 0000000..2b26e8d --- /dev/null +++ b/src/core/metrics/types.ts @@ -0,0 +1,68 @@ +import type { StepMetrics, TurnMetrics, Usage } from "@dispatch/wire"; +import type { RenderGroup } from "../chunks"; + +export type { StepMetrics, TurnMetrics }; + +/** A step being built from live events (may be incomplete). */ +export interface BuildingStep { + readonly stepId: string; + readonly usage: Usage | undefined; + readonly ttftMs: number | undefined; + readonly decodeMs: number | undefined; + readonly genTotalMs: number | undefined; + readonly complete: boolean; +} + +/** A turn being built from live events (in-flight). */ +export interface LiveTurn { + readonly turnId: string; + readonly done: boolean; + readonly durationMs: number | undefined; + readonly doneUsage: Usage | undefined; + readonly stepMap: ReadonlyMap<string, BuildingStep>; + readonly stepOrder: readonly string[]; +} + +/** + * Reducer state for per-turn / per-step token + timing metrics. + * + * - `live`: in-flight turns keyed by `turnId` in FIRST-SEEN order. + * - `durable`: sealed turns keyed by `turnId` in the order they arrived. + */ +export interface MetricsState { + readonly live: ReadonlyMap<string, LiveTurn>; + readonly liveOrder: readonly string[]; + readonly durable: ReadonlyMap<string, TurnMetrics>; + readonly durableOrder: readonly string[]; +} + +/** Per-turn placement entry: completed steps so far + optional turn total. */ +export interface TurnMetricsEntry { + readonly turnId: string; + readonly steps: readonly StepMetrics[]; + readonly total: TurnMetrics | null; +} + +/** A row in the interleaved transcript: a render group, per-step metrics, or turn metrics. */ +export type MetricsRow = + | { readonly kind: "group"; readonly group: RenderGroup } + | { readonly kind: "step-metrics"; readonly step: StepMetrics; readonly index: number } + | { readonly kind: "turn-metrics"; readonly turn: TurnMetrics }; + +/** Formatted per-step view for display. */ +export interface StepMetricsView { + readonly label: string; + readonly tokensLabel: string; + readonly tps: string | null; + readonly ttft: string | null; + readonly decode: string | null; + readonly genTotal: string | null; +} + +/** Formatted per-turn view for display. */ +export interface TurnMetricsView { + readonly tokensLabel: string; + readonly breakdown: string; + readonly tps: string | null; + readonly duration: string | null; +} diff --git a/src/features/chat/index.ts b/src/features/chat/index.ts index 4f2091a..ae3e1f8 100644 --- a/src/features/chat/index.ts +++ b/src/features/chat/index.ts @@ -1,6 +1,7 @@ export type { RenderedChunk, RenderGroup, ToolBatchEntry } from "../../core/chunks"; export { groupRenderedChunks } from "../../core/chunks"; -export type { ChatTransport, HistorySync } from "./ports"; +export type { TurnMetricsEntry } from "../../core/metrics"; +export type { ChatTransport, HistorySync, MetricsSync } from "./ports"; export type { ChatStore, ChatStoreDependencies } from "./store.svelte"; export { createChatStore } from "./store.svelte"; export { default as ChatView } from "./ui/ChatView.svelte"; diff --git a/src/features/chat/ports.ts b/src/features/chat/ports.ts index 07943c7..e28ebf6 100644 --- a/src/features/chat/ports.ts +++ b/src/features/chat/ports.ts @@ -1,4 +1,8 @@ -import type { ChatSendMessage, ConversationHistoryResponse } from "@dispatch/transport-contract"; +import type { + ChatSendMessage, + ConversationHistoryResponse, + ConversationMetricsResponse, +} from "@dispatch/transport-contract"; /** Injected transport port — sends chat messages to the server. */ export interface ChatTransport { @@ -10,3 +14,6 @@ export type HistorySync = ( conversationId: string, sinceSeq: number, ) => Promise<ConversationHistoryResponse>; + +/** Injected metrics-sync port — fetches persisted per-turn metrics from the server. */ +export type MetricsSync = (conversationId: string) => Promise<ConversationMetricsResponse>; diff --git a/src/features/chat/store.svelte.ts b/src/features/chat/store.svelte.ts index 1d8ab17..f4ad07b 100644 --- a/src/features/chat/store.svelte.ts +++ b/src/features/chat/store.svelte.ts @@ -13,20 +13,29 @@ import { selectChunks, selectMessages, } from "../../core/chunks"; +import type { MetricsState, TurnMetricsEntry } from "../../core/metrics"; +import { + applyDurableMetrics, + foldMetricsEvent, + initialMetricsState, + selectOrderedTurnMetrics, +} from "../../core/metrics"; import type { ConversationCache } from "../conversation-cache"; -import type { ChatTransport, HistorySync } from "./ports"; +import type { ChatTransport, HistorySync, MetricsSync } from "./ports"; export interface ChatStoreDependencies { readonly conversationId: string; readonly model?: string; readonly transport: ChatTransport; readonly historySync: HistorySync; + readonly metricsSync: MetricsSync; readonly cache: ConversationCache; } export interface ChatStore { readonly messages: readonly ChatMessage[]; readonly chunks: readonly RenderedChunk[]; + readonly turnMetrics: readonly TurnMetricsEntry[]; readonly pendingSync: boolean; readonly error: string | null; readonly model: string | undefined; @@ -39,6 +48,7 @@ export interface ChatStore { export function createChatStore(deps: ChatStoreDependencies): ChatStore { let transcript = $state<TranscriptState>(initialState()); + let metrics = $state<MetricsState>(initialMetricsState()); let _pendingSync = $state(false); let _error = $state<string | null>(null); let _model = $state<string | undefined>(deps.model); @@ -60,6 +70,17 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { } } + async function syncMetrics(): Promise<void> { + if (disposed) return; + try { + const res = await deps.metricsSync(deps.conversationId); + metrics = applyDurableMetrics(metrics, res.turns); + } catch { + // Metrics fetch failure must not block history sync or throw; + // live-folded metrics remain intact. + } + } + return { get messages(): readonly ChatMessage[] { return selectMessages(transcript); @@ -67,6 +88,9 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { get chunks(): readonly RenderedChunk[] { return selectChunks(transcript); }, + get turnMetrics(): readonly TurnMetricsEntry[] { + return selectOrderedTurnMetrics(metrics); + }, get pendingSync(): boolean { return _pendingSync; }, @@ -89,8 +113,10 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { return; } transcript = foldEvent(transcript, msg.event); + metrics = foldMetricsEvent(metrics, msg.event); if (transcript.sealedTurnId !== null) { void syncTail(); + void syncMetrics(); } }, @@ -115,6 +141,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { transcript = applyHistory(transcript, cached); } await syncTail(); + await syncMetrics(); }, dispose(): void { diff --git a/src/features/chat/store.test.ts b/src/features/chat/store.test.ts index 71781ac..1c99e7c 100644 --- a/src/features/chat/store.test.ts +++ b/src/features/chat/store.test.ts @@ -1,7 +1,12 @@ import type { AgentEvent, StepId, StoredChunk } from "@dispatch/wire"; import { describe, expect, it, vi } from "vitest"; import { createChatStore } from "./store.svelte"; -import { createFakeCache, createFakeHistorySync, createFakeTransport } from "./test-helpers"; +import { + createFakeCache, + createFakeHistorySync, + createFakeMetricsSync, + createFakeTransport, +} from "./test-helpers"; const CONV_ID = "test-conv-1"; @@ -21,11 +26,13 @@ describe("createChatStore", () => { it("folding a chat.delta updates messages", () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -51,11 +58,13 @@ describe("createChatStore", () => { it("turn-sealed triggers a history sync, commits to cache, and applies merged history", async () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -92,11 +101,13 @@ describe("createChatStore", () => { it("send posts a chat.send with conversationId", () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -114,12 +125,14 @@ describe("createChatStore", () => { it("send posts a chat.send with model when set", () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, model: "openai/gpt-4", transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -134,11 +147,13 @@ describe("createChatStore", () => { it("chat.error sets error", () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -154,6 +169,7 @@ describe("createChatStore", () => { it("load hydrates from cache then syncs the tail", async () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); // Pre-populate cache @@ -166,6 +182,7 @@ describe("createChatStore", () => { conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -184,6 +201,7 @@ describe("createChatStore", () => { it("load with empty cache still syncs", async () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); historySync.returnChunks = [makeStoredChunk(1, "assistant")]; @@ -192,6 +210,7 @@ describe("createChatStore", () => { conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -206,11 +225,13 @@ describe("createChatStore", () => { it("error is cleared on successful sync", async () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -236,11 +257,13 @@ describe("createChatStore", () => { it("dispose prevents further syncs", async () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -262,6 +285,7 @@ describe("createChatStore", () => { it("overlapping syncs are guarded", async () => { const transport = createFakeTransport(); const _historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); // Make the first sync slow @@ -283,6 +307,7 @@ describe("createChatStore", () => { conversationId: CONV_ID, transport: transport.impl, historySync: slowHistorySync, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -310,11 +335,13 @@ describe("createChatStore", () => { it("handles tool-call and tool-result chunks", () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -353,12 +380,14 @@ describe("createChatStore", () => { it("setModel changes the model used by the next send", () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, model: "openai/gpt-4", transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -375,11 +404,13 @@ describe("createChatStore", () => { it("setModel from undefined to a model", () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -396,11 +427,13 @@ describe("createChatStore", () => { it("handleDelta ignores a chat.delta for a different conversationId", () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -424,11 +457,13 @@ describe("createChatStore", () => { it("handleDelta ignores a chat.error for a different conversationId", () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -442,11 +477,13 @@ describe("createChatStore", () => { it("send optimistically shows the user message immediately", () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -464,11 +501,13 @@ describe("createChatStore", () => { it("the optimistic user message is replaced after turn-sealed + history sync", async () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); const cache = createFakeCache(); const store = createChatStore({ conversationId: CONV_ID, transport: transport.impl, historySync: historySync.impl, + metricsSync: metricsSync.impl, cache: cache.impl, }); @@ -496,4 +535,271 @@ describe("createChatStore", () => { store.dispose(); }); + + it("folding usage/step-complete/done deltas exposes turnMetrics", () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: metricsSync.impl, + cache: cache.impl, + }); + + expect(store.turnMetrics).toHaveLength(0); + + store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" })); + store.handleDelta( + deltaEvent({ + type: "usage", + conversationId: CONV_ID, + turnId: "t1", + stepId: "t1#0" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + }), + ); + store.handleDelta( + deltaEvent({ + type: "step-complete", + conversationId: CONV_ID, + turnId: "t1", + stepId: "t1#0" as StepId, + ttftMs: 200, + genTotalMs: 800, + }), + ); + store.handleDelta( + deltaEvent({ + type: "done", + conversationId: CONV_ID, + turnId: "t1", + reason: "end-turn", + durationMs: 1200, + usage: { inputTokens: 100, outputTokens: 50 }, + }), + ); + + expect(store.turnMetrics).toHaveLength(1); + const entry = store.turnMetrics[0]; + expect(entry?.turnId).toBe("t1"); + expect(entry?.steps).toHaveLength(1); + expect(entry?.steps[0]?.stepId).toBe("t1#0" as StepId); + expect(entry?.steps[0]?.usage.inputTokens).toBe(100); + expect(entry?.steps[0]?.genTotalMs).toBe(800); + expect(entry?.total).not.toBeNull(); + expect(entry?.total?.usage.inputTokens).toBe(100); + expect(entry?.total?.usage.outputTokens).toBe(50); + expect(entry?.total?.durationMs).toBe(1200); + + store.dispose(); + }); + + it("turnMetrics entry has total: null before done (progressive turn)", () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: metricsSync.impl, + cache: cache.impl, + }); + + store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" })); + store.handleDelta( + deltaEvent({ + type: "usage", + conversationId: CONV_ID, + turnId: "t1", + stepId: "t1#0" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + }), + ); + store.handleDelta( + deltaEvent({ + type: "step-complete", + conversationId: CONV_ID, + turnId: "t1", + stepId: "t1#0" as StepId, + ttftMs: 200, + genTotalMs: 800, + }), + ); + + expect(store.turnMetrics).toHaveLength(1); + const entry = store.turnMetrics[0]; + expect(entry?.turnId).toBe("t1"); + expect(entry?.steps).toHaveLength(1); + expect(entry?.steps[0]?.stepId).toBe("t1#0" as StepId); + expect(entry?.total).toBeNull(); + + store.dispose(); + }); + + it("metricsSync durable result overrides live by turnId", async () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: metricsSync.impl, + cache: cache.impl, + }); + + // Live fold gives some metrics + store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" })); + store.handleDelta( + deltaEvent({ + type: "usage", + conversationId: CONV_ID, + turnId: "t1", + stepId: "t1#0" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + }), + ); + store.handleDelta( + deltaEvent({ + type: "done", + conversationId: CONV_ID, + turnId: "t1", + reason: "end-turn", + usage: { inputTokens: 100, outputTokens: 50 }, + }), + ); + + expect(store.turnMetrics).toHaveLength(1); + expect(store.turnMetrics[0]?.total?.usage.outputTokens).toBe(50); + + // Durable sync returns different numbers for the same turnId + metricsSync.returnTurns = [ + { + turnId: "t1", + usage: { inputTokens: 200, outputTokens: 80 }, + durationMs: 500, + steps: [ + { + stepId: "t1#0" as StepId, + usage: { inputTokens: 200, outputTokens: 80 }, + genTotalMs: 400, + }, + ], + }, + ]; + + // Trigger metrics sync via turn-sealed + historySync.returnChunks = []; + store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t1" })); + + await vi.waitFor(() => { + expect(metricsSync.calls).toHaveLength(1); + }); + + // Durable should now override live (syncMetrics is async, wait for it) + await vi.waitFor(() => { + expect(store.turnMetrics[0]?.total?.usage.outputTokens).toBe(80); + }); + + expect(store.turnMetrics).toHaveLength(1); + expect(store.turnMetrics[0]?.total?.durationMs).toBe(500); + + store.dispose(); + }); + + it("rejected metricsSync leaves live metrics intact and does not throw", async () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: metricsSync.impl, + cache: cache.impl, + }); + + // Live fold some metrics + store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" })); + store.handleDelta( + deltaEvent({ + type: "usage", + conversationId: CONV_ID, + turnId: "t1", + stepId: "t1#0" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + }), + ); + store.handleDelta( + deltaEvent({ + type: "done", + conversationId: CONV_ID, + turnId: "t1", + reason: "end-turn", + usage: { inputTokens: 100, outputTokens: 50 }, + }), + ); + + expect(store.turnMetrics).toHaveLength(1); + + // Make the metrics sync reject + metricsSync.nextError = "metrics endpoint unavailable"; + + historySync.returnChunks = []; + store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t1" })); + + await vi.waitFor(() => { + expect(metricsSync.calls).toHaveLength(1); + }); + + // Live metrics should still be intact + expect(store.turnMetrics).toHaveLength(1); + expect(store.turnMetrics[0]?.total?.usage.outputTokens).toBe(50); + + // No error should have been thrown to the store + expect(store.error).toBeNull(); + + store.dispose(); + }); + + it("load calls metricsSync after history sync", async () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + + metricsSync.returnTurns = [ + { + turnId: "t1", + usage: { inputTokens: 300, outputTokens: 100 }, + durationMs: 900, + steps: [], + }, + ]; + + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: metricsSync.impl, + cache: cache.impl, + }); + + await store.load(); + + expect(historySync.calls).toHaveLength(1); + expect(metricsSync.calls).toHaveLength(1); + expect(metricsSync.calls[0]).toBe(CONV_ID); + expect(store.turnMetrics).toHaveLength(1); + expect(store.turnMetrics[0]?.total?.usage.inputTokens).toBe(300); + + store.dispose(); + }); }); diff --git a/src/features/chat/test-helpers.ts b/src/features/chat/test-helpers.ts index d37b59e..07dad26 100644 --- a/src/features/chat/test-helpers.ts +++ b/src/features/chat/test-helpers.ts @@ -1,6 +1,6 @@ import type { StoredChunk } from "@dispatch/wire"; import type { ConversationCache } from "../conversation-cache"; -import type { ChatTransport, HistorySync } from "./ports"; +import type { ChatTransport, HistorySync, MetricsSync } from "./ports"; export interface FakeTransport { readonly sent: import("@dispatch/transport-contract").ChatSendMessage[]; @@ -46,6 +46,44 @@ export function createFakeHistorySync(): FakeHistorySync { }; } +export interface FakeMetricsSync { + readonly calls: string[]; + returnTurns: import("@dispatch/wire").TurnMetrics[]; + /** If set, the next call will reject with this error. */ + nextError: string | undefined; + readonly impl: MetricsSync; +} + +export function createFakeMetricsSync(): FakeMetricsSync { + const calls: string[] = []; + let returnTurns: import("@dispatch/wire").TurnMetrics[] = []; + let nextError: string | undefined; + return { + calls, + get returnTurns() { + return returnTurns; + }, + set returnTurns(v: import("@dispatch/wire").TurnMetrics[]) { + returnTurns = v; + }, + get nextError() { + return nextError; + }, + set nextError(v: string | undefined) { + nextError = v; + }, + impl: async (conversationId) => { + calls.push(conversationId); + if (nextError !== undefined) { + const err = nextError; + nextError = undefined; + throw new Error(err); + } + return { turns: returnTurns }; + }, + }; +} + export interface FakeCache { readonly store: Map<string, StoredChunk[]>; readonly impl: ConversationCache; diff --git a/src/features/chat/ui.test.ts b/src/features/chat/ui.test.ts index b31cbf1..4abf717 100644 --- a/src/features/chat/ui.test.ts +++ b/src/features/chat/ui.test.ts @@ -3,6 +3,7 @@ import { render, screen } from "@testing-library/svelte"; import userEvent from "@testing-library/user-event"; import { describe, expect, it, vi } from "vitest"; import type { RenderedChunk } from "../../core/chunks"; +import type { TurnMetricsEntry } from "../../core/metrics"; import ChatView from "./ui/ChatView.svelte"; import Composer from "./ui/Composer.svelte"; import ModelSelector from "./ui/ModelSelector.svelte"; @@ -278,6 +279,224 @@ describe("ChatView", () => { expect(screen.getByRole("checkbox", { name: "Toggle thoughts" })).toBeChecked(); expect(container).toHaveTextContent("hmm, all done"); }); + + it("renders step and turn metrics as separate rows", () => { + const chunks: RenderedChunk[] = [ + { seq: 1, role: "user", chunk: { type: "text", text: "Hi" }, provisional: false }, + { + seq: 2, + role: "assistant", + chunk: { type: "text", text: "Hello!" }, + provisional: false, + }, + ]; + + const turnMetrics: TurnMetricsEntry[] = [ + { + turnId: "t1", + steps: [ + { + stepId: "t1#0" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + genTotalMs: 800, + }, + ], + total: { + turnId: "t1", + usage: { inputTokens: 100, outputTokens: 50 }, + durationMs: 1200, + steps: [ + { + stepId: "t1#0" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + genTotalMs: 800, + }, + ], + }, + }, + ]; + + render(ChatView, { props: { chunks, turnMetrics } }); + + expect(screen.getByText("Hi")).toBeInTheDocument(); + expect(screen.getByText("Hello!")).toBeInTheDocument(); + expect(screen.getByText(/step 1/)).toBeInTheDocument(); + expect(screen.getAllByText(/150 tok/)).toHaveLength(2); + expect(screen.getByText(/turn · 150 tok \(100 in \/ 50 out\)/)).toBeInTheDocument(); + expect(screen.getByText(/1\.2s/)).toBeInTheDocument(); + }); + + it("renders step-metrics inline after tool group", () => { + const chunks: RenderedChunk[] = [ + { seq: 1, role: "user", chunk: { type: "text", text: "Run it" }, provisional: false }, + { + seq: 2, + role: "assistant", + chunk: { + type: "tool-call", + toolCallId: "tc1", + toolName: "bash", + input: { command: "ls" }, + stepId: "t1#0" as StepId, + }, + provisional: false, + }, + { + seq: 3, + role: "tool", + chunk: { + type: "tool-result", + toolCallId: "tc1", + toolName: "bash", + content: "file.txt", + isError: false, + stepId: "t1#0" as StepId, + }, + provisional: false, + }, + { + seq: 4, + role: "assistant", + chunk: { type: "text", text: "Done!" }, + provisional: false, + }, + ]; + + const turnMetrics: TurnMetricsEntry[] = [ + { + turnId: "t1", + steps: [ + { + stepId: "t1#0" as StepId, + usage: { inputTokens: 80, outputTokens: 20 }, + genTotalMs: 300, + }, + ], + total: { + turnId: "t1", + usage: { inputTokens: 80, outputTokens: 20 }, + durationMs: 500, + steps: [ + { + stepId: "t1#0" as StepId, + usage: { inputTokens: 80, outputTokens: 20 }, + genTotalMs: 300, + }, + ], + }, + }, + ]; + + render(ChatView, { props: { chunks, turnMetrics } }); + + // Both step-metrics and turn-metrics render + expect(screen.getByText(/step 1/)).toBeInTheDocument(); + expect(screen.getByText(/turn · 100 tok/)).toBeInTheDocument(); + + // They are in separate elements (different rows) + const stepEl = screen.getByText(/step 1 · 100 tok/).closest("div"); + const turnEl = screen.getByText(/turn · 100 tok/).closest("div"); + expect(stepEl).not.toBe(turnEl); + }); + + it("renders no metrics bubble when turnMetrics is empty", () => { + const chunks: RenderedChunk[] = [ + { seq: 1, role: "user", chunk: { type: "text", text: "Hi" }, provisional: false }, + { + seq: 2, + role: "assistant", + chunk: { type: "text", text: "Hello!" }, + provisional: false, + }, + ]; + + render(ChatView, { props: { chunks, turnMetrics: [] } }); + + expect(screen.getByText("Hi")).toBeInTheDocument(); + expect(screen.getByText("Hello!")).toBeInTheDocument(); + expect(screen.queryByText(/step 1/)).toBeNull(); + expect(screen.queryByText(/^turn/)).toBeNull(); + }); + + it("omits null view values from metrics bubbles", () => { + const chunks: RenderedChunk[] = [ + { seq: 1, role: "user", chunk: { type: "text", text: "Test" }, provisional: false }, + { + seq: 2, + role: "assistant", + chunk: { type: "text", text: "Response" }, + provisional: false, + }, + ]; + + const turnMetrics: TurnMetricsEntry[] = [ + { + turnId: "t1", + steps: [ + { + stepId: "t1#0" as StepId, + usage: { inputTokens: 10, outputTokens: 5 }, + }, + ], + total: { + turnId: "t1", + usage: { inputTokens: 10, outputTokens: 5 }, + steps: [ + { + stepId: "t1#0" as StepId, + usage: { inputTokens: 10, outputTokens: 5 }, + }, + ], + }, + }, + ]; + + render(ChatView, { props: { chunks, turnMetrics } }); + + // Step metrics rendered + expect(screen.getByText(/step 1/)).toBeInTheDocument(); + expect(screen.getAllByText(/15 tok/)).toHaveLength(2); + // Turn metrics rendered + expect(screen.getByText(/turn · 15 tok \(10 in \/ 5 out\)/)).toBeInTheDocument(); + // No "null" or "undefined" in the DOM + expect(screen.queryByText("null")).toBeNull(); + expect(screen.queryByText("undefined")).toBeNull(); + }); + + it("renders step text but no turn total for a progressive turn (total: null)", () => { + const chunks: RenderedChunk[] = [ + { seq: 1, role: "user", chunk: { type: "text", text: "Hi" }, provisional: false }, + { + seq: 2, + role: "assistant", + chunk: { type: "text", text: "Hello!" }, + provisional: false, + }, + ]; + + const turnMetrics: TurnMetricsEntry[] = [ + { + turnId: "t1", + steps: [ + { + stepId: "t1#0" as StepId, + usage: { inputTokens: 100, outputTokens: 50 }, + genTotalMs: 800, + }, + ], + total: null, + }, + ]; + + render(ChatView, { props: { chunks, turnMetrics } }); + + // Step metrics should render + expect(screen.getByText(/step 1/)).toBeInTheDocument(); + expect(screen.getByText(/150 tok/)).toBeInTheDocument(); + + // Turn total should NOT render (total is null — turn still in progress) + expect(screen.queryByText(/^turn/)).toBeNull(); + }); }); describe("Composer", () => { diff --git a/src/features/chat/ui/ChatView.svelte b/src/features/chat/ui/ChatView.svelte index 3a078fb..ba6e961 100644 --- a/src/features/chat/ui/ChatView.svelte +++ b/src/features/chat/ui/ChatView.svelte @@ -1,17 +1,33 @@ <script lang="ts"> import { groupRenderedChunks, type RenderedChunk } from "../index"; + import { interleaveTurnMetrics, viewStepMetrics, viewTurnMetrics, type TurnMetricsEntry } from "../../../core/metrics"; - let { chunks }: { chunks: readonly RenderedChunk[] } = $props(); + let { + chunks, + turnMetrics = [], + }: { + chunks: readonly RenderedChunk[]; + turnMetrics?: readonly TurnMetricsEntry[]; + } = $props(); const groups = $derived(groupRenderedChunks(chunks)); + const rows = $derived(interleaveTurnMetrics(groups, turnMetrics)); + // Stable per-row keys. Thinking blocks get an ordinal key (`think<n>`) that // survives the provisional→committed (seq null → seq N) transition, so the // collapse's open/close state is NOT lost when a turn seals. (App isolates // these keys per conversation via {#key}.) - const rows = $derived.by(() => { + const keyedRows = $derived.by(() => { let thinking = 0; - return groups.map((group, i) => { + return rows.map((row, i) => { + if (row.kind === "step-metrics") { + return { row, key: `s${row.step.stepId}` }; + } + if (row.kind === "turn-metrics") { + return { row, key: `m${row.turn.turnId}` }; + } + const group = row.group; let key: string; if (group.kind === "tool-batch") { key = `b${group.stepId}`; @@ -22,7 +38,7 @@ } else { key = `p${i}`; } - return { group, key }; + return { row, key }; }); }); </script> @@ -102,9 +118,31 @@ {/snippet} <div class="flex flex-col gap-2 p-4 pl-6" role="log" aria-live="polite"> - {#each rows as { group, key } (key)} - {#if group.kind === "single"} - {@render chunkRow(group.chunk)} + {#each keyedRows as { row, key } (key)} + {#if row.kind === "step-metrics"} + {@const sv = viewStepMetrics(row.step, row.index)} + <div class="chat chat-start"> + <div class="chat-bubble w-full max-w-5xl bg-transparent p-0"> + <div class="text-xs opacity-70"> + {sv.label} · {sv.tokensLabel} + {#if sv.tps} · {sv.tps}{/if} + {#if sv.genTotal} · {sv.genTotal}{/if} + </div> + </div> + </div> + {:else if row.kind === "turn-metrics"} + {@const turnView = viewTurnMetrics(row.turn)} + <div class="chat chat-start"> + <div class="chat-bubble w-full max-w-5xl bg-transparent p-0"> + <div class="text-xs opacity-70"> + turn · {turnView.tokensLabel} ({turnView.breakdown}) + {#if turnView.tps} · {turnView.tps}{/if} + {#if turnView.duration} · {turnView.duration}{/if} + </div> + </div> + </div> + {:else if row.group.kind === "single"} + {@render chunkRow(row.group.chunk)} {:else} <!-- Batched tool calls (one step): a single bubble holding a DaisyUI list, one row per call paired with its result. Same chat-start grid shim as @@ -112,7 +150,7 @@ <div class="chat chat-start [&>.chat-bubble]:max-w-full [&>.chat-bubble]:p-0"> <div class="chat-bubble bg-transparent"> <ul class="list w-fit max-w-full rounded-box bg-base-200 text-sm"> - {#each group.entries as entry (entry.call.toolCallId)} + {#each row.group.entries as entry (entry.call.toolCallId)} <li class="list-row"> <div> <strong>{entry.call.toolName}</strong> |
