diff options
| author | Adam Malczewski <[email protected]> | 2026-06-07 18:41:27 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-07 18:41:27 +0900 |
| commit | 48c6d85c3cc5a57a729f14068e2346b17ed62088 (patch) | |
| tree | ec56590653f399f4a5feae0245652eba8f352ad5 | |
| parent | 2e79dd122e5664353e02e0d33715ae8c1041a379 (diff) | |
| download | dispatch-web-48c6d85c3cc5a57a729f14068e2346b17ed62088.tar.gz dispatch-web-48c6d85c3cc5a57a729f14068e2346b17ed62088.zip | |
feat(chat): live turn metrics — telemetry reducer + rendering
Consume wire/transport-contract 0.3.0 (step-complete event + timing
fields on usage/tool-result/done). Pure core/telemetry module:
foldMetricEvent (reducer) + derived selectors (stepTps, turnTps, etc).
TelemetryState is pure data, no active-turn tracking — consumers pass
turnId to selectors. ChatStore wires foldMetricEvent into handleDelta
and exposes telemetry + currentTurnId. ChatView shows step-metrics
footer (time/TPS/tokens) on assistant text bubbles and durationMs badge
on tool cards. New TurnSummary component renders turn-level stats
(wall-clock, tokens, steps, TPS) in a DaisyUI stats block. Extended
live-probe to verify telemetry events against bin/up (pending backend
restart). 336 tests passing, typecheck 0 errors, biome clean, build ok.
| -rw-r--r-- | .dispatch/transport-contract.reference.md | 14 | ||||
| -rw-r--r-- | .dispatch/wire.reference.md | 113 | ||||
| -rw-r--r-- | scripts/live-probe.ts | 77 | ||||
| -rw-r--r-- | src/app/App.svelte | 12 | ||||
| -rw-r--r-- | src/core/chunks/reducer.ts | 4 | ||||
| -rw-r--r-- | src/core/telemetry/index.ts | 14 | ||||
| -rw-r--r-- | src/core/telemetry/reducer.test.ts | 252 | ||||
| -rw-r--r-- | src/core/telemetry/reducer.ts | 122 | ||||
| -rw-r--r-- | src/core/telemetry/selectors.ts | 95 | ||||
| -rw-r--r-- | src/core/telemetry/types.ts | 35 | ||||
| -rw-r--r-- | src/core/wire/conformance.test.ts | 14 | ||||
| -rw-r--r-- | src/core/wire/conformance.ts | 2 | ||||
| -rw-r--r-- | src/features/chat/index.ts | 2 | ||||
| -rw-r--r-- | src/features/chat/store.svelte.ts | 12 | ||||
| -rw-r--r-- | src/features/chat/store.test.ts | 46 | ||||
| -rw-r--r-- | src/features/chat/ui.test.ts | 150 | ||||
| -rw-r--r-- | src/features/chat/ui/ChatView.svelte | 93 | ||||
| -rw-r--r-- | src/features/chat/ui/TurnSummary.svelte | 75 |
18 files changed, 1034 insertions, 98 deletions
diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md index fcc2cbf..ef0235a 100644 --- a/.dispatch/transport-contract.reference.md +++ b/.dispatch/transport-contract.reference.md @@ -5,15 +5,15 @@ > hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally — > this file is for READING only. > -> **Orchestrator:** SNAPSHOT of `transport-contract@0.2.0`. Regenerate whenever it changes. -> Depends on `@dispatch/wire@0.2.0` (see `wire.reference.md`) + `@dispatch/ui-contract` +> **Orchestrator:** SNAPSHOT of `transport-contract@0.3.0`. Regenerate whenever it changes. +> Depends on `@dispatch/wire@0.3.0` (see `wire.reference.md`) + `@dispatch/ui-contract` > (see `ui-contract.reference.md`). > -> **0.2.0 change (step grouping):** no shape change HERE — this contract's own types are -> identical. It only re-exports the bumped `@dispatch/wire`, whose `AgentEvent` tool variants -> now carry a required `stepId` and whose tool `Chunk`s carry an optional `stepId`. The -> `chat.delta` events streamed over WS and the `ConversationHistoryResponse.chunks` you already -> consume therefore now carry the step grouping key (see `wire.reference.md`). +> **0.3.0 change (live metrics):** no shape change HERE — this contract's own types are identical. +> It re-exports the bumped `@dispatch/wire`, whose `AgentEvent` union gained a `step-complete` +> variant and timing fields on `usage`/`tool-result`/`done`. So the `chat.delta` events you stream +> over WS now also carry the live metrics. See `frontend-metrics-handoff.md` for the full guide. +> (0.2.0: tool-call `stepId` grouping.) 
## Endpoints (backend, confirmed live — CORS wildcard `*`, HTTP port 24203, WS port 24205) diff --git a/.dispatch/wire.reference.md b/.dispatch/wire.reference.md index ed95351..7814bc3 100644 --- a/.dispatch/wire.reference.md +++ b/.dispatch/wire.reference.md @@ -4,13 +4,14 @@ > types WITHOUT following the `file:` dep symlink out of this repo (which hangs on a permission > prompt). Your CODE still imports `@dispatch/wire` normally — this file is for READING only. > -> **Orchestrator:** SNAPSHOT of `wire@0.2.0`. Regenerate whenever `@dispatch/wire` changes. +> **Orchestrator:** SNAPSHOT of `wire@0.3.0`. Regenerate whenever `@dispatch/wire` changes. > -> **0.2.0 change (step grouping):** `ToolCallChunk`/`ToolResultChunk` gained an OPTIONAL -> `stepId?: StepId`; `TurnToolCallEvent`/`TurnToolResultEvent` gained a REQUIRED `stepId: StepId`. -> A `StepId` is the per-step grouping key for batched/parallel tool calls — group by equality. -> Live: read `event.stepId`. Replay: read `storedChunk.chunk.stepId` (NOT the envelope; absent on -> pre-0.2.0 rows / non-tool chunks — tolerate absence). `StoredChunk` envelope is UNCHANGED. +> **0.3.0 change (live metrics — see `frontend-metrics-handoff.md` for the full guide):** new +> `TurnStepCompleteEvent` (`type:"step-complete"`) in the `AgentEvent` union with per-step +> `ttftMs?`/`decodeMs?`/`genTotalMs?`; `TurnUsageEvent` gained `stepId?`; `TurnToolResultEvent` +> gained `durationMs?` (tool exec time); `TurnDoneEvent` gained `durationMs?` (turn wall-clock) + +> `usage?` (turn total). All additive/optional — existing handling is unaffected. (0.2.0 added +> `stepId` for tool-call grouping.) 
```ts /** @@ -75,17 +76,7 @@ export interface ToolCallChunk { readonly toolCallId: string; readonly toolName: string; readonly input: unknown; - /** - * The step that produced this call — generation provenance stamped by the - * runtime when the model emits the call (NOT storage metadata like `seq`, - * which is why it lives on the chunk and travels with it through persistence - * and replay). Tool calls a model batches together in one step share the same - * `stepId`: the grouping key for rendering a parallel batch as one unit, and - * equal to the `stepId` on the matching `tool-call` AgentEvent. Optional: - * absent on chunks reconstructed outside a turn and on rows persisted before - * this field existed, so a consumer must tolerate its absence (render - * ungrouped). - */ + /** Step grouping key (generation provenance). Optional — tolerate absence. */ readonly stepId?: StepId; } @@ -100,14 +91,7 @@ export interface ToolResultChunk { readonly toolName: string; readonly content: string; readonly isError: boolean; - /** - * The step that produced the originating call — equal to the `stepId` on the - * matching `tool-call` chunk (same `toolCallId`) and on the `tool-result` - * AgentEvent, so a consumer groups a step's calls with their results. - * Generation provenance, not storage metadata (see `ToolCallChunk.stepId`). - * Optional for the same reasons; `reconcile` copies it from the originating - * call onto a synthesized (interrupted) result. - */ + /** Step grouping key — equals the originating call's. Optional. */ readonly stepId?: StepId; } @@ -138,16 +122,10 @@ export interface ChatMessage { } /** - * A persisted chunk plus its sync metadata. The append-only conversation log - * stamps every chunk with a monotonic, gap-free, per-conversation `seq` (the - * sync cursor, assigned in append order) and records the `role` of the message - * it belongs to. 
This makes a flat seq-ordered stream both incrementally - * syncable ("give me chunks after seq N") and regroupable into messages by the - * client. `chunk` is the content unit — `Chunk` carries no storage/sync cursor - * (`seq` lives here on the envelope, not on the chunk, since it is assigned by - * the store and the provider has no use for it). A chunk MAY still carry - * generation provenance assigned at production time (e.g. a tool chunk's - * `stepId`), which is intrinsic to the content and so travels with it. + * A persisted chunk plus its sync metadata: `{ seq, role, chunk }`. `seq` is the + * per-conversation sync cursor (envelope); a tool chunk's `stepId` rides on + * `chunk` (generation provenance). NOTE: usage/timing metrics are NOT persisted — + * they exist only on the live stream (see `frontend-metrics-handoff.md`). */ export interface StoredChunk { readonly seq: number; @@ -183,6 +161,7 @@ export type AgentEvent = | TurnToolResultEvent | TurnToolOutputEvent | TurnUsageEvent + | TurnStepCompleteEvent | TurnErrorEvent | TurnDoneEvent | TurnSealedEvent; @@ -222,13 +201,7 @@ export interface TurnToolCallEvent { readonly type: "tool-call"; readonly conversationId: string; readonly turnId: string; - /** - * The step that produced this call. Tool calls a model batches together in - * one step share the same `stepId` — the grouping key for rendering a - * parallel batch as one unit. Matches the `stepId` on the matching - * `tool-result` event and on the persisted tool chunk - * (`StoredChunk.chunk.stepId`). - */ + /** Step grouping key (matches the tool-result event + persisted chunk). */ readonly stepId: StepId; readonly toolCallId: string; readonly toolName: string; @@ -240,17 +213,18 @@ export interface TurnToolResultEvent { readonly type: "tool-result"; readonly conversationId: string; readonly turnId: string; - /** - * The step that produced the originating call. 
Equal to the `stepId` on the - * matching `tool-call` event (same `toolCallId`) and on the persisted tool - * chunk (`StoredChunk.chunk.stepId`), so a client groups a step's calls with - * their results. - */ + /** Step grouping key — equals the matching tool-call's. */ readonly stepId: StepId; readonly toolCallId: string; readonly toolName: string; readonly content: string; readonly isError: boolean; + /** + * How long the tool took to execute (dispatch → result), in milliseconds — + * the backend's authoritative execution time, distinct from any client-side + * wall-clock. Optional: present only when the runtime was given a clock. + */ + readonly durationMs?: number; } /** Streaming output from a tool execution (e.g. shell stdout/stderr). */ @@ -268,9 +242,42 @@ export interface TurnUsageEvent { readonly type: "usage"; readonly conversationId: string; readonly turnId: string; + /** + * The step this usage report belongs to, so a consumer can attribute tokens + * per step (and join with the matching `step-complete` timing by `stepId`). + * Optional: absent when the runtime had no step context. + */ + readonly stepId?: StepId; readonly usage: Usage; } +/** + * A step (one LLM round-trip) has completed — the authoritative per-step metrics + * packet, emitted once at the step's end (after the generation stream finishes), + * so its timing is final (unlike `usage`, which may arrive mid-stream). Carries + * the step's generation timing; join to the step's tokens via `stepId` on the + * `usage` event. All timing fields are optional: present only when the runtime + * was given a clock, and `ttftMs`/`decodeMs` additionally require that a first + * content token (text or reasoning) was observed this step. + */ +export interface TurnStepCompleteEvent { + readonly type: "step-complete"; + readonly conversationId: string; + readonly turnId: string; + readonly stepId: StepId; + /** Time to first token: stream start → first text/reasoning delta. 
*/ + readonly ttftMs?: number; + /** Decode time: first token → stream end (generation total − TTFT). */ + readonly decodeMs?: number; + /** + * Total generation time for the step: stream start → stream end. Present + * whenever a clock was available, even if no first token was seen (then + * `ttftMs`/`decodeMs` are absent). When a first token was seen, + * `genTotalMs === ttftMs + decodeMs`. + */ + readonly genTotalMs?: number; +} + /** An error occurred during the turn. */ export interface TurnErrorEvent { readonly type: "error"; @@ -286,6 +293,16 @@ export interface TurnDoneEvent { readonly conversationId: string; readonly turnId: string; readonly reason: string; + /** + * Total wall-clock duration of the turn (turn start → turn end), in + * milliseconds. Optional: present only when the runtime was given a clock. + */ + readonly durationMs?: number; + /** + * Aggregate token usage across all steps in the turn — a convenience total so + * a consumer need not sum the per-step `usage` events. Optional. + */ + readonly usage?: Usage; } /** diff --git a/scripts/live-probe.ts b/scripts/live-probe.ts index 2c4dfb9..f38c907 100644 --- a/scripts/live-probe.ts +++ b/scripts/live-probe.ts @@ -43,6 +43,13 @@ import { selectMessages, type TranscriptState, } from "../src/core/chunks/index.ts"; +import { + foldMetricEvent, + stepMetrics, + type TelemetryState, + initialState as telemetryInitialState, + turnMetrics, +} from "../src/core/telemetry/index.ts"; import { createConversationCache } from "../src/features/conversation-cache/index.ts"; const WS_URL = process.env.PROBE_WS ?? 
"ws://localhost:24205"; @@ -87,8 +94,15 @@ async function runTurn( socket: Socket, conversationId: string, prompt: string, -): Promise<{ state: TranscriptState; deltas: number; sealed: boolean; error: string | null }> { +): Promise<{ + state: TranscriptState; + telemetry: TelemetryState; + deltas: number; + sealed: boolean; + error: string | null; +}> { let state = initialState(); + let telemetry = telemetryInitialState(); let deltas = 0; let sealed = false; let error: string | null = null; @@ -102,6 +116,7 @@ async function runTurn( } deltas++; state = foldEvent(state, msg.event); + telemetry = foldMetricEvent(telemetry, msg.event); if (msg.event.type === "turn-sealed") { sealed = true; done.resolve(); @@ -113,7 +128,7 @@ async function runTurn( await done.promise; clearTimeout(timeout); handlers.delete(conversationId); - return { state, deltas, sealed, error }; + return { state, telemetry, deltas, sealed, error }; } function toolChunksOf(state: TranscriptState) { @@ -178,6 +193,44 @@ async function main() { .join(""); record("turn 1 committed transcript has assistant text", committedText.length > 0); + // ─── Turn 1 telemetry: verify step metrics populated ─────────────────────── + const t1Turn = turnMetrics(t1.telemetry, textConv); + const t1StepCount = t1Turn?.steps.length ?? 0; + record("turn 1 telemetry accumulated steps", t1StepCount > 0, `${t1StepCount} step(s)`); + if (t1StepCount > 0) { + const s0 = stepMetrics(t1.telemetry, textConv, 0); + const hasTiming = s0?.genTotalMs !== undefined || s0?.ttftMs !== undefined; + if (hasTiming) { + record( + "turn 1 step 0 has timing metrics", + true, + `ttftMs=${s0?.ttftMs ?? "–"} decodeMs=${s0?.decodeMs ?? "–"} genTotalMs=${s0?.genTotalMs ?? 
"–"}`, + ); + } else { + note( + "turn 1 step 0 has no timing (backend may not have a clock) — telemetry path verified but no timing to assert", + ); + } + const hasTokens = s0?.usage?.outputTokens !== undefined; + if (hasTokens) { + record( + "turn 1 step 0 has token usage", + true, + `in=${s0?.usage?.inputTokens ?? "–"} out=${s0?.usage?.outputTokens ?? "–"}`, + ); + } else { + note( + "turn 1 step 0 has no usage (stepId may not have been on the usage event) — telemetry path verified", + ); + } + } + const t1Done = t1Turn?.wallMs; + if (t1Done !== undefined) { + record("turn 1 done event recorded wall-clock", true, `${t1Done}ms`); + } else { + note("turn 1 done.durationMs absent (backend clock unavailable)"); + } + // ─── Turn 2: tool-call batching ([email protected] stepId) ───────────────────────── console.log(`\n[live-probe] TURN 2 (tools): "${TOOL_PROMPT}"`); const toolConv = crypto.randomUUID(); @@ -185,6 +238,26 @@ async function main() { if (t2.error !== null) record("turn 2 had no chat.error", false, t2.error); record("turn 2 reached turn-sealed", t2.sealed); + // ─── Turn 2 telemetry: verify step + tool metrics ────────────────────────── + const t2Turn = turnMetrics(t2.telemetry, toolConv); + const t2StepCount = t2Turn?.steps.length ?? 
0; + record("turn 2 telemetry accumulated steps", t2StepCount > 0, `${t2StepCount} step(s)`); + if (t2StepCount > 0) { + const s0 = stepMetrics(t2.telemetry, toolConv, 0); + if (s0?.toolDurationMs !== undefined && s0.toolDurationMs > 0) { + record("turn 2 step 0 has tool execution time", true, `toolDurationMs=${s0.toolDurationMs}`); + } else { + note("turn 2 step 0 has no toolDurationMs (tool-result.durationMs may be absent)"); + } + if (s0?.genTotalMs !== undefined) { + record("turn 2 step 0 has generation timing", true, `genTotalMs=${s0.genTotalMs}`); + } + } + const t2Done = t2Turn?.wallMs; + if (t2Done !== undefined) { + record("turn 2 done event recorded wall-clock", true, `${t2Done}ms`); + } + const liveTool = toolChunksOf(t2.state); const liveCalls = liveTool.filter((c) => c.chunk.type === "tool-call"); diff --git a/src/app/App.svelte b/src/app/App.svelte index 61b4cb9..e1d59f9 100644 --- a/src/app/App.svelte +++ b/src/app/App.svelte @@ -1,6 +1,6 @@ <script lang="ts"> import type { InvokeMessage } from "@dispatch/ui-contract"; - import { ChatView, Composer, ModelSelector } from "../features/chat"; + import { ChatView, Composer, ModelSelector, TurnSummary } from "../features/chat"; import { TabBar } from "../features/tabs"; import { SurfaceView } from "../features/surface-host"; import type { AppStore } from "./store.svelte"; @@ -62,7 +62,15 @@ <div class="flex-1 overflow-y-auto"> {#key store.activeConversationId} - <ChatView chunks={store.activeChat.chunks} /> + <ChatView + chunks={store.activeChat.chunks} + telemetry={store.activeChat.telemetry} + currentTurnId={store.activeChat.currentTurnId} + /> + <TurnSummary + telemetry={store.activeChat.telemetry} + turnId={store.activeChat.currentTurnId} + /> {/key} </div> diff --git a/src/core/chunks/reducer.ts b/src/core/chunks/reducer.ts index 1dcfa39..54b1922 100644 --- a/src/core/chunks/reducer.ts +++ b/src/core/chunks/reducer.ts @@ -148,6 +148,10 @@ export function foldEvent(state: TranscriptState, event: 
AgentEvent): Transcript case "usage": return { ...state, latestUsage: event.usage }; + case "step-complete": + // Timing metadata — no content chunk; handled by the telemetry reducer. + return state; + case "done": { const provisional = flushAccumulating(state.provisional, state.accumulating); return { diff --git a/src/core/telemetry/index.ts b/src/core/telemetry/index.ts new file mode 100644 index 0000000..a528b0d --- /dev/null +++ b/src/core/telemetry/index.ts @@ -0,0 +1,14 @@ +export { foldMetricEvent, initialState } from "./reducer"; +export { + stepCount, + stepMetrics, + stepToolDuration, + stepTps, + totalDecodeMs, + totalInputTokens, + totalOutputTokens, + turnMetrics, + turnTps, + turnTtft, +} from "./selectors"; +export type { StepMetrics, TelemetryState, TurnMetrics } from "./types"; diff --git a/src/core/telemetry/reducer.test.ts b/src/core/telemetry/reducer.test.ts new file mode 100644 index 0000000..119bf96 --- /dev/null +++ b/src/core/telemetry/reducer.test.ts @@ -0,0 +1,252 @@ +import type { StepId, Usage } from "@dispatch/wire"; +import { describe, expect, it } from "vitest"; +import { foldMetricEvent, initialState } from "./reducer"; +import { + stepCount, + stepMetrics, + stepToolDuration, + stepTps, + totalDecodeMs, + totalInputTokens, + totalOutputTokens, + turnMetrics, + turnTps, + turnTtft, +} from "./selectors"; + +const sid = (s: string) => s as StepId; + +const usage = (turnId: string, stepId: string, u: Usage) => ({ + type: "usage" as const, + conversationId: "c1", + turnId, + stepId: sid(stepId), + usage: u, +}); + +const stepComplete = ( + turnId: string, + stepId: string, + timing: { ttftMs?: number; decodeMs?: number; genTotalMs?: number }, +) => ({ + type: "step-complete" as const, + conversationId: "c1", + turnId, + stepId: sid(stepId), + ...timing, +}); + +describe("foldMetricEvent", () => { + it("turn-start initializes an empty turn", () => { + const s = foldMetricEvent(initialState(), { + type: "turn-start", + conversationId: 
"c1", + turnId: "t1", + }); + expect(s.turns.get("t1")?.steps).toEqual([]); + }); + + it("step-complete populates timing on a new step", () => { + let s = initialState(); + s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" }); + s = foldMetricEvent( + s, + stepComplete("t1", "s0", { ttftMs: 300, decodeMs: 800, genTotalMs: 1100 }), + ); + + const step = stepMetrics(s, "t1", 0); + expect(step?.ttftMs).toBe(300); + expect(step?.decodeMs).toBe(800); + expect(step?.genTotalMs).toBe(1100); + }); + + it("usage merges tokens into a step (joined by stepId)", () => { + let s = initialState(); + s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" }); + s = foldMetricEvent(s, stepComplete("t1", "s0", { genTotalMs: 500 })); + s = foldMetricEvent(s, usage("t1", "s0", { inputTokens: 100, outputTokens: 50 })); + + const step = stepMetrics(s, "t1", 0); + expect(step?.usage?.inputTokens).toBe(100); + expect(step?.usage?.outputTokens).toBe(50); + expect(step?.genTotalMs).toBe(500); // timing preserved + }); + + it("usage without stepId is ignored", () => { + let s = initialState(); + s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" }); + s = foldMetricEvent(s, { + type: "usage", + conversationId: "c1", + turnId: "t1", + usage: { inputTokens: 100, outputTokens: 50 }, + // no stepId + }); + expect(s.turns.get("t1")?.steps).toEqual([]); + }); + + it("tool-result accumulates durationMs into its step", () => { + let s = initialState(); + s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" }); + s = foldMetricEvent(s, stepComplete("t1", "s0", {})); + s = foldMetricEvent(s, { + type: "tool-result", + conversationId: "c1", + turnId: "t1", + stepId: sid("s0"), + toolCallId: "tc1", + toolName: "bash", + content: "", + isError: false, + durationMs: 120, + }); + s = foldMetricEvent(s, { + type: "tool-result", + conversationId: "c1", + turnId: "t1", + stepId: sid("s0"), + 
toolCallId: "tc2", + toolName: "bash", + content: "", + isError: false, + durationMs: 80, + }); + + const step = stepMetrics(s, "t1", 0); + expect(step?.toolDurationMs).toBe(200); + }); + + it("done records turn wall-clock and aggregate usage", () => { + let s = initialState(); + s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" }); + s = foldMetricEvent(s, { + type: "done", + conversationId: "c1", + turnId: "t1", + reason: "complete", + durationMs: 4200, + usage: { inputTokens: 800, outputTokens: 200 }, + }); + + const turn = turnMetrics(s, "t1"); + expect(turn?.wallMs).toBe(4200); + expect(turn?.doneUsage?.outputTokens).toBe(200); + }); + + it("events for an unknown turn are handled gracefully (step-complete, usage)", () => { + const s = initialState(); + // step-complete for a turn we haven't started — creates the turn. + const s2 = foldMetricEvent(s, stepComplete("t1", "s0", { ttftMs: 100 })); + expect(s2.turns.get("t1")?.steps[0]?.ttftMs).toBe(100); + }); + + it("multiple steps accumulate in order", () => { + let s = initialState(); + s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" }); + s = foldMetricEvent(s, stepComplete("t1", "s0", { genTotalMs: 100 })); + s = foldMetricEvent(s, stepComplete("t1", "s1", { genTotalMs: 200 })); + + expect(stepCount(s, "t1")).toBe(2); + expect(stepMetrics(s, "t1", 0)?.genTotalMs).toBe(100); + expect(stepMetrics(s, "t1", 1)?.genTotalMs).toBe(200); + }); + + it("non-metric events are no-ops", () => { + let s = initialState(); + s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" }); + s = foldMetricEvent(s, { + type: "text-delta", + conversationId: "c1", + turnId: "t1", + delta: "hi", + }); + s = foldMetricEvent(s, { + type: "turn-sealed", + conversationId: "c1", + turnId: "t1", + }); + expect(s.turns.get("t1")?.steps).toEqual([]); + }); +}); + +describe("selectors — derived metrics", () => { + function populatedState() { + let s = 
initialState(); + s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" }); + s = foldMetricEvent( + s, + stepComplete("t1", "s0", { ttftMs: 300, decodeMs: 700, genTotalMs: 1000 }), + ); + s = foldMetricEvent(s, usage("t1", "s0", { inputTokens: 500, outputTokens: 100 })); + s = foldMetricEvent( + s, + stepComplete("t1", "s1", { ttftMs: 200, decodeMs: 500, genTotalMs: 700 }), + ); + s = foldMetricEvent(s, usage("t1", "s1", { inputTokens: 600, outputTokens: 80 })); + s = foldMetricEvent(s, { + type: "done", + conversationId: "c1", + turnId: "t1", + reason: "complete", + durationMs: 3500, + usage: { inputTokens: 1100, outputTokens: 180 }, + }); + return s; + } + + it("stepTps = outputTokens / (decodeMs / 1000)", () => { + const s = populatedState(); + const step = stepMetrics(s, "t1", 0)!; + expect(stepTps(step)).toBeCloseTo(100 / 0.7, 2); + }); + + it("turnTtft returns first step's ttftMs", () => { + expect(turnTtft(populatedState(), "t1")).toBe(300); + }); + + it("totalDecodeMs sums all steps' decodeMs", () => { + expect(totalDecodeMs(populatedState(), "t1")).toBe(1200); + }); + + it("turnTps = outputTokens / (totalDecodeMs / 1000)", () => { + const s = populatedState(); + expect(turnTps(s, "t1")).toBeCloseTo(180 / 1.2, 2); + }); + + it("totalOutputTokens prefers done.usage over step sum", () => { + const s = populatedState(); + expect(totalOutputTokens(s, "t1")).toBe(180); // from done.usage + }); + + it("totalInputTokens prefers done.usage over step sum", () => { + const s = populatedState(); + expect(totalInputTokens(s, "t1")).toBe(1100); + }); + + it("stepToolDuration returns sum only when > 0", () => { + const withTools = foldMetricEvent( + foldMetricEvent(initialState(), { type: "turn-start", conversationId: "c1", turnId: "t1" }), + { + type: "tool-result", + conversationId: "c1", + turnId: "t1", + stepId: sid("s0"), + toolCallId: "tc1", + toolName: "bash", + content: "", + isError: false, + durationMs: 50, + }, + ); + const step = 
stepMetrics(withTools, "t1", 0)!; + expect(stepToolDuration(step)).toBe(50); + expect(stepToolDuration({ stepId: sid("s0") })).toBeUndefined(); + }); + + it("returns undefined for absent fields gracefully", () => { + const s = initialState(); + expect(turnMetrics(s, "missing")).toBeUndefined(); + expect(turnTtft(s, "missing")).toBeUndefined(); + expect(turnTps(s, "missing")).toBeUndefined(); + }); +}); diff --git a/src/core/telemetry/reducer.ts b/src/core/telemetry/reducer.ts new file mode 100644 index 0000000..4083231 --- /dev/null +++ b/src/core/telemetry/reducer.ts @@ -0,0 +1,122 @@ +import type { AgentEvent, StepId, Usage } from "@dispatch/wire"; +import type { StepMetrics, TelemetryState, TurnMetrics } from "./types"; + +/** The initial empty telemetry state. */ +export function initialState(): TelemetryState { + return { turns: new Map() }; +} + +function mergeStep(existing: StepMetrics, patch: StepMetrics): StepMetrics { + const merged: StepMetrics = { ...existing }; + if (patch.ttftMs !== undefined) (merged as { ttftMs?: number }).ttftMs = patch.ttftMs; + if (patch.decodeMs !== undefined) (merged as { decodeMs?: number }).decodeMs = patch.decodeMs; + if (patch.genTotalMs !== undefined) + (merged as { genTotalMs?: number }).genTotalMs = patch.genTotalMs; + if (patch.usage !== undefined) { + (merged as { usage?: Usage }).usage = { ...existing.usage, ...patch.usage }; + } + if (patch.toolDurationMs !== undefined) { + (merged as { toolDurationMs?: number }).toolDurationMs = + (existing.toolDurationMs ?? 
0) + patch.toolDurationMs; + } + return merged; +} + +function upsertStep( + steps: readonly StepMetrics[], + stepId: StepId, + patch: StepMetrics, +): readonly StepMetrics[] { + const idx = steps.findIndex((s) => s.stepId === stepId); + if (idx === -1) { + return [...steps, patch]; + } + return [...steps.slice(0, idx), mergeStep(steps[idx]!, patch), ...steps.slice(idx + 1)]; +} + +function setTurn( + turns: ReadonlyMap<string, TurnMetrics>, + turnId: string, + turn: TurnMetrics, +): ReadonlyMap<string, TurnMetrics> { + const next = new Map(turns); + next.set(turnId, turn); + return next; +} + +/** + * Fold one live AgentEvent into the telemetry state. + * + * - `turn-start` records the active turnId. + * - `step-complete` creates/updates the step's timing metrics. + * - `usage` merges token counts into the step (joined by `stepId`). + * - `tool-result` accumulates `durationMs` into the step. + * - `done` records turn-level wall-clock + token totals. + * - All other event types are no-ops (content events belong to the transcript). + * + * Pure: input → output, no DOM, no side effects. + */ +export function foldMetricEvent(state: TelemetryState, event: AgentEvent): TelemetryState { + switch (event.type) { + case "turn-start": { + return { + ...state, + turns: setTurn(state.turns, event.turnId, { steps: [] }), + }; + } + + case "step-complete": { + const turnId = event.turnId; + const existing = state.turns.get(turnId); + const patch: StepMetrics = { stepId: event.stepId }; + if (event.ttftMs !== undefined) (patch as { ttftMs?: number }).ttftMs = event.ttftMs; + if (event.decodeMs !== undefined) (patch as { decodeMs?: number }).decodeMs = event.decodeMs; + if (event.genTotalMs !== undefined) + (patch as { genTotalMs?: number }).genTotalMs = event.genTotalMs; + const steps = + existing !== undefined ? 
upsertStep(existing.steps, event.stepId, patch) : [patch]; + return { + ...state, + turns: setTurn(state.turns, turnId, { ...existing, steps } as TurnMetrics), + }; + } + + case "usage": { + if (event.stepId === undefined) return state; + const turnId = event.turnId; + const existing = state.turns.get(turnId); + const patch: StepMetrics = { stepId: event.stepId, usage: event.usage }; + const steps = + existing !== undefined ? upsertStep(existing.steps, event.stepId, patch) : [patch]; + return { + ...state, + turns: setTurn(state.turns, turnId, { ...existing, steps } as TurnMetrics), + }; + } + + case "tool-result": { + if (event.durationMs === undefined) return state; + const turnId = event.turnId; + const existing = state.turns.get(turnId); + if (existing === undefined) return state; + const patch: StepMetrics = { stepId: event.stepId, toolDurationMs: event.durationMs }; + const steps = upsertStep(existing.steps, event.stepId, patch); + return { ...state, turns: setTurn(state.turns, turnId, { ...existing, steps }) }; + } + + case "done": { + const turnId = event.turnId; + const existing = state.turns.get(turnId); + const updated: TurnMetrics = { + ...(existing ?? { steps: [] }), + }; + if (event.durationMs !== undefined) + (updated as { wallMs?: number }).wallMs = event.durationMs; + if (event.usage !== undefined) (updated as { doneUsage?: Usage }).doneUsage = event.usage; + return { ...state, turns: setTurn(state.turns, turnId, updated) }; + } + + default: + return state; + } +} diff --git a/src/core/telemetry/selectors.ts b/src/core/telemetry/selectors.ts new file mode 100644 index 0000000..ecf1794 --- /dev/null +++ b/src/core/telemetry/selectors.ts @@ -0,0 +1,95 @@ +import type { Usage } from "@dispatch/wire"; +import type { StepMetrics, TelemetryState, TurnMetrics } from "./types"; + +/** Get the metrics for a specific step within a turn. 
*/ +export function stepMetrics( + state: TelemetryState, + turnId: string, + stepIndex: number, +): StepMetrics | undefined { + return state.turns.get(turnId)?.steps[stepIndex]; +} + +/** Get the metrics for a turn. */ +export function turnMetrics(state: TelemetryState, turnId: string): TurnMetrics | undefined { + return state.turns.get(turnId); +} + +/** The number of steps in a turn. */ +export function stepCount(state: TelemetryState, turnId: string): number { + return state.turns.get(turnId)?.steps.length ?? 0; +} + +/** TTFT of the first step in a turn (the turn-visible first-token latency). */ +export function turnTtft(state: TelemetryState, turnId: string): number | undefined { + return state.turns.get(turnId)?.steps[0]?.ttftMs; +} + +/** Sum of all steps' decode times in a turn. */ +export function totalDecodeMs(state: TelemetryState, turnId: string): number | undefined { + const steps = state.turns.get(turnId)?.steps; + if (steps === undefined || steps.length === 0) return undefined; + let total = 0; + let found = false; + for (const s of steps) { + if (s.decodeMs !== undefined) { + total += s.decodeMs; + found = true; + } + } + return found ? total : undefined; +} + +/** Aggregate output tokens across all steps in a turn. */ +export function totalOutputTokens(state: TelemetryState, turnId: string): number | undefined { + const turn = state.turns.get(turnId); + if (turn === undefined) return undefined; + if (turn.doneUsage !== undefined) return turn.doneUsage.outputTokens; + let total = 0; + let found = false; + for (const s of turn.steps) { + if (s.usage?.outputTokens !== undefined) { + total += s.usage.outputTokens; + found = true; + } + } + return found ? total : undefined; +} + +/** Aggregate input tokens across all steps in a turn. 
*/ +export function totalInputTokens(state: TelemetryState, turnId: string): number | undefined { + const turn = state.turns.get(turnId); + if (turn === undefined) return undefined; + if (turn.doneUsage !== undefined) return turn.doneUsage.inputTokens; + let total = 0; + let found = false; + for (const s of turn.steps) { + if (s.usage?.inputTokens !== undefined) { + total += s.usage.inputTokens; + found = true; + } + } + return found ? total : undefined; +} + +/** Derived TPS for a step: outputTokens / (decodeMs / 1000). */ +export function stepTps(step: StepMetrics): number | undefined { + if (step.usage?.outputTokens === undefined || step.decodeMs === undefined) return undefined; + if (step.decodeMs === 0) return undefined; + return step.usage.outputTokens / (step.decodeMs / 1000); +} + +/** Derived aggregate TPS for a turn. */ +export function turnTps(state: TelemetryState, turnId: string): number | undefined { + const outTokens = totalOutputTokens(state, turnId); + const decode = totalDecodeMs(state, turnId); + if (outTokens === undefined || decode === undefined || decode === 0) return undefined; + return outTokens / (decode / 1000); +} + +/** Sum of tool execution durations within a step. */ +export function stepToolDuration(step: StepMetrics): number | undefined { + return step.toolDurationMs !== undefined && step.toolDurationMs > 0 + ? step.toolDurationMs + : undefined; +} diff --git a/src/core/telemetry/types.ts b/src/core/telemetry/types.ts new file mode 100644 index 0000000..395ec93 --- /dev/null +++ b/src/core/telemetry/types.ts @@ -0,0 +1,35 @@ +import type { StepId, Usage } from "@dispatch/wire"; + +/** + * Per-step metrics, accumulated from `step-complete` + `usage` events. + * All fields optional — absent when the backend had no clock or the step + * produced no text/reasoning token. 
+ */ +export interface StepMetrics { + readonly stepId: StepId; + readonly ttftMs?: number; + readonly decodeMs?: number; + readonly genTotalMs?: number; + readonly usage?: Usage; + readonly toolDurationMs?: number; // sum of tool-result.durationMs in this step +} + +/** + * Per-turn metrics, accumulated from `done` events + per-step aggregation. + */ +export interface TurnMetrics { + readonly wallMs?: number; + readonly doneUsage?: Usage; + readonly steps: readonly StepMetrics[]; +} + +/** + * Pure telemetry state — lives alongside but separate from TranscriptState. + * Accumulates live-only metric events; never persisted (history has no metrics). + * No "active turn" tracking — the consumer (store) passes the relevant turnId + * to the selectors. Pure: events flow in, derived values flow out. + */ +export interface TelemetryState { + /** turnId → TurnMetrics. Multiple turns accumulate (tab switching). */ + readonly turns: ReadonlyMap<string, TurnMetrics>; +} diff --git a/src/core/wire/conformance.test.ts b/src/core/wire/conformance.test.ts index 50b7f35..690ba4e 100644 --- a/src/core/wire/conformance.test.ts +++ b/src/core/wire/conformance.test.ts @@ -62,6 +62,15 @@ describe("classifies every AgentEvent type", () => { turnId: "t1", usage: { inputTokens: 10, outputTokens: 20 }, }, + { + type: "step-complete", + conversationId: "c1", + turnId: "t1", + stepId: "t1#0" as StepId, + ttftMs: 300, + decodeMs: 700, + genTotalMs: 1000, + }, { type: "error", conversationId: "c1", turnId: "t1", message: "oops" }, { type: "done", conversationId: "c1", turnId: "t1", reason: "complete" }, { type: "turn-sealed", conversationId: "c1", turnId: "t1" }, @@ -78,14 +87,15 @@ describe("classifies every AgentEvent type", () => { "tool-result", "tool-output", "usage", + "step-complete", "error", "done", "turn-sealed", ]); }); - it("covers all 11 AgentEvent variants", () => { - expect(samples).toHaveLength(11); + it("covers all 12 AgentEvent variants", () => { + 
expect(samples).toHaveLength(12); }); }); diff --git a/src/core/wire/conformance.ts b/src/core/wire/conformance.ts index 5d75a60..d89772e 100644 --- a/src/core/wire/conformance.ts +++ b/src/core/wire/conformance.ts @@ -30,6 +30,8 @@ export function assertAgentEventExhaustive(event: AgentEvent): string { return "done"; case "turn-sealed": return "turn-sealed"; + case "step-complete": + return "step-complete"; default: return event satisfies never; } diff --git a/src/features/chat/index.ts b/src/features/chat/index.ts index 4f2091a..b096cca 100644 --- a/src/features/chat/index.ts +++ b/src/features/chat/index.ts @@ -1,8 +1,10 @@ export type { RenderedChunk, RenderGroup, ToolBatchEntry } from "../../core/chunks"; export { groupRenderedChunks } from "../../core/chunks"; +export type { StepMetrics, TelemetryState, TurnMetrics } from "../../core/telemetry"; export type { ChatTransport, HistorySync } from "./ports"; export type { ChatStore, ChatStoreDependencies } from "./store.svelte"; export { createChatStore } from "./store.svelte"; export { default as ChatView } from "./ui/ChatView.svelte"; export { default as Composer } from "./ui/Composer.svelte"; export { default as ModelSelector } from "./ui/ModelSelector.svelte"; +export { default as TurnSummary } from "./ui/TurnSummary.svelte"; diff --git a/src/features/chat/store.svelte.ts b/src/features/chat/store.svelte.ts index 1d8ab17..58c165f 100644 --- a/src/features/chat/store.svelte.ts +++ b/src/features/chat/store.svelte.ts @@ -13,6 +13,8 @@ import { selectChunks, selectMessages, } from "../../core/chunks"; +import type { TelemetryState } from "../../core/telemetry"; +import { foldMetricEvent, initialState as telemetryInitialState } from "../../core/telemetry"; import type { ConversationCache } from "../conversation-cache"; import type { ChatTransport, HistorySync } from "./ports"; @@ -30,6 +32,8 @@ export interface ChatStore { readonly pendingSync: boolean; readonly error: string | null; readonly model: string | 
undefined; + readonly telemetry: TelemetryState; + readonly currentTurnId: string | null; handleDelta(msg: ChatDeltaMessage | ChatErrorMessage): void; send(text: string): void; setModel(model: string): void; @@ -42,6 +46,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { let _pendingSync = $state(false); let _error = $state<string | null>(null); let _model = $state<string | undefined>(deps.model); + let telemetry = $state<TelemetryState>(telemetryInitialState()); let disposed = false; async function syncTail(): Promise<void> { @@ -76,6 +81,12 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { get model(): string | undefined { return _model; }, + get telemetry(): TelemetryState { + return telemetry; + }, + get currentTurnId(): string | null { + return transcript.currentTurnId; + }, handleDelta(msg: ChatDeltaMessage | ChatErrorMessage): void { if (msg.type === "chat.error") { @@ -89,6 +100,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { return; } transcript = foldEvent(transcript, msg.event); + telemetry = foldMetricEvent(telemetry, msg.event); if (transcript.sealedTurnId !== null) { void syncTail(); } diff --git a/src/features/chat/store.test.ts b/src/features/chat/store.test.ts index 71781ac..347cdd7 100644 --- a/src/features/chat/store.test.ts +++ b/src/features/chat/store.test.ts @@ -393,6 +393,52 @@ describe("createChatStore", () => { store.dispose(); }); + it("folding step-complete and usage events populates telemetry", () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const cache = createFakeCache(); + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + cache: cache.impl, + }); + + store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" })); + store.handleDelta( + deltaEvent({ + type: "step-complete", + conversationId: 
CONV_ID, + turnId: "t1", + stepId: "t1#0" as StepId, + ttftMs: 300, + decodeMs: 700, + genTotalMs: 1000, + }), + ); + store.handleDelta( + deltaEvent({ + type: "usage", + conversationId: CONV_ID, + turnId: "t1", + stepId: "t1#0" as StepId, + usage: { inputTokens: 50, outputTokens: 20 }, + }), + ); + + const turn = store.telemetry.turns.get("t1"); + expect(turn).toBeDefined(); + expect(turn?.steps).toHaveLength(1); + const step = turn?.steps.find((s) => s.stepId === ("t1#0" as StepId)); + expect(step).toBeDefined(); + expect(step?.ttftMs).toBe(300); + expect(step?.decodeMs).toBe(700); + expect(step?.usage?.inputTokens).toBe(50); + expect(step?.usage?.outputTokens).toBe(20); + + store.dispose(); + }); + it("handleDelta ignores a chat.delta for a different conversationId", () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); diff --git a/src/features/chat/ui.test.ts b/src/features/chat/ui.test.ts index b31cbf1..02d3c5a 100644 --- a/src/features/chat/ui.test.ts +++ b/src/features/chat/ui.test.ts @@ -3,9 +3,15 @@ import { render, screen } from "@testing-library/svelte"; import userEvent from "@testing-library/user-event"; import { describe, expect, it, vi } from "vitest"; import type { RenderedChunk } from "../../core/chunks"; +import type { TelemetryState } from "../../core/telemetry"; +import { initialState } from "../../core/telemetry"; import ChatView from "./ui/ChatView.svelte"; import Composer from "./ui/Composer.svelte"; import ModelSelector from "./ui/ModelSelector.svelte"; +import TurnSummary from "./ui/TurnSummary.svelte"; + +const emptyTelemetry = initialState(); +const noTurnId = null; describe("ChatView", () => { it("renders a message's text chunk", () => { @@ -18,7 +24,7 @@ describe("ChatView", () => { }, ]; - render(ChatView, { props: { chunks } }); + render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } }); expect(screen.getByText("Hello world")).toBeInTheDocument(); }); @@ -34,7 
+40,7 @@ describe("ChatView", () => { }, ]; - render(ChatView, { props: { chunks } }); + render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } }); expect(screen.getByText("Hi there")).toBeInTheDocument(); expect(screen.getByText("Hello!")).toBeInTheDocument(); @@ -55,7 +61,7 @@ describe("ChatView", () => { }, ]; - render(ChatView, { props: { chunks } }); + render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } }); expect(screen.getByText("read_file")).toBeInTheDocument(); const pre = screen.getByText((content, element) => { @@ -80,7 +86,7 @@ describe("ChatView", () => { }, ]; - render(ChatView, { props: { chunks } }); + render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } }); expect(screen.getByText("read_file")).toBeInTheDocument(); expect(screen.getByText("file contents here")).toBeInTheDocument(); @@ -96,7 +102,7 @@ describe("ChatView", () => { }, ]; - render(ChatView, { props: { chunks } }); + render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } }); const alert = screen.getByRole("alert"); expect(alert).toHaveTextContent("Something failed"); @@ -112,7 +118,7 @@ describe("ChatView", () => { }, ]; - render(ChatView, { props: { chunks } }); + render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } }); expect(screen.getByText("Rate limited")).toBeInTheDocument(); expect(screen.getByText("[RATE_LIMIT]")).toBeInTheDocument(); @@ -128,7 +134,7 @@ describe("ChatView", () => { }, ]; - render(ChatView, { props: { chunks } }); + render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } }); expect(screen.getByText("System context loaded")).toBeInTheDocument(); }); @@ -143,7 +149,7 @@ describe("ChatView", () => { }, ]; - render(ChatView, { props: { chunks } }); + render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } }); // In-flight 
chunks render at full opacity (no faded "disabled" look). const wrapper = screen.getByText("Streaming...").closest("div"); @@ -151,7 +157,7 @@ describe("ChatView", () => { }); it("renders empty transcript", () => { - render(ChatView, { props: { chunks: [] } }); + render(ChatView, { props: { chunks: [], telemetry: emptyTelemetry, currentTurnId: noTurnId } }); const log = screen.getByRole("log"); expect(log).toBeInTheDocument(); @@ -199,7 +205,9 @@ describe("ChatView", () => { }, ]; - const { container } = render(ChatView, { props: { chunks } }); + const { container } = render(ChatView, { + props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId }, + }); // One DaisyUI list with two rows (one per call), not separate cards. const lists = container.querySelectorAll("ul.list"); @@ -224,7 +232,9 @@ describe("ChatView", () => { }, ]; - const { container } = render(ChatView, { props: { chunks } }); + const { container } = render(ChatView, { + props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId }, + }); const collapse = container.querySelector(".collapse"); expect(collapse).not.toBeNull(); @@ -247,7 +257,9 @@ describe("ChatView", () => { }, ]; - const { container, rerender } = render(ChatView, { props: { chunks: streaming } }); + const { container, rerender } = render(ChatView, { + props: { chunks: streaming, telemetry: emptyTelemetry, currentTurnId: noTurnId }, + }); // Streaming: "Thinking" + loading dots. expect(screen.getByText("Thinking")).toBeInTheDocument(); @@ -269,6 +281,8 @@ describe("ChatView", () => { provisional: false, }, ], + telemetry: emptyTelemetry, + currentTurnId: noTurnId, }); // Completed: "Thoughts", no dots — and the open state survived the transition. 
@@ -278,6 +292,118 @@ describe("ChatView", () => { expect(screen.getByRole("checkbox", { name: "Toggle thoughts" })).toBeChecked(); expect(container).toHaveTextContent("hmm, all done"); }); + + it("assistant text shows step metrics footer when step-complete data is available", () => { + const chunks: RenderedChunk[] = [ + { + seq: 1, + role: "assistant", + chunk: { type: "text", text: "Here is my answer" }, + provisional: false, + }, + ]; + + const telemetry: TelemetryState = { + turns: new Map([ + [ + "turn-1", + { + wallMs: 2500, + steps: [ + { + stepId: "turn-1#0" as StepId, + genTotalMs: 1200, + decodeMs: 1000, + usage: { inputTokens: 100, outputTokens: 86 }, + }, + ], + }, + ], + ]), + }; + + render(ChatView, { props: { chunks, telemetry, currentTurnId: "turn-1" } }); + + expect(screen.getByText("Here is my answer")).toBeInTheDocument(); + expect(screen.getByText("1.2s")).toBeInTheDocument(); + expect(screen.getByText("86 t/s")).toBeInTheDocument(); + expect(screen.getByText("86 tok")).toBeInTheDocument(); + }); + + it("does not show metrics footer when no step data exists", () => { + const chunks: RenderedChunk[] = [ + { + seq: 1, + role: "assistant", + chunk: { type: "text", text: "Still streaming" }, + provisional: true, + }, + ]; + + render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: "turn-1" } }); + + expect(screen.getByText("Still streaming")).toBeInTheDocument(); + expect(screen.queryByText("t/s")).toBeNull(); + expect(screen.queryByText("tok")).toBeNull(); + }); +}); + +describe("TurnSummary", () => { + it("renders turn stats when telemetry has data", () => { + const telemetry: TelemetryState = { + turns: new Map([ + [ + "turn-1", + { + wallMs: 4200, + steps: [ + { + stepId: "turn-1#0" as StepId, + genTotalMs: 2000, + decodeMs: 1500, + usage: { inputTokens: 500, outputTokens: 300 }, + }, + { + stepId: "turn-1#1" as StepId, + genTotalMs: 1800, + decodeMs: 1200, + usage: { inputTokens: 600, outputTokens: 200 }, + }, + ], + }, + 
], + ]), + }; + + render(TurnSummary, { props: { telemetry, turnId: "turn-1" } }); + + expect(screen.getByText("Turn")).toBeInTheDocument(); + expect(screen.getByText("4.2s")).toBeInTheDocument(); + expect(screen.getByText("Tokens")).toBeInTheDocument(); + expect(screen.getByText("1,600")).toBeInTheDocument(); + expect(screen.getByText("Output")).toBeInTheDocument(); + expect(screen.getByText("500")).toBeInTheDocument(); + expect(screen.getByText("Input")).toBeInTheDocument(); + expect(screen.getByText("1,100")).toBeInTheDocument(); + expect(screen.getByText("Steps")).toBeInTheDocument(); + expect(screen.getByText("2")).toBeInTheDocument(); + expect(screen.getByText("TPS")).toBeInTheDocument(); + expect(screen.getByText("185 t/s")).toBeInTheDocument(); + }); + + it("renders nothing when turnId is null", () => { + const { container } = render(TurnSummary, { + props: { telemetry: emptyTelemetry, turnId: null }, + }); + expect(container.querySelector(".stats")).toBeNull(); + }); + + it("renders nothing when turn metrics not found", () => { + const { container } = render(TurnSummary, { + props: { telemetry: emptyTelemetry, turnId: "nonexistent" }, + }); + expect(container.querySelector(".stats")).toBeNull(); + }); }); describe("Composer", () => { diff --git a/src/features/chat/ui/ChatView.svelte b/src/features/chat/ui/ChatView.svelte index 3a078fb..6acda53 100644 --- a/src/features/chat/ui/ChatView.svelte +++ b/src/features/chat/ui/ChatView.svelte @@ -1,16 +1,27 @@ <script lang="ts"> import { groupRenderedChunks, type RenderedChunk } from "../index"; + import type { TelemetryState } from "../../../core/telemetry"; + import { stepMetrics, stepTps } from "../../../core/telemetry"; - let { chunks }: { chunks: readonly RenderedChunk[] } = $props(); + interface Props { + chunks: readonly RenderedChunk[]; + telemetry: TelemetryState; + currentTurnId: string | null; + } + + let { chunks, telemetry, currentTurnId }: Props = $props(); const groups = 
$derived(groupRenderedChunks(chunks)); - // Stable per-row keys. Thinking blocks get an ordinal key (`think<n>`) that - // survives the provisional→committed (seq null → seq N) transition, so the - // collapse's open/close state is NOT lost when a turn seals. (App isolates - // these keys per conversation via {#key}.) + function formatMs(ms: number): string { + if (ms < 1000) return `${Math.round(ms)}ms`; + const s = ms / 1000; + return s < 60 ? `${s.toFixed(1)}s` : `${Math.floor(s / 60)}m${Math.round(s % 60)}s`; + } + const rows = $derived.by(() => { let thinking = 0; + let stepIdx = 0; return groups.map((group, i) => { let key: string; if (group.kind === "tool-batch") { @@ -22,14 +33,17 @@ } else { key = `p${i}`; } - return { group, key }; + const si = stepIdx; + if (group.kind === "tool-batch" || (group.kind === "single" && (group.chunk.chunk.type === "tool-call" || group.chunk.chunk.type === "tool-result"))) { + stepIdx++; + } + return { group, key, stepIdx: si }; }); }); </script> -{#snippet chunkRow(rendered: RenderedChunk)} +{#snippet chunkRow(rendered: RenderedChunk, sIdx: number)} {#if rendered.role === "user"} - <!-- User: a speech bubble, left-aligned --> <div class="chat chat-start"> <div class="chat-bubble chat-bubble-primary"> {#if rendered.chunk.type === "text"} @@ -38,9 +52,6 @@ </div> </div> {:else if rendered.chunk.type === "thinking"} - <!-- Thinking: a visible bubble (like tool cards), holding a checkbox collapse - (no arrow icon, smooth open/close). Title reads "Thinking" + loading dots - while generating, then "Thoughts" with no dots once complete. --> <div class="chat chat-start [&>.chat-bubble]:max-w-5xl [&>.chat-bubble]:p-0"> <div class="chat-bubble w-full bg-transparent"> <div class="collapse w-full rounded-box bg-base-200 text-sm"> @@ -58,14 +69,18 @@ </div> </div> {:else if rendered.chunk.type === "tool-call" || rendered.chunk.type === "tool-result"} - <!-- Single tool call/result: a regular (non-speech) card. 
Nested in the - chat-start grid via a transparent, padding-stripped chat-bubble shim so - the card inherits the same left offset as the bubble bodies. --> + {@const step = currentTurnId ? stepMetrics(telemetry, currentTurnId, sIdx) : undefined} + {@const toolDur = step?.toolDurationMs} <div class="chat chat-start [&>.chat-bubble]:max-w-full [&>.chat-bubble]:p-0"> <div class="chat-bubble bg-transparent"> {#if rendered.chunk.type === "tool-call"} <div class="w-fit max-w-full rounded-box bg-base-200 p-3 text-sm"> - <strong>{rendered.chunk.toolName}</strong> + <div class="flex items-center gap-2"> + <strong>{rendered.chunk.toolName}</strong> + {#if toolDur !== undefined && toolDur > 0} + <span class="badge badge-ghost badge-xs ml-auto">{formatMs(toolDur)}</span> + {/if} + </div> <pre class="text-xs mt-1">{JSON.stringify(rendered.chunk.input, null, 2)}</pre> </div> {:else} @@ -73,19 +88,43 @@ class="w-fit max-w-full rounded-box bg-base-200 p-3 text-sm" class:text-error={rendered.chunk.isError} > - <strong>{rendered.chunk.toolName}</strong> + <div class="flex items-center gap-2"> + <strong>{rendered.chunk.toolName}</strong> + {#if toolDur !== undefined && toolDur > 0} + <span class="badge badge-ghost badge-xs ml-auto">{formatMs(toolDur)}</span> + {/if} + </div> <pre class="text-xs mt-1">{rendered.chunk.content}</pre> </div> {/if} </div> </div> {:else} - <!-- Assistant text / system / error: an INVISIBLE speech bubble — same - chat-start grid as the user bubble, so it inherits identical left spacing. --> + {@const step = currentTurnId ? stepMetrics(telemetry, currentTurnId, sIdx) : undefined} + {@const tps = step ? 
stepTps(step) : undefined} <div class="chat chat-start [&>.chat-bubble]:max-w-5xl"> <div class="chat-bubble w-full bg-transparent"> {#if rendered.chunk.type === "text"} - <p>{rendered.chunk.text}</p> + <ul class="list rounded-box text-sm"> + <li class="list-row"> + <p>{rendered.chunk.text}</p> + </li> + {#if step && (step.genTotalMs !== undefined || tps !== undefined || step.usage?.outputTokens !== undefined)} + <li class="list-row"> + {#if step.genTotalMs !== undefined} + <span class="badge badge-ghost badge-xs">{formatMs(step.genTotalMs)}</span> + {/if} + <span>·</span> + {#if tps !== undefined} + <span class="badge badge-ghost badge-xs">{Math.round(tps)} t/s</span> + {/if} + <span>·</span> + {#if step.usage?.outputTokens !== undefined} + <span class="badge badge-ghost badge-xs">{step.usage.outputTokens} tok</span> + {/if} + </li> + {/if} + </ul> {:else if rendered.chunk.type === "error"} <div class="text-error" role="alert"> {rendered.chunk.message} @@ -102,20 +141,24 @@ {/snippet} <div class="flex flex-col gap-2 p-4 pl-6" role="log" aria-live="polite"> - {#each rows as { group, key } (key)} + {#each rows as { group, key, stepIdx } (key)} {#if group.kind === "single"} - {@render chunkRow(group.chunk)} + {@render chunkRow(group.chunk, stepIdx)} {:else} - <!-- Batched tool calls (one step): a single bubble holding a DaisyUI list, - one row per call paired with its result. Same chat-start grid shim as - the single tool card so it lines up with the other messages. --> + {@const step = currentTurnId ? 
stepMetrics(telemetry, currentTurnId, stepIdx) : undefined} + {@const toolDur = step?.toolDurationMs} <div class="chat chat-start [&>.chat-bubble]:max-w-full [&>.chat-bubble]:p-0"> <div class="chat-bubble bg-transparent"> <ul class="list w-fit max-w-full rounded-box bg-base-200 text-sm"> {#each group.entries as entry (entry.call.toolCallId)} <li class="list-row"> <div> - <strong>{entry.call.toolName}</strong> + <div class="flex items-center gap-2"> + <strong>{entry.call.toolName}</strong> + {#if toolDur !== undefined && toolDur > 0} + <span class="badge badge-ghost badge-xs ml-auto">{formatMs(toolDur)}</span> + {/if} + </div> <pre class="text-xs mt-1">{JSON.stringify(entry.call.input, null, 2)}</pre> {#if entry.result} <pre diff --git a/src/features/chat/ui/TurnSummary.svelte b/src/features/chat/ui/TurnSummary.svelte new file mode 100644 index 0000000..eedb0cc --- /dev/null +++ b/src/features/chat/ui/TurnSummary.svelte @@ -0,0 +1,75 @@ +<script lang="ts"> + import type { TelemetryState } from "../../../core/telemetry"; + import { + stepCount, + totalInputTokens, + totalOutputTokens, + turnMetrics, + turnTps, + } from "../../../core/telemetry"; + + interface Props { + telemetry: TelemetryState; + turnId: string | null; + } + + let { telemetry, turnId }: Props = $props(); + + function formatMs(ms: number): string { + if (ms < 1000) return `${Math.round(ms)}ms`; + const s = ms / 1000; + return s < 60 ? 
`${s.toFixed(1)}s` : `${Math.floor(s / 60)}m${Math.round(s % 60)}s`; + } + + const stats = $derived.by(() => { + if (turnId === null) return null; + const metrics = turnMetrics(telemetry, turnId); + if (metrics === undefined) return null; + + const items: { label: string; value: string }[] = []; + + if (metrics.wallMs !== undefined) { + items.push({ label: "Turn", value: formatMs(metrics.wallMs) }); + } + + const outTokens = totalOutputTokens(telemetry, turnId); + const inTokens = totalInputTokens(telemetry, turnId); + if (outTokens !== undefined || inTokens !== undefined) { + const total = (outTokens ?? 0) + (inTokens ?? 0); + items.push({ label: "Tokens", value: total.toLocaleString() }); + } + if (outTokens !== undefined) { + items.push({ label: "Output", value: outTokens.toLocaleString() }); + } + if (inTokens !== undefined) { + items.push({ label: "Input", value: inTokens.toLocaleString() }); + } + + const count = stepCount(telemetry, turnId); + if (count > 0) { + items.push({ label: "Steps", value: String(count) }); + } + + const tps = turnTps(telemetry, turnId); + if (tps !== undefined) { + items.push({ label: "TPS", value: `${Math.round(tps)} t/s` }); + } + + return items; + }); +</script> + +{#if stats !== null} + <div class="chat chat-start [&>.chat-bubble]:max-w-5xl"> + <div class="chat-bubble w-full bg-transparent"> + <div class="stats stats-vertical lg:stats-horizontal"> + {#each stats as stat} + <div class="stat"> + <div class="stat-title">{stat.label}</div> + <div class="stat-value text-sm">{stat.value}</div> + </div> + {/each} + </div> + </div> + </div> +{/if} |
