summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Adam Malczewski <[email protected]> 2026-06-07 18:41:27 +0900
committer: Adam Malczewski <[email protected]> 2026-06-07 18:41:27 +0900
commit: 48c6d85c3cc5a57a729f14068e2346b17ed62088 (patch)
tree: ec56590653f399f4a5feae0245652eba8f352ad5
parent: 2e79dd122e5664353e02e0d33715ae8c1041a379 (diff)
download: dispatch-web-48c6d85c3cc5a57a729f14068e2346b17ed62088.tar.gz
dispatch-web-48c6d85c3cc5a57a729f14068e2346b17ed62088.zip
feat(chat): live turn metrics — telemetry reducer + rendering
Consume wire/transport-contract 0.3.0 (step-complete event + timing fields on usage/tool-result/done). Pure core/telemetry module: foldMetricEvent (reducer) + derived selectors (stepTps, turnTps, etc). TelemetryState is pure data, no active-turn tracking — consumers pass turnId to selectors. ChatStore wires foldMetricEvent into handleDelta and exposes telemetry + currentTurnId. ChatView shows step-metrics footer (time/TPS/tokens) on assistant text bubbles and durationMs badge on tool cards. New TurnSummary component renders turn-level stats (wall-clock, tokens, steps, TPS) in a DaisyUI stats block. Extended live-probe to verify telemetry events against bin/up (pending backend restart). 336 tests, typecheck 0, biome clean, build ok.
-rw-r--r--.dispatch/transport-contract.reference.md14
-rw-r--r--.dispatch/wire.reference.md113
-rw-r--r--scripts/live-probe.ts77
-rw-r--r--src/app/App.svelte12
-rw-r--r--src/core/chunks/reducer.ts4
-rw-r--r--src/core/telemetry/index.ts14
-rw-r--r--src/core/telemetry/reducer.test.ts252
-rw-r--r--src/core/telemetry/reducer.ts122
-rw-r--r--src/core/telemetry/selectors.ts95
-rw-r--r--src/core/telemetry/types.ts35
-rw-r--r--src/core/wire/conformance.test.ts14
-rw-r--r--src/core/wire/conformance.ts2
-rw-r--r--src/features/chat/index.ts2
-rw-r--r--src/features/chat/store.svelte.ts12
-rw-r--r--src/features/chat/store.test.ts46
-rw-r--r--src/features/chat/ui.test.ts150
-rw-r--r--src/features/chat/ui/ChatView.svelte93
-rw-r--r--src/features/chat/ui/TurnSummary.svelte75
18 files changed, 1034 insertions, 98 deletions
diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md
index fcc2cbf..ef0235a 100644
--- a/.dispatch/transport-contract.reference.md
+++ b/.dispatch/transport-contract.reference.md
@@ -5,15 +5,15 @@
> hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally —
> this file is for READING only.
>
-> **Orchestrator:** SNAPSHOT of `transport-contract@0.2.0`. Regenerate whenever it changes.
-> Depends on `@dispatch/wire@0.2.0` (see `wire.reference.md`) + `@dispatch/ui-contract`
+> **Orchestrator:** SNAPSHOT of `transport-contract@0.3.0`. Regenerate whenever it changes.
+> Depends on `@dispatch/wire@0.3.0` (see `wire.reference.md`) + `@dispatch/ui-contract`
> (see `ui-contract.reference.md`).
>
-> **0.2.0 change (step grouping):** no shape change HERE — this contract's own types are
-> identical. It only re-exports the bumped `@dispatch/wire`, whose `AgentEvent` tool variants
-> now carry a required `stepId` and whose tool `Chunk`s carry an optional `stepId`. The
-> `chat.delta` events streamed over WS and the `ConversationHistoryResponse.chunks` you already
-> consume therefore now carry the step grouping key (see `wire.reference.md`).
+> **0.3.0 change (live metrics):** no shape change HERE — this contract's own types are identical.
+> It re-exports the bumped `@dispatch/wire`, whose `AgentEvent` union gained a `step-complete`
+> variant and timing fields on `usage`/`tool-result`/`done`. So the `chat.delta` events you stream
+> over WS now also carry the live metrics. See `frontend-metrics-handoff.md` for the full guide.
+> (0.2.0: tool-call `stepId` grouping.)
## Endpoints (backend, confirmed live — CORS wildcard `*`, HTTP port 24203, WS port 24205)
diff --git a/.dispatch/wire.reference.md b/.dispatch/wire.reference.md
index ed95351..7814bc3 100644
--- a/.dispatch/wire.reference.md
+++ b/.dispatch/wire.reference.md
@@ -4,13 +4,14 @@
> types WITHOUT following the `file:` dep symlink out of this repo (which hangs on a permission
> prompt). Your CODE still imports `@dispatch/wire` normally — this file is for READING only.
>
-> **Orchestrator:** SNAPSHOT of `wire@0.2.0`. Regenerate whenever `@dispatch/wire` changes.
+> **Orchestrator:** SNAPSHOT of `wire@0.3.0`. Regenerate whenever `@dispatch/wire` changes.
>
-> **0.2.0 change (step grouping):** `ToolCallChunk`/`ToolResultChunk` gained an OPTIONAL
-> `stepId?: StepId`; `TurnToolCallEvent`/`TurnToolResultEvent` gained a REQUIRED `stepId: StepId`.
-> A `StepId` is the per-step grouping key for batched/parallel tool calls — group by equality.
-> Live: read `event.stepId`. Replay: read `storedChunk.chunk.stepId` (NOT the envelope; absent on
-> pre-0.2.0 rows / non-tool chunks — tolerate absence). `StoredChunk` envelope is UNCHANGED.
+> **0.3.0 change (live metrics — see `frontend-metrics-handoff.md` for the full guide):** new
+> `TurnStepCompleteEvent` (`type:"step-complete"`) in the `AgentEvent` union with per-step
+> `ttftMs?`/`decodeMs?`/`genTotalMs?`; `TurnUsageEvent` gained `stepId?`; `TurnToolResultEvent`
+> gained `durationMs?` (tool exec time); `TurnDoneEvent` gained `durationMs?` (turn wall-clock) +
+> `usage?` (turn total). All additive/optional — existing handling is unaffected. (0.2.0 added
+> `stepId` for tool-call grouping.)
```ts
/**
@@ -75,17 +76,7 @@ export interface ToolCallChunk {
readonly toolCallId: string;
readonly toolName: string;
readonly input: unknown;
- /**
- * The step that produced this call — generation provenance stamped by the
- * runtime when the model emits the call (NOT storage metadata like `seq`,
- * which is why it lives on the chunk and travels with it through persistence
- * and replay). Tool calls a model batches together in one step share the same
- * `stepId`: the grouping key for rendering a parallel batch as one unit, and
- * equal to the `stepId` on the matching `tool-call` AgentEvent. Optional:
- * absent on chunks reconstructed outside a turn and on rows persisted before
- * this field existed, so a consumer must tolerate its absence (render
- * ungrouped).
- */
+ /** Step grouping key (generation provenance). Optional — tolerate absence. */
readonly stepId?: StepId;
}
@@ -100,14 +91,7 @@ export interface ToolResultChunk {
readonly toolName: string;
readonly content: string;
readonly isError: boolean;
- /**
- * The step that produced the originating call — equal to the `stepId` on the
- * matching `tool-call` chunk (same `toolCallId`) and on the `tool-result`
- * AgentEvent, so a consumer groups a step's calls with their results.
- * Generation provenance, not storage metadata (see `ToolCallChunk.stepId`).
- * Optional for the same reasons; `reconcile` copies it from the originating
- * call onto a synthesized (interrupted) result.
- */
+ /** Step grouping key — equals the originating call's. Optional. */
readonly stepId?: StepId;
}
@@ -138,16 +122,10 @@ export interface ChatMessage {
}
/**
- * A persisted chunk plus its sync metadata. The append-only conversation log
- * stamps every chunk with a monotonic, gap-free, per-conversation `seq` (the
- * sync cursor, assigned in append order) and records the `role` of the message
- * it belongs to. This makes a flat seq-ordered stream both incrementally
- * syncable ("give me chunks after seq N") and regroupable into messages by the
- * client. `chunk` is the content unit — `Chunk` carries no storage/sync cursor
- * (`seq` lives here on the envelope, not on the chunk, since it is assigned by
- * the store and the provider has no use for it). A chunk MAY still carry
- * generation provenance assigned at production time (e.g. a tool chunk's
- * `stepId`), which is intrinsic to the content and so travels with it.
+ * A persisted chunk plus its sync metadata: `{ seq, role, chunk }`. `seq` is the
+ * per-conversation sync cursor (envelope); a tool chunk's `stepId` rides on
+ * `chunk` (generation provenance). NOTE: usage/timing metrics are NOT persisted —
+ * they exist only on the live stream (see `frontend-metrics-handoff.md`).
*/
export interface StoredChunk {
readonly seq: number;
@@ -183,6 +161,7 @@ export type AgentEvent =
| TurnToolResultEvent
| TurnToolOutputEvent
| TurnUsageEvent
+ | TurnStepCompleteEvent
| TurnErrorEvent
| TurnDoneEvent
| TurnSealedEvent;
@@ -222,13 +201,7 @@ export interface TurnToolCallEvent {
readonly type: "tool-call";
readonly conversationId: string;
readonly turnId: string;
- /**
- * The step that produced this call. Tool calls a model batches together in
- * one step share the same `stepId` — the grouping key for rendering a
- * parallel batch as one unit. Matches the `stepId` on the matching
- * `tool-result` event and on the persisted tool chunk
- * (`StoredChunk.chunk.stepId`).
- */
+ /** Step grouping key (matches the tool-result event + persisted chunk). */
readonly stepId: StepId;
readonly toolCallId: string;
readonly toolName: string;
@@ -240,17 +213,18 @@ export interface TurnToolResultEvent {
readonly type: "tool-result";
readonly conversationId: string;
readonly turnId: string;
- /**
- * The step that produced the originating call. Equal to the `stepId` on the
- * matching `tool-call` event (same `toolCallId`) and on the persisted tool
- * chunk (`StoredChunk.chunk.stepId`), so a client groups a step's calls with
- * their results.
- */
+ /** Step grouping key — equals the matching tool-call's. */
readonly stepId: StepId;
readonly toolCallId: string;
readonly toolName: string;
readonly content: string;
readonly isError: boolean;
+ /**
+ * How long the tool took to execute (dispatch → result), in milliseconds —
+ * the backend's authoritative execution time, distinct from any client-side
+ * wall-clock. Optional: present only when the runtime was given a clock.
+ */
+ readonly durationMs?: number;
}
/** Streaming output from a tool execution (e.g. shell stdout/stderr). */
@@ -268,9 +242,42 @@ export interface TurnUsageEvent {
readonly type: "usage";
readonly conversationId: string;
readonly turnId: string;
+ /**
+ * The step this usage report belongs to, so a consumer can attribute tokens
+ * per step (and join with the matching `step-complete` timing by `stepId`).
+ * Optional: absent when the runtime had no step context.
+ */
+ readonly stepId?: StepId;
readonly usage: Usage;
}
+/**
+ * A step (one LLM round-trip) has completed — the authoritative per-step metrics
+ * packet, emitted once at the step's end (after the generation stream finishes),
+ * so its timing is final (unlike `usage`, which may arrive mid-stream). Carries
+ * the step's generation timing; join to the step's tokens via `stepId` on the
+ * `usage` event. All timing fields are optional: present only when the runtime
+ * was given a clock, and `ttftMs`/`decodeMs` additionally require that a first
+ * content token (text or reasoning) was observed this step.
+ */
+export interface TurnStepCompleteEvent {
+ readonly type: "step-complete";
+ readonly conversationId: string;
+ readonly turnId: string;
+ readonly stepId: StepId;
+ /** Time to first token: stream start → first text/reasoning delta. */
+ readonly ttftMs?: number;
+ /** Decode time: first token → stream end (generation total − TTFT). */
+ readonly decodeMs?: number;
+ /**
+ * Total generation time for the step: stream start → stream end. Present
+ * whenever a clock was available, even if no first token was seen (then
+ * `ttftMs`/`decodeMs` are absent). When a first token was seen,
+ * `genTotalMs === ttftMs + decodeMs`.
+ */
+ readonly genTotalMs?: number;
+}
+
/** An error occurred during the turn. */
export interface TurnErrorEvent {
readonly type: "error";
@@ -286,6 +293,16 @@ export interface TurnDoneEvent {
readonly conversationId: string;
readonly turnId: string;
readonly reason: string;
+ /**
+ * Total wall-clock duration of the turn (turn start → turn end), in
+ * milliseconds. Optional: present only when the runtime was given a clock.
+ */
+ readonly durationMs?: number;
+ /**
+ * Aggregate token usage across all steps in the turn — a convenience total so
+ * a consumer need not sum the per-step `usage` events. Optional.
+ */
+ readonly usage?: Usage;
}
/**
diff --git a/scripts/live-probe.ts b/scripts/live-probe.ts
index 2c4dfb9..f38c907 100644
--- a/scripts/live-probe.ts
+++ b/scripts/live-probe.ts
@@ -43,6 +43,13 @@ import {
selectMessages,
type TranscriptState,
} from "../src/core/chunks/index.ts";
+import {
+ foldMetricEvent,
+ stepMetrics,
+ type TelemetryState,
+ initialState as telemetryInitialState,
+ turnMetrics,
+} from "../src/core/telemetry/index.ts";
import { createConversationCache } from "../src/features/conversation-cache/index.ts";
const WS_URL = process.env.PROBE_WS ?? "ws://localhost:24205";
@@ -87,8 +94,15 @@ async function runTurn(
socket: Socket,
conversationId: string,
prompt: string,
-): Promise<{ state: TranscriptState; deltas: number; sealed: boolean; error: string | null }> {
+): Promise<{
+ state: TranscriptState;
+ telemetry: TelemetryState;
+ deltas: number;
+ sealed: boolean;
+ error: string | null;
+}> {
let state = initialState();
+ let telemetry = telemetryInitialState();
let deltas = 0;
let sealed = false;
let error: string | null = null;
@@ -102,6 +116,7 @@ async function runTurn(
}
deltas++;
state = foldEvent(state, msg.event);
+ telemetry = foldMetricEvent(telemetry, msg.event);
if (msg.event.type === "turn-sealed") {
sealed = true;
done.resolve();
@@ -113,7 +128,7 @@ async function runTurn(
await done.promise;
clearTimeout(timeout);
handlers.delete(conversationId);
- return { state, deltas, sealed, error };
+ return { state, telemetry, deltas, sealed, error };
}
function toolChunksOf(state: TranscriptState) {
@@ -178,6 +193,44 @@ async function main() {
.join("");
record("turn 1 committed transcript has assistant text", committedText.length > 0);
+ // ─── Turn 1 telemetry: verify step metrics populated. NOTE(review): telemetry turns are keyed by turnId; textConv looks like a conversationId — confirm the lookup key ───
+ const t1Turn = turnMetrics(t1.telemetry, textConv);
+ const t1StepCount = t1Turn?.steps.length ?? 0;
+ record("turn 1 telemetry accumulated steps", t1StepCount > 0, `${t1StepCount} step(s)`);
+ if (t1StepCount > 0) {
+ const s0 = stepMetrics(t1.telemetry, textConv, 0);
+ const hasTiming = s0?.genTotalMs !== undefined || s0?.ttftMs !== undefined;
+ if (hasTiming) {
+ record(
+ "turn 1 step 0 has timing metrics",
+ true,
+ `ttftMs=${s0?.ttftMs ?? "–"} decodeMs=${s0?.decodeMs ?? "–"} genTotalMs=${s0?.genTotalMs ?? "–"}`,
+ );
+ } else {
+ note(
+ "turn 1 step 0 has no timing (backend may not have a clock) — telemetry path verified but no timing to assert",
+ );
+ }
+ const hasTokens = s0?.usage?.outputTokens !== undefined;
+ if (hasTokens) {
+ record(
+ "turn 1 step 0 has token usage",
+ true,
+ `in=${s0?.usage?.inputTokens ?? "–"} out=${s0?.usage?.outputTokens ?? "–"}`,
+ );
+ } else {
+ note(
+ "turn 1 step 0 has no usage (stepId may not have been on the usage event) — telemetry path verified",
+ );
+ }
+ }
+ const t1Done = t1Turn?.wallMs;
+ if (t1Done !== undefined) {
+ record("turn 1 done event recorded wall-clock", true, `${t1Done}ms`);
+ } else {
+ note("turn 1 done.durationMs absent (backend clock unavailable)");
+ }
+
// ─── Turn 2: tool-call batching (wire@0.2.0 stepId) ─────────────────────────
console.log(`\n[live-probe] TURN 2 (tools): "${TOOL_PROMPT}"`);
const toolConv = crypto.randomUUID();
@@ -185,6 +238,26 @@ async function main() {
if (t2.error !== null) record("turn 2 had no chat.error", false, t2.error);
record("turn 2 reached turn-sealed", t2.sealed);
+ // ─── Turn 2 telemetry: verify step + tool metrics. NOTE(review): telemetry turns are keyed by turnId; toolConv looks like a conversationId — confirm the lookup key ───
+ const t2Turn = turnMetrics(t2.telemetry, toolConv);
+ const t2StepCount = t2Turn?.steps.length ?? 0;
+ record("turn 2 telemetry accumulated steps", t2StepCount > 0, `${t2StepCount} step(s)`);
+ if (t2StepCount > 0) {
+ const s0 = stepMetrics(t2.telemetry, toolConv, 0);
+ if (s0?.toolDurationMs !== undefined && s0.toolDurationMs > 0) {
+ record("turn 2 step 0 has tool execution time", true, `toolDurationMs=${s0.toolDurationMs}`);
+ } else {
+ note("turn 2 step 0 has no toolDurationMs (tool-result.durationMs may be absent)");
+ }
+ if (s0?.genTotalMs !== undefined) {
+ record("turn 2 step 0 has generation timing", true, `genTotalMs=${s0.genTotalMs}`);
+ }
+ }
+ const t2Done = t2Turn?.wallMs;
+ if (t2Done !== undefined) {
+ record("turn 2 done event recorded wall-clock", true, `${t2Done}ms`);
+ }
+
const liveTool = toolChunksOf(t2.state);
const liveCalls = liveTool.filter((c) => c.chunk.type === "tool-call");
diff --git a/src/app/App.svelte b/src/app/App.svelte
index 61b4cb9..e1d59f9 100644
--- a/src/app/App.svelte
+++ b/src/app/App.svelte
@@ -1,6 +1,6 @@
<script lang="ts">
import type { InvokeMessage } from "@dispatch/ui-contract";
- import { ChatView, Composer, ModelSelector } from "../features/chat";
+ import { ChatView, Composer, ModelSelector, TurnSummary } from "../features/chat";
import { TabBar } from "../features/tabs";
import { SurfaceView } from "../features/surface-host";
import type { AppStore } from "./store.svelte";
@@ -62,7 +62,15 @@
<div class="flex-1 overflow-y-auto">
{#key store.activeConversationId}
- <ChatView chunks={store.activeChat.chunks} />
+ <ChatView
+ chunks={store.activeChat.chunks}
+ telemetry={store.activeChat.telemetry}
+ currentTurnId={store.activeChat.currentTurnId}
+ />
+ <TurnSummary
+ telemetry={store.activeChat.telemetry}
+ turnId={store.activeChat.currentTurnId}
+ />
{/key}
</div>
diff --git a/src/core/chunks/reducer.ts b/src/core/chunks/reducer.ts
index 1dcfa39..54b1922 100644
--- a/src/core/chunks/reducer.ts
+++ b/src/core/chunks/reducer.ts
@@ -148,6 +148,10 @@ export function foldEvent(state: TranscriptState, event: AgentEvent): Transcript
case "usage":
return { ...state, latestUsage: event.usage };
+ case "step-complete":
+ // Timing metadata — no content chunk; handled by the telemetry reducer.
+ return state;
+
case "done": {
const provisional = flushAccumulating(state.provisional, state.accumulating);
return {
diff --git a/src/core/telemetry/index.ts b/src/core/telemetry/index.ts
new file mode 100644
index 0000000..a528b0d
--- /dev/null
+++ b/src/core/telemetry/index.ts
@@ -0,0 +1,14 @@
+export { foldMetricEvent, initialState } from "./reducer";
+export {
+ stepCount,
+ stepMetrics,
+ stepToolDuration,
+ stepTps,
+ totalDecodeMs,
+ totalInputTokens,
+ totalOutputTokens,
+ turnMetrics,
+ turnTps,
+ turnTtft,
+} from "./selectors";
+export type { StepMetrics, TelemetryState, TurnMetrics } from "./types";
diff --git a/src/core/telemetry/reducer.test.ts b/src/core/telemetry/reducer.test.ts
new file mode 100644
index 0000000..119bf96
--- /dev/null
+++ b/src/core/telemetry/reducer.test.ts
@@ -0,0 +1,252 @@
+import type { StepId, Usage } from "@dispatch/wire";
+import { describe, expect, it } from "vitest";
+import { foldMetricEvent, initialState } from "./reducer";
+import {
+ stepCount,
+ stepMetrics,
+ stepToolDuration,
+ stepTps,
+ totalDecodeMs,
+ totalInputTokens,
+ totalOutputTokens,
+ turnMetrics,
+ turnTps,
+ turnTtft,
+} from "./selectors";
+
+const sid = (s: string) => s as StepId;
+
+const usage = (turnId: string, stepId: string, u: Usage) => ({
+ type: "usage" as const,
+ conversationId: "c1",
+ turnId,
+ stepId: sid(stepId),
+ usage: u,
+});
+
+const stepComplete = (
+ turnId: string,
+ stepId: string,
+ timing: { ttftMs?: number; decodeMs?: number; genTotalMs?: number },
+) => ({
+ type: "step-complete" as const,
+ conversationId: "c1",
+ turnId,
+ stepId: sid(stepId),
+ ...timing,
+});
+
+describe("foldMetricEvent", () => {
+ it("turn-start initializes an empty turn", () => {
+ const s = foldMetricEvent(initialState(), {
+ type: "turn-start",
+ conversationId: "c1",
+ turnId: "t1",
+ });
+ expect(s.turns.get("t1")?.steps).toEqual([]);
+ });
+
+ it("step-complete populates timing on a new step", () => {
+ let s = initialState();
+ s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" });
+ s = foldMetricEvent(
+ s,
+ stepComplete("t1", "s0", { ttftMs: 300, decodeMs: 800, genTotalMs: 1100 }),
+ );
+
+ const step = stepMetrics(s, "t1", 0);
+ expect(step?.ttftMs).toBe(300);
+ expect(step?.decodeMs).toBe(800);
+ expect(step?.genTotalMs).toBe(1100);
+ });
+
+ it("usage merges tokens into a step (joined by stepId)", () => {
+ let s = initialState();
+ s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" });
+ s = foldMetricEvent(s, stepComplete("t1", "s0", { genTotalMs: 500 }));
+ s = foldMetricEvent(s, usage("t1", "s0", { inputTokens: 100, outputTokens: 50 }));
+
+ const step = stepMetrics(s, "t1", 0);
+ expect(step?.usage?.inputTokens).toBe(100);
+ expect(step?.usage?.outputTokens).toBe(50);
+ expect(step?.genTotalMs).toBe(500); // timing preserved
+ });
+
+ it("usage without stepId is ignored", () => {
+ let s = initialState();
+ s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" });
+ s = foldMetricEvent(s, {
+ type: "usage",
+ conversationId: "c1",
+ turnId: "t1",
+ usage: { inputTokens: 100, outputTokens: 50 },
+ // no stepId
+ });
+ expect(s.turns.get("t1")?.steps).toEqual([]);
+ });
+
+ it("tool-result accumulates durationMs into its step", () => {
+ let s = initialState();
+ s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" });
+ s = foldMetricEvent(s, stepComplete("t1", "s0", {}));
+ s = foldMetricEvent(s, {
+ type: "tool-result",
+ conversationId: "c1",
+ turnId: "t1",
+ stepId: sid("s0"),
+ toolCallId: "tc1",
+ toolName: "bash",
+ content: "",
+ isError: false,
+ durationMs: 120,
+ });
+ s = foldMetricEvent(s, {
+ type: "tool-result",
+ conversationId: "c1",
+ turnId: "t1",
+ stepId: sid("s0"),
+ toolCallId: "tc2",
+ toolName: "bash",
+ content: "",
+ isError: false,
+ durationMs: 80,
+ });
+
+ const step = stepMetrics(s, "t1", 0);
+ expect(step?.toolDurationMs).toBe(200);
+ });
+
+ it("done records turn wall-clock and aggregate usage", () => {
+ let s = initialState();
+ s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" });
+ s = foldMetricEvent(s, {
+ type: "done",
+ conversationId: "c1",
+ turnId: "t1",
+ reason: "complete",
+ durationMs: 4200,
+ usage: { inputTokens: 800, outputTokens: 200 },
+ });
+
+ const turn = turnMetrics(s, "t1");
+ expect(turn?.wallMs).toBe(4200);
+ expect(turn?.doneUsage?.outputTokens).toBe(200);
+ });
+
+ it("events for an unknown turn are handled gracefully (step-complete, usage)", () => {
+ const s = initialState();
+ // step-complete for a turn we haven't started — creates the turn.
+ const s2 = foldMetricEvent(s, stepComplete("t1", "s0", { ttftMs: 100 }));
+ expect(s2.turns.get("t1")?.steps[0]?.ttftMs).toBe(100);
+ });
+
+ it("multiple steps accumulate in order", () => {
+ let s = initialState();
+ s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" });
+ s = foldMetricEvent(s, stepComplete("t1", "s0", { genTotalMs: 100 }));
+ s = foldMetricEvent(s, stepComplete("t1", "s1", { genTotalMs: 200 }));
+
+ expect(stepCount(s, "t1")).toBe(2);
+ expect(stepMetrics(s, "t1", 0)?.genTotalMs).toBe(100);
+ expect(stepMetrics(s, "t1", 1)?.genTotalMs).toBe(200);
+ });
+
+ it("non-metric events are no-ops", () => {
+ let s = initialState();
+ s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" });
+ s = foldMetricEvent(s, {
+ type: "text-delta",
+ conversationId: "c1",
+ turnId: "t1",
+ delta: "hi",
+ });
+ s = foldMetricEvent(s, {
+ type: "turn-sealed",
+ conversationId: "c1",
+ turnId: "t1",
+ });
+ expect(s.turns.get("t1")?.steps).toEqual([]);
+ });
+});
+
+describe("selectors — derived metrics", () => {
+ function populatedState() {
+ let s = initialState();
+ s = foldMetricEvent(s, { type: "turn-start", conversationId: "c1", turnId: "t1" });
+ s = foldMetricEvent(
+ s,
+ stepComplete("t1", "s0", { ttftMs: 300, decodeMs: 700, genTotalMs: 1000 }),
+ );
+ s = foldMetricEvent(s, usage("t1", "s0", { inputTokens: 500, outputTokens: 100 }));
+ s = foldMetricEvent(
+ s,
+ stepComplete("t1", "s1", { ttftMs: 200, decodeMs: 500, genTotalMs: 700 }),
+ );
+ s = foldMetricEvent(s, usage("t1", "s1", { inputTokens: 600, outputTokens: 80 }));
+ s = foldMetricEvent(s, {
+ type: "done",
+ conversationId: "c1",
+ turnId: "t1",
+ reason: "complete",
+ durationMs: 3500,
+ usage: { inputTokens: 1100, outputTokens: 180 },
+ });
+ return s;
+ }
+
+ it("stepTps = outputTokens / (decodeMs / 1000)", () => {
+ const s = populatedState();
+ const step = stepMetrics(s, "t1", 0)!;
+ expect(stepTps(step)).toBeCloseTo(100 / 0.7, 2);
+ });
+
+ it("turnTtft returns first step's ttftMs", () => {
+ expect(turnTtft(populatedState(), "t1")).toBe(300);
+ });
+
+ it("totalDecodeMs sums all steps' decodeMs", () => {
+ expect(totalDecodeMs(populatedState(), "t1")).toBe(1200);
+ });
+
+ it("turnTps = outputTokens / (totalDecodeMs / 1000)", () => {
+ const s = populatedState();
+ expect(turnTps(s, "t1")).toBeCloseTo(180 / 1.2, 2);
+ });
+
+ it("totalOutputTokens prefers done.usage over step sum", () => {
+ const s = populatedState();
+ expect(totalOutputTokens(s, "t1")).toBe(180); // from done.usage
+ });
+
+ it("totalInputTokens prefers done.usage over step sum", () => {
+ const s = populatedState();
+ expect(totalInputTokens(s, "t1")).toBe(1100);
+ });
+
+ it("stepToolDuration returns sum only when > 0", () => {
+ const withTools = foldMetricEvent(
+ foldMetricEvent(initialState(), { type: "turn-start", conversationId: "c1", turnId: "t1" }),
+ {
+ type: "tool-result",
+ conversationId: "c1",
+ turnId: "t1",
+ stepId: sid("s0"),
+ toolCallId: "tc1",
+ toolName: "bash",
+ content: "",
+ isError: false,
+ durationMs: 50,
+ },
+ );
+ const step = stepMetrics(withTools, "t1", 0)!;
+ expect(stepToolDuration(step)).toBe(50);
+ expect(stepToolDuration({ stepId: sid("s0") })).toBeUndefined();
+ });
+
+ it("returns undefined for absent fields gracefully", () => {
+ const s = initialState();
+ expect(turnMetrics(s, "missing")).toBeUndefined();
+ expect(turnTtft(s, "missing")).toBeUndefined();
+ expect(turnTps(s, "missing")).toBeUndefined();
+ });
+});
diff --git a/src/core/telemetry/reducer.ts b/src/core/telemetry/reducer.ts
new file mode 100644
index 0000000..4083231
--- /dev/null
+++ b/src/core/telemetry/reducer.ts
@@ -0,0 +1,122 @@
+import type { AgentEvent, StepId, Usage } from "@dispatch/wire";
+import type { StepMetrics, TelemetryState, TurnMetrics } from "./types";
+
+/** The initial empty telemetry state. */
+export function initialState(): TelemetryState {
+ return { turns: new Map() };
+}
+
+function mergeStep(existing: StepMetrics, patch: StepMetrics): StepMetrics {
+ const merged: StepMetrics = { ...existing };
+ if (patch.ttftMs !== undefined) (merged as { ttftMs?: number }).ttftMs = patch.ttftMs;
+ if (patch.decodeMs !== undefined) (merged as { decodeMs?: number }).decodeMs = patch.decodeMs;
+ if (patch.genTotalMs !== undefined)
+ (merged as { genTotalMs?: number }).genTotalMs = patch.genTotalMs;
+ if (patch.usage !== undefined) {
+ (merged as { usage?: Usage }).usage = { ...existing.usage, ...patch.usage };
+ }
+ if (patch.toolDurationMs !== undefined) {
+ (merged as { toolDurationMs?: number }).toolDurationMs =
+ (existing.toolDurationMs ?? 0) + patch.toolDurationMs;
+ }
+ return merged;
+}
+
+function upsertStep(
+ steps: readonly StepMetrics[],
+ stepId: StepId,
+ patch: StepMetrics,
+): readonly StepMetrics[] {
+ const idx = steps.findIndex((s) => s.stepId === stepId);
+ if (idx === -1) {
+ return [...steps, patch];
+ }
+ return [...steps.slice(0, idx), mergeStep(steps[idx]!, patch), ...steps.slice(idx + 1)];
+}
+
+function setTurn(
+ turns: ReadonlyMap<string, TurnMetrics>,
+ turnId: string,
+ turn: TurnMetrics,
+): ReadonlyMap<string, TurnMetrics> {
+ const next = new Map(turns);
+ next.set(turnId, turn);
+ return next;
+}
+
+/**
+ * Fold one live AgentEvent into the telemetry state.
+ *
+ * - `turn-start` creates (or resets) an empty metrics entry for the turn.
+ * - `step-complete` creates/updates the step's timing metrics.
+ * - `usage` merges token counts into the step (joined by `stepId`).
+ * - `tool-result` accumulates `durationMs` into the step.
+ * - `done` records turn-level wall-clock + token totals.
+ * - All other event types are no-ops (content events belong to the transcript).
+ *
+ * Pure: input → output, no DOM, no side effects.
+ */
+export function foldMetricEvent(state: TelemetryState, event: AgentEvent): TelemetryState {
+ switch (event.type) {
+ case "turn-start": {
+ return {
+ ...state,
+ turns: setTurn(state.turns, event.turnId, { steps: [] }),
+ };
+ }
+
+ case "step-complete": {
+ const turnId = event.turnId;
+ const existing = state.turns.get(turnId);
+ const patch: StepMetrics = { stepId: event.stepId };
+ if (event.ttftMs !== undefined) (patch as { ttftMs?: number }).ttftMs = event.ttftMs;
+ if (event.decodeMs !== undefined) (patch as { decodeMs?: number }).decodeMs = event.decodeMs;
+ if (event.genTotalMs !== undefined)
+ (patch as { genTotalMs?: number }).genTotalMs = event.genTotalMs;
+ const steps =
+ existing !== undefined ? upsertStep(existing.steps, event.stepId, patch) : [patch];
+ return {
+ ...state,
+ turns: setTurn(state.turns, turnId, { ...existing, steps } as TurnMetrics),
+ };
+ }
+
+ case "usage": {
+ if (event.stepId === undefined) return state;
+ const turnId = event.turnId;
+ const existing = state.turns.get(turnId);
+ const patch: StepMetrics = { stepId: event.stepId, usage: event.usage };
+ const steps =
+ existing !== undefined ? upsertStep(existing.steps, event.stepId, patch) : [patch];
+ return {
+ ...state,
+ turns: setTurn(state.turns, turnId, { ...existing, steps } as TurnMetrics),
+ };
+ }
+
+ case "tool-result": {
+ if (event.durationMs === undefined) return state;
+ const turnId = event.turnId;
+ const existing = state.turns.get(turnId);
+ if (existing === undefined) return state;
+ const patch: StepMetrics = { stepId: event.stepId, toolDurationMs: event.durationMs };
+ const steps = upsertStep(existing.steps, event.stepId, patch);
+ return { ...state, turns: setTurn(state.turns, turnId, { ...existing, steps }) };
+ }
+
+ case "done": {
+ const turnId = event.turnId;
+ const existing = state.turns.get(turnId);
+ const updated: TurnMetrics = {
+ ...(existing ?? { steps: [] }),
+ };
+ if (event.durationMs !== undefined)
+ (updated as { wallMs?: number }).wallMs = event.durationMs;
+ if (event.usage !== undefined) (updated as { doneUsage?: Usage }).doneUsage = event.usage;
+ return { ...state, turns: setTurn(state.turns, turnId, updated) };
+ }
+
+ default:
+ return state;
+ }
+}
diff --git a/src/core/telemetry/selectors.ts b/src/core/telemetry/selectors.ts
new file mode 100644
index 0000000..ecf1794
--- /dev/null
+++ b/src/core/telemetry/selectors.ts
@@ -0,0 +1,95 @@
+import type { Usage } from "@dispatch/wire";
+import type { StepMetrics, TelemetryState, TurnMetrics } from "./types";
+
+ /**
+  * Look up one step of a turn by its position in the turn's `steps` array.
+  *
+  * @returns the step's metrics, or `undefined` when the turn is unknown or
+  *   nothing is stored at `stepIndex`.
+  */
+ export function stepMetrics(
+   state: TelemetryState,
+   turnId: string,
+   stepIndex: number,
+ ): StepMetrics | undefined {
+   const turn = state.turns.get(turnId);
+   if (turn === undefined) return undefined;
+   return turn.steps[stepIndex];
+}
+
+ /** Look up the accumulated metrics of a turn; `undefined` when the turn is unknown. */
+ export function turnMetrics(state: TelemetryState, turnId: string): TurnMetrics | undefined {
+   const { turns } = state;
+   return turns.get(turnId);
+}
+
+ /** How many step entries a turn holds; `0` when the turn is unknown. */
+ export function stepCount(state: TelemetryState, turnId: string): number {
+   const turn = state.turns.get(turnId);
+   return turn === undefined ? 0 : turn.steps.length;
+}
+
+ /**
+  * First-token latency of a turn, read from the entry at index 0 of its `steps`.
+  *
+  * @returns `undefined` when the turn is unknown, has no steps, or that step
+  *   carries no `ttftMs`.
+  */
+ export function turnTtft(state: TelemetryState, turnId: string): number | undefined {
+   const firstStep = state.turns.get(turnId)?.steps[0];
+   return firstStep === undefined ? undefined : firstStep.ttftMs;
+}
+
+ /**
+  * Add up `decodeMs` over every step of a turn that reports one.
+  *
+  * @returns the sum, or `undefined` when the turn is unknown or no step has a
+  *   `decodeMs` value.
+  */
+ export function totalDecodeMs(state: TelemetryState, turnId: string): number | undefined {
+   const steps = state.turns.get(turnId)?.steps ?? [];
+   const samples = steps
+     .map((step) => step.decodeMs)
+     .filter((ms): ms is number => ms !== undefined);
+   if (samples.length === 0) return undefined;
+   return samples.reduce((sum, ms) => sum + ms, 0);
+}
+
+ /**
+  * Output-token total for a turn.
+  *
+  * When the turn has a `doneUsage`, its `outputTokens` is returned as-is;
+  * otherwise the per-step `usage.outputTokens` values are summed.
+  *
+  * @returns `undefined` when the turn is unknown or no step reports output tokens.
+  */
+ export function totalOutputTokens(state: TelemetryState, turnId: string): number | undefined {
+   const turn = state.turns.get(turnId);
+   if (turn === undefined) return undefined;
+   if (turn.doneUsage !== undefined) return turn.doneUsage.outputTokens;
+   const counts = turn.steps
+     .map((step) => step.usage?.outputTokens)
+     .filter((count): count is number => count !== undefined);
+   return counts.length > 0 ? counts.reduce((sum, count) => sum + count, 0) : undefined;
+}
+
+ /**
+  * Input-token total for a turn.
+  *
+  * When the turn has a `doneUsage`, its `inputTokens` is returned as-is;
+  * otherwise the per-step `usage.inputTokens` values are summed.
+  *
+  * @returns `undefined` when the turn is unknown or no step reports input tokens.
+  */
+ export function totalInputTokens(state: TelemetryState, turnId: string): number | undefined {
+   const turn = state.turns.get(turnId);
+   if (turn === undefined) return undefined;
+   if (turn.doneUsage !== undefined) return turn.doneUsage.inputTokens;
+   const counts = turn.steps
+     .map((step) => step.usage?.inputTokens)
+     .filter((count): count is number => count !== undefined);
+   return counts.length > 0 ? counts.reduce((sum, count) => sum + count, 0) : undefined;
+}
+
+ /**
+  * Tokens per second for one step: `outputTokens / (decodeMs / 1000)`.
+  *
+  * @returns `undefined` when either input is missing or `decodeMs` is `0`
+  *   (which would otherwise divide by zero).
+  */
+ export function stepTps(step: StepMetrics): number | undefined {
+   const tokens = step.usage?.outputTokens;
+   const decodeMs = step.decodeMs;
+   if (tokens === undefined || decodeMs === undefined) return undefined;
+   if (decodeMs === 0) return undefined;
+   return tokens / (decodeMs / 1000);
+}
+
+ /**
+  * Tokens per second for a whole turn: `totalOutputTokens` divided by
+  * `totalDecodeMs` expressed in seconds.
+  *
+  * @returns `undefined` when either total is unavailable or the decode total is `0`.
+  */
+ export function turnTps(state: TelemetryState, turnId: string): number | undefined {
+   const tokens = totalOutputTokens(state, turnId);
+   const decodeMs = totalDecodeMs(state, turnId);
+   if (tokens === undefined) return undefined;
+   if (decodeMs === undefined || decodeMs === 0) return undefined;
+   return tokens / (decodeMs / 1000);
+}
+
+ /**
+  * The step's recorded `toolDurationMs`, exposed only when it is a positive number.
+  *
+  * @returns `undefined` when the field is absent or not greater than zero.
+  */
+ export function stepToolDuration(step: StepMetrics): number | undefined {
+   const ms = step.toolDurationMs;
+   if (ms !== undefined && ms > 0) return ms;
+   return undefined;
+}
diff --git a/src/core/telemetry/types.ts b/src/core/telemetry/types.ts
new file mode 100644
index 0000000..395ec93
--- /dev/null
+++ b/src/core/telemetry/types.ts
@@ -0,0 +1,35 @@
+import type { StepId, Usage } from "@dispatch/wire";
+
+/**
+ * Per-step metrics, accumulated from `step-complete` + `usage` events.
+ * All fields optional — absent when the backend had no clock or the step
+ * produced no text/reasoning token.
+ */
+export interface StepMetrics {
+ readonly stepId: StepId;
+ readonly ttftMs?: number;
+ readonly decodeMs?: number;
+ readonly genTotalMs?: number;
+ readonly usage?: Usage;
+ readonly toolDurationMs?: number; // sum of tool-result.durationMs in this step
+}
+
+/**
+ * Per-turn metrics, accumulated from `done` events + per-step aggregation.
+ */
+export interface TurnMetrics {
+ readonly wallMs?: number;
+ readonly doneUsage?: Usage;
+ readonly steps: readonly StepMetrics[];
+}
+
+/**
+ * Pure telemetry state — lives alongside but separate from TranscriptState.
+ * Accumulates live-only metric events; never persisted (history has no metrics).
+ * No "active turn" tracking — the consumer (store) passes the relevant turnId
+ * to the selectors. Pure: events flow in, derived values flow out.
+ */
+export interface TelemetryState {
+ /** turnId → TurnMetrics. Multiple turns accumulate (tab switching). */
+ readonly turns: ReadonlyMap<string, TurnMetrics>;
+}
diff --git a/src/core/wire/conformance.test.ts b/src/core/wire/conformance.test.ts
index 50b7f35..690ba4e 100644
--- a/src/core/wire/conformance.test.ts
+++ b/src/core/wire/conformance.test.ts
@@ -62,6 +62,15 @@ describe("classifies every AgentEvent type", () => {
turnId: "t1",
usage: { inputTokens: 10, outputTokens: 20 },
},
+ {
+ type: "step-complete",
+ conversationId: "c1",
+ turnId: "t1",
+ stepId: "t1#0" as StepId,
+ ttftMs: 300,
+ decodeMs: 700,
+ genTotalMs: 1000,
+ },
{ type: "error", conversationId: "c1", turnId: "t1", message: "oops" },
{ type: "done", conversationId: "c1", turnId: "t1", reason: "complete" },
{ type: "turn-sealed", conversationId: "c1", turnId: "t1" },
@@ -78,14 +87,15 @@ describe("classifies every AgentEvent type", () => {
"tool-result",
"tool-output",
"usage",
+ "step-complete",
"error",
"done",
"turn-sealed",
]);
});
- it("covers all 11 AgentEvent variants", () => {
- expect(samples).toHaveLength(11);
+ it("covers all 12 AgentEvent variants", () => {
+ expect(samples).toHaveLength(12);
});
});
diff --git a/src/core/wire/conformance.ts b/src/core/wire/conformance.ts
index 5d75a60..d89772e 100644
--- a/src/core/wire/conformance.ts
+++ b/src/core/wire/conformance.ts
@@ -30,6 +30,8 @@ export function assertAgentEventExhaustive(event: AgentEvent): string {
return "done";
case "turn-sealed":
return "turn-sealed";
+ case "step-complete":
+ return "step-complete";
default:
return event satisfies never;
}
diff --git a/src/features/chat/index.ts b/src/features/chat/index.ts
index 4f2091a..b096cca 100644
--- a/src/features/chat/index.ts
+++ b/src/features/chat/index.ts
@@ -1,8 +1,10 @@
export type { RenderedChunk, RenderGroup, ToolBatchEntry } from "../../core/chunks";
export { groupRenderedChunks } from "../../core/chunks";
+export type { StepMetrics, TelemetryState, TurnMetrics } from "../../core/telemetry";
export type { ChatTransport, HistorySync } from "./ports";
export type { ChatStore, ChatStoreDependencies } from "./store.svelte";
export { createChatStore } from "./store.svelte";
export { default as ChatView } from "./ui/ChatView.svelte";
export { default as Composer } from "./ui/Composer.svelte";
export { default as ModelSelector } from "./ui/ModelSelector.svelte";
+export { default as TurnSummary } from "./ui/TurnSummary.svelte";
diff --git a/src/features/chat/store.svelte.ts b/src/features/chat/store.svelte.ts
index 1d8ab17..58c165f 100644
--- a/src/features/chat/store.svelte.ts
+++ b/src/features/chat/store.svelte.ts
@@ -13,6 +13,8 @@ import {
selectChunks,
selectMessages,
} from "../../core/chunks";
+import type { TelemetryState } from "../../core/telemetry";
+import { foldMetricEvent, initialState as telemetryInitialState } from "../../core/telemetry";
import type { ConversationCache } from "../conversation-cache";
import type { ChatTransport, HistorySync } from "./ports";
@@ -30,6 +32,8 @@ export interface ChatStore {
readonly pendingSync: boolean;
readonly error: string | null;
readonly model: string | undefined;
+ readonly telemetry: TelemetryState;
+ readonly currentTurnId: string | null;
handleDelta(msg: ChatDeltaMessage | ChatErrorMessage): void;
send(text: string): void;
setModel(model: string): void;
@@ -42,6 +46,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
let _pendingSync = $state(false);
let _error = $state<string | null>(null);
let _model = $state<string | undefined>(deps.model);
+ let telemetry = $state<TelemetryState>(telemetryInitialState());
let disposed = false;
async function syncTail(): Promise<void> {
@@ -76,6 +81,12 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
get model(): string | undefined {
return _model;
},
+ get telemetry(): TelemetryState {
+ return telemetry;
+ },
+ get currentTurnId(): string | null {
+ return transcript.currentTurnId;
+ },
handleDelta(msg: ChatDeltaMessage | ChatErrorMessage): void {
if (msg.type === "chat.error") {
@@ -89,6 +100,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
return;
}
transcript = foldEvent(transcript, msg.event);
+ telemetry = foldMetricEvent(telemetry, msg.event);
if (transcript.sealedTurnId !== null) {
void syncTail();
}
diff --git a/src/features/chat/store.test.ts b/src/features/chat/store.test.ts
index 71781ac..347cdd7 100644
--- a/src/features/chat/store.test.ts
+++ b/src/features/chat/store.test.ts
@@ -393,6 +393,52 @@ describe("createChatStore", () => {
store.dispose();
});
+ it("folding step-complete and usage events populates telemetry", () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const cache = createFakeCache();
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ cache: cache.impl,
+ });
+
+ store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" }));
+ store.handleDelta(
+ deltaEvent({
+ type: "step-complete",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ stepId: "t1#0" as StepId,
+ ttftMs: 300,
+ decodeMs: 700,
+ genTotalMs: 1000,
+ }),
+ );
+ store.handleDelta(
+ deltaEvent({
+ type: "usage",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 50, outputTokens: 20 },
+ }),
+ );
+
+ const turn = store.telemetry.turns.get("t1");
+ expect(turn).toBeDefined();
+ expect(turn?.steps).toHaveLength(1);
+ const step = turn?.steps.find((s) => s.stepId === ("t1#0" as StepId));
+ expect(step).toBeDefined();
+ expect(step?.ttftMs).toBe(300);
+ expect(step?.decodeMs).toBe(700);
+ expect(step?.usage?.inputTokens).toBe(50);
+ expect(step?.usage?.outputTokens).toBe(20);
+
+ store.dispose();
+ });
+
it("handleDelta ignores a chat.delta for a different conversationId", () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
diff --git a/src/features/chat/ui.test.ts b/src/features/chat/ui.test.ts
index b31cbf1..02d3c5a 100644
--- a/src/features/chat/ui.test.ts
+++ b/src/features/chat/ui.test.ts
@@ -3,9 +3,15 @@ import { render, screen } from "@testing-library/svelte";
import userEvent from "@testing-library/user-event";
import { describe, expect, it, vi } from "vitest";
import type { RenderedChunk } from "../../core/chunks";
+import type { TelemetryState } from "../../core/telemetry";
+import { initialState } from "../../core/telemetry";
import ChatView from "./ui/ChatView.svelte";
import Composer from "./ui/Composer.svelte";
import ModelSelector from "./ui/ModelSelector.svelte";
+import TurnSummary from "./ui/TurnSummary.svelte";
+
+const emptyTelemetry = initialState();
+const noTurnId = null;
describe("ChatView", () => {
it("renders a message's text chunk", () => {
@@ -18,7 +24,7 @@ describe("ChatView", () => {
},
];
- render(ChatView, { props: { chunks } });
+ render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } });
expect(screen.getByText("Hello world")).toBeInTheDocument();
});
@@ -34,7 +40,7 @@ describe("ChatView", () => {
},
];
- render(ChatView, { props: { chunks } });
+ render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } });
expect(screen.getByText("Hi there")).toBeInTheDocument();
expect(screen.getByText("Hello!")).toBeInTheDocument();
@@ -55,7 +61,7 @@ describe("ChatView", () => {
},
];
- render(ChatView, { props: { chunks } });
+ render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } });
expect(screen.getByText("read_file")).toBeInTheDocument();
const pre = screen.getByText((content, element) => {
@@ -80,7 +86,7 @@ describe("ChatView", () => {
},
];
- render(ChatView, { props: { chunks } });
+ render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } });
expect(screen.getByText("read_file")).toBeInTheDocument();
expect(screen.getByText("file contents here")).toBeInTheDocument();
@@ -96,7 +102,7 @@ describe("ChatView", () => {
},
];
- render(ChatView, { props: { chunks } });
+ render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } });
const alert = screen.getByRole("alert");
expect(alert).toHaveTextContent("Something failed");
@@ -112,7 +118,7 @@ describe("ChatView", () => {
},
];
- render(ChatView, { props: { chunks } });
+ render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } });
expect(screen.getByText("Rate limited")).toBeInTheDocument();
expect(screen.getByText("[RATE_LIMIT]")).toBeInTheDocument();
@@ -128,7 +134,7 @@ describe("ChatView", () => {
},
];
- render(ChatView, { props: { chunks } });
+ render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } });
expect(screen.getByText("System context loaded")).toBeInTheDocument();
});
@@ -143,7 +149,7 @@ describe("ChatView", () => {
},
];
- render(ChatView, { props: { chunks } });
+ render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId } });
// In-flight chunks render at full opacity (no faded "disabled" look).
const wrapper = screen.getByText("Streaming...").closest("div");
@@ -151,7 +157,7 @@ describe("ChatView", () => {
});
it("renders empty transcript", () => {
- render(ChatView, { props: { chunks: [] } });
+ render(ChatView, { props: { chunks: [], telemetry: emptyTelemetry, currentTurnId: noTurnId } });
const log = screen.getByRole("log");
expect(log).toBeInTheDocument();
@@ -199,7 +205,9 @@ describe("ChatView", () => {
},
];
- const { container } = render(ChatView, { props: { chunks } });
+ const { container } = render(ChatView, {
+ props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId },
+ });
// One DaisyUI list with two rows (one per call), not separate cards.
const lists = container.querySelectorAll("ul.list");
@@ -224,7 +232,9 @@ describe("ChatView", () => {
},
];
- const { container } = render(ChatView, { props: { chunks } });
+ const { container } = render(ChatView, {
+ props: { chunks, telemetry: emptyTelemetry, currentTurnId: noTurnId },
+ });
const collapse = container.querySelector(".collapse");
expect(collapse).not.toBeNull();
@@ -247,7 +257,9 @@ describe("ChatView", () => {
},
];
- const { container, rerender } = render(ChatView, { props: { chunks: streaming } });
+ const { container, rerender } = render(ChatView, {
+ props: { chunks: streaming, telemetry: emptyTelemetry, currentTurnId: noTurnId },
+ });
// Streaming: "Thinking" + loading dots.
expect(screen.getByText("Thinking")).toBeInTheDocument();
@@ -269,6 +281,8 @@ describe("ChatView", () => {
provisional: false,
},
],
+ telemetry: emptyTelemetry,
+ currentTurnId: noTurnId,
});
// Completed: "Thoughts", no dots — and the open state survived the transition.
@@ -278,6 +292,118 @@ describe("ChatView", () => {
expect(screen.getByRole("checkbox", { name: "Toggle thoughts" })).toBeChecked();
expect(container).toHaveTextContent("hmm, all done");
});
+
+ it("assistant text shows step metrics footer when step-complete data is available", () => {
+ const chunks: RenderedChunk[] = [
+ {
+ seq: 1,
+ role: "assistant",
+ chunk: { type: "text", text: "Here is my answer" },
+ provisional: false,
+ },
+ ];
+
+ const telemetry: TelemetryState = {
+ turns: new Map([
+ [
+ "turn-1",
+ {
+ wallMs: 2500,
+ steps: [
+ {
+ stepId: "turn-1#0" as StepId,
+ genTotalMs: 1200,
+ decodeMs: 1000,
+ usage: { inputTokens: 100, outputTokens: 86 },
+ },
+ ],
+ },
+ ],
+ ]),
+ };
+
+ render(ChatView, { props: { chunks, telemetry, currentTurnId: "turn-1" } });
+
+ expect(screen.getByText("Here is my answer")).toBeInTheDocument();
+ expect(screen.getByText("1.2s")).toBeInTheDocument();
+ expect(screen.getByText("86 t/s")).toBeInTheDocument();
+ expect(screen.getByText("86 tok")).toBeInTheDocument();
+ });
+
+ it("does not show metrics footer when no step data exists", () => {
+   const chunks: RenderedChunk[] = [
+     {
+       seq: 1,
+       role: "assistant",
+       chunk: { type: "text", text: "Still streaming" },
+       provisional: true,
+     },
+   ];
+
+   render(ChatView, { props: { chunks, telemetry: emptyTelemetry, currentTurnId: "turn-1" } });
+
+   expect(screen.getByText("Still streaming")).toBeInTheDocument();
+   // A string matcher must equal a node's full text, so "t/s" / "tok" could
+   // never match a badge such as "86 t/s" and the check would pass even with
+   // the footer rendered. Regex matchers make these assertions able to fail.
+   expect(screen.queryByText(/t\/s/)).toBeNull();
+   expect(screen.queryByText(/\btok\b/)).toBeNull();
+ });
+});
+
+describe("TurnSummary", () => {
+ it("renders turn stats when telemetry has data", () => {
+ const telemetry: TelemetryState = {
+ turns: new Map([
+ [
+ "turn-1",
+ {
+ wallMs: 4200,
+ steps: [
+ {
+ stepId: "turn-1#0" as StepId,
+ genTotalMs: 2000,
+ decodeMs: 1500,
+ usage: { inputTokens: 500, outputTokens: 300 },
+ },
+ {
+ stepId: "turn-1#1" as StepId,
+ genTotalMs: 1800,
+ decodeMs: 1200,
+ usage: { inputTokens: 600, outputTokens: 200 },
+ },
+ ],
+ },
+ ],
+ ]),
+ };
+
+ render(TurnSummary, { props: { telemetry, turnId: "turn-1" } });
+
+ expect(screen.getByText("Turn")).toBeInTheDocument();
+ expect(screen.getByText("4.2s")).toBeInTheDocument();
+ expect(screen.getByText("Tokens")).toBeInTheDocument();
+ expect(screen.getByText("1,600")).toBeInTheDocument();
+ expect(screen.getByText("Output")).toBeInTheDocument();
+ expect(screen.getByText("500")).toBeInTheDocument();
+ expect(screen.getByText("Input")).toBeInTheDocument();
+ expect(screen.getByText("1,100")).toBeInTheDocument();
+ expect(screen.getByText("Steps")).toBeInTheDocument();
+ expect(screen.getByText("2")).toBeInTheDocument();
+ expect(screen.getByText("TPS")).toBeInTheDocument();
+ expect(screen.getByText("185 t/s")).toBeInTheDocument();
+ });
+
+ it("renders nothing when turnId is null", () => {
+ const { container } = render(TurnSummary, {
+ props: { telemetry: emptyTelemetry, turnId: null },
+ });
+ expect(container.querySelector(".stats")).toBeNull();
+ });
+
+ it("renders nothing when turn metrics not found", () => {
+ const { container } = render(TurnSummary, {
+ props: { telemetry: emptyTelemetry, turnId: "nonexistent" },
+ });
+ expect(container.querySelector(".stats")).toBeNull();
+ });
});
describe("Composer", () => {
diff --git a/src/features/chat/ui/ChatView.svelte b/src/features/chat/ui/ChatView.svelte
index 3a078fb..6acda53 100644
--- a/src/features/chat/ui/ChatView.svelte
+++ b/src/features/chat/ui/ChatView.svelte
@@ -1,16 +1,27 @@
<script lang="ts">
import { groupRenderedChunks, type RenderedChunk } from "../index";
+ import type { TelemetryState } from "../../../core/telemetry";
+ import { stepMetrics, stepTps } from "../../../core/telemetry";
- let { chunks }: { chunks: readonly RenderedChunk[] } = $props();
+ interface Props {
+ chunks: readonly RenderedChunk[];
+ telemetry: TelemetryState;
+ currentTurnId: string | null;
+ }
+
+ let { chunks, telemetry, currentTurnId }: Props = $props();
const groups = $derived(groupRenderedChunks(chunks));
- // Stable per-row keys. Thinking blocks get an ordinal key (`think<n>`) that
- // survives the provisional→committed (seq null → seq N) transition, so the
- // collapse's open/close state is NOT lost when a turn seals. (App isolates
- // these keys per conversation via {#key}.)
+ /**
+  * Format a millisecond duration for display: `850ms`, `1.2s`, or `2m5s`.
+  *
+  * The value is rounded to the displayed precision BEFORE the unit is chosen,
+  * so a rounding carry can never produce `1000ms`, `60.0s`, or `1m60s`.
+  */
+ function formatMs(ms: number): string {
+   const wholeMs = Math.round(ms);
+   if (wholeMs < 1000) return `${wholeMs}ms`;
+   // Tenths of a second — the precision of the seconds form.
+   const tenths = Math.round(ms / 100);
+   if (tenths < 600) return `${(tenths / 10).toFixed(1)}s`;
+   // Whole seconds first, then split, so the seconds part stays within 0–59.
+   const totalSeconds = Math.round(ms / 1000);
+   return `${Math.floor(totalSeconds / 60)}m${totalSeconds % 60}s`;
+ }
+
const rows = $derived.by(() => {
let thinking = 0;
+ let stepIdx = 0;
return groups.map((group, i) => {
let key: string;
if (group.kind === "tool-batch") {
@@ -22,14 +33,17 @@
} else {
key = `p${i}`;
}
- return { group, key };
+ const si = stepIdx;
+ if (group.kind === "tool-batch" || (group.kind === "single" && (group.chunk.chunk.type === "tool-call" || group.chunk.chunk.type === "tool-result"))) {
+ stepIdx++;
+ }
+ return { group, key, stepIdx: si };
});
});
</script>
-{#snippet chunkRow(rendered: RenderedChunk)}
+{#snippet chunkRow(rendered: RenderedChunk, sIdx: number)}
{#if rendered.role === "user"}
- <!-- User: a speech bubble, left-aligned -->
<div class="chat chat-start">
<div class="chat-bubble chat-bubble-primary">
{#if rendered.chunk.type === "text"}
@@ -38,9 +52,6 @@
</div>
</div>
{:else if rendered.chunk.type === "thinking"}
- <!-- Thinking: a visible bubble (like tool cards), holding a checkbox collapse
- (no arrow icon, smooth open/close). Title reads "Thinking" + loading dots
- while generating, then "Thoughts" with no dots once complete. -->
<div class="chat chat-start [&>.chat-bubble]:max-w-5xl [&>.chat-bubble]:p-0">
<div class="chat-bubble w-full bg-transparent">
<div class="collapse w-full rounded-box bg-base-200 text-sm">
@@ -58,14 +69,18 @@
</div>
</div>
{:else if rendered.chunk.type === "tool-call" || rendered.chunk.type === "tool-result"}
- <!-- Single tool call/result: a regular (non-speech) card. Nested in the
- chat-start grid via a transparent, padding-stripped chat-bubble shim so
- the card inherits the same left offset as the bubble bodies. -->
+ {@const step = currentTurnId ? stepMetrics(telemetry, currentTurnId, sIdx) : undefined}
+ {@const toolDur = step?.toolDurationMs}
<div class="chat chat-start [&>.chat-bubble]:max-w-full [&>.chat-bubble]:p-0">
<div class="chat-bubble bg-transparent">
{#if rendered.chunk.type === "tool-call"}
<div class="w-fit max-w-full rounded-box bg-base-200 p-3 text-sm">
- <strong>{rendered.chunk.toolName}</strong>
+ <div class="flex items-center gap-2">
+ <strong>{rendered.chunk.toolName}</strong>
+ {#if toolDur !== undefined && toolDur > 0}
+ <span class="badge badge-ghost badge-xs ml-auto">{formatMs(toolDur)}</span>
+ {/if}
+ </div>
<pre class="text-xs mt-1">{JSON.stringify(rendered.chunk.input, null, 2)}</pre>
</div>
{:else}
@@ -73,19 +88,43 @@
class="w-fit max-w-full rounded-box bg-base-200 p-3 text-sm"
class:text-error={rendered.chunk.isError}
>
- <strong>{rendered.chunk.toolName}</strong>
+ <div class="flex items-center gap-2">
+ <strong>{rendered.chunk.toolName}</strong>
+ {#if toolDur !== undefined && toolDur > 0}
+ <span class="badge badge-ghost badge-xs ml-auto">{formatMs(toolDur)}</span>
+ {/if}
+ </div>
<pre class="text-xs mt-1">{rendered.chunk.content}</pre>
</div>
{/if}
</div>
</div>
{:else}
- <!-- Assistant text / system / error: an INVISIBLE speech bubble — same
- chat-start grid as the user bubble, so it inherits identical left spacing. -->
+ {@const step = currentTurnId ? stepMetrics(telemetry, currentTurnId, sIdx) : undefined}
+ {@const tps = step ? stepTps(step) : undefined}
<div class="chat chat-start [&>.chat-bubble]:max-w-5xl">
<div class="chat-bubble w-full bg-transparent">
{#if rendered.chunk.type === "text"}
- <p>{rendered.chunk.text}</p>
+ <ul class="list rounded-box text-sm">
+ <li class="list-row">
+ <p>{rendered.chunk.text}</p>
+ </li>
+ <!-- Step metrics footer: time · TPS · tokens. A separator is emitted only
+      between two badges that are both present, so a partially-filled step
+      never shows dangling dots. -->
+ {#if step && (step.genTotalMs !== undefined || tps !== undefined || step.usage?.outputTokens !== undefined)}
+   <li class="list-row">
+     {#if step.genTotalMs !== undefined}
+       <span class="badge badge-ghost badge-xs">{formatMs(step.genTotalMs)}</span>
+     {/if}
+     {#if tps !== undefined}
+       {#if step.genTotalMs !== undefined}
+         <span>·</span>
+       {/if}
+       <span class="badge badge-ghost badge-xs">{Math.round(tps)} t/s</span>
+     {/if}
+     {#if step.usage?.outputTokens !== undefined}
+       {#if step.genTotalMs !== undefined || tps !== undefined}
+         <span>·</span>
+       {/if}
+       <span class="badge badge-ghost badge-xs">{step.usage.outputTokens} tok</span>
+     {/if}
+   </li>
+ {/if}
+ </ul>
{:else if rendered.chunk.type === "error"}
<div class="text-error" role="alert">
{rendered.chunk.message}
@@ -102,20 +141,24 @@
{/snippet}
<div class="flex flex-col gap-2 p-4 pl-6" role="log" aria-live="polite">
- {#each rows as { group, key } (key)}
+ {#each rows as { group, key, stepIdx } (key)}
{#if group.kind === "single"}
- {@render chunkRow(group.chunk)}
+ {@render chunkRow(group.chunk, stepIdx)}
{:else}
- <!-- Batched tool calls (one step): a single bubble holding a DaisyUI list,
- one row per call paired with its result. Same chat-start grid shim as
- the single tool card so it lines up with the other messages. -->
+ {@const step = currentTurnId ? stepMetrics(telemetry, currentTurnId, stepIdx) : undefined}
+ {@const toolDur = step?.toolDurationMs}
<div class="chat chat-start [&>.chat-bubble]:max-w-full [&>.chat-bubble]:p-0">
<div class="chat-bubble bg-transparent">
<ul class="list w-fit max-w-full rounded-box bg-base-200 text-sm">
{#each group.entries as entry (entry.call.toolCallId)}
<li class="list-row">
<div>
- <strong>{entry.call.toolName}</strong>
+ <div class="flex items-center gap-2">
+ <strong>{entry.call.toolName}</strong>
+ {#if toolDur !== undefined && toolDur > 0}
+ <span class="badge badge-ghost badge-xs ml-auto">{formatMs(toolDur)}</span>
+ {/if}
+ </div>
<pre class="text-xs mt-1">{JSON.stringify(entry.call.input, null, 2)}</pre>
{#if entry.result}
<pre
diff --git a/src/features/chat/ui/TurnSummary.svelte b/src/features/chat/ui/TurnSummary.svelte
new file mode 100644
index 0000000..eedb0cc
--- /dev/null
+++ b/src/features/chat/ui/TurnSummary.svelte
@@ -0,0 +1,75 @@
+<script lang="ts">
+ import type { TelemetryState } from "../../../core/telemetry";
+ import {
+ stepCount,
+ totalInputTokens,
+ totalOutputTokens,
+ turnMetrics,
+ turnTps,
+ } from "../../../core/telemetry";
+
+ interface Props {
+ telemetry: TelemetryState;
+ turnId: string | null;
+ }
+
+ let { telemetry, turnId }: Props = $props();
+
+ /**
+  * Format a millisecond duration for display: `850ms`, `1.2s`, or `2m5s`.
+  *
+  * The value is rounded to the displayed precision BEFORE the unit is chosen,
+  * so a rounding carry can never produce `1000ms`, `60.0s`, or `1m60s`.
+  */
+ function formatMs(ms: number): string {
+   const wholeMs = Math.round(ms);
+   if (wholeMs < 1000) return `${wholeMs}ms`;
+   // Tenths of a second — the precision of the seconds form.
+   const tenths = Math.round(ms / 100);
+   if (tenths < 600) return `${(tenths / 10).toFixed(1)}s`;
+   // Whole seconds first, then split, so the seconds part stays within 0–59.
+   const totalSeconds = Math.round(ms / 1000);
+   return `${Math.floor(totalSeconds / 60)}m${totalSeconds % 60}s`;
+ }
+
+ // Label/value rows for the stats block. `null` means "render nothing" (no
+ // turn selected, or no metrics recorded for it); otherwise one row per
+ // statistic that is currently available, in display order.
+ const stats = $derived.by(() => {
+   if (turnId === null) return null;
+   const turn = turnMetrics(telemetry, turnId);
+   if (turn === undefined) return null;
+
+   const rows: { label: string; value: string }[] = [];
+   const addRow = (label: string, value: string): void => {
+     rows.push({ label, value });
+   };
+
+   if (turn.wallMs !== undefined) addRow("Turn", formatMs(turn.wallMs));
+
+   const output = totalOutputTokens(telemetry, turnId);
+   const input = totalInputTokens(telemetry, turnId);
+   const haveOutput = output !== undefined;
+   const haveInput = input !== undefined;
+   // The combined figure treats a missing side as zero.
+   if (haveOutput || haveInput) {
+     addRow("Tokens", ((output ?? 0) + (input ?? 0)).toLocaleString());
+   }
+   if (output !== undefined) addRow("Output", output.toLocaleString());
+   if (input !== undefined) addRow("Input", input.toLocaleString());
+
+   const steps = stepCount(telemetry, turnId);
+   if (steps > 0) addRow("Steps", String(steps));
+
+   const tokensPerSecond = turnTps(telemetry, turnId);
+   if (tokensPerSecond !== undefined) addRow("TPS", `${Math.round(tokensPerSecond)} t/s`);
+
+   return rows;
+ });
+</script>
+
+{#if stats !== null}
+ <div class="chat chat-start [&>.chat-bubble]:max-w-5xl">
+ <div class="chat-bubble w-full bg-transparent">
+ <div class="stats stats-vertical lg:stats-horizontal">
+ {#each stats as stat}
+ <div class="stat">
+ <div class="stat-title">{stat.label}</div>
+ <div class="stat-value text-sm">{stat.value}</div>
+ </div>
+ {/each}
+ </div>
+ </div>
+ </div>
+{/if}