summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAdam Malczewski <[email protected]>2026-06-10 10:06:27 +0900
committerAdam Malczewski <[email protected]>2026-06-10 10:06:27 +0900
commitf8bf715abc8a89ec0c6370b40403c509b1ce2870 (patch)
tree915600a766e042a8491ac57423542cde1dda1eb6
parentccfd2f4157c1cbbb3d8aeceee94d9e963a82ab03 (diff)
downloaddispatch-web-f8bf715abc8a89ec0c6370b40403c509b1ce2870.tar.gz
dispatch-web-f8bf715abc8a89ec0c6370b40403c509b1ce2870.zip
feat(metrics): per-turn + per-step token/timing metrics bubbles
Consume [email protected] / [email protected] metrics: usage.stepId, step-complete (ttft/decode/genTotal), done.durationMs/usage, and the durable GET /conversations/:id/metrics endpoint. - core/metrics: pure live-fold + durable-merge reducer; decode-rate TPS; head-aligned, stable placement; progressive per-step rows (each shown as its step ends) with the turn-total row gated on the done event. - features/chat: store folds metric events + hydrates durable TurnMetrics; ChatView renders inline step bubbles + a turn-total bubble. - app: MetricsSync HTTP effect (tolerates 404) injected into chat stores. - scripts/live-probe: drives the metrics path; live-verified 17/17 vs bin/up. - docs: regenerate .dispatch wire/transport mirrors to 0.4.0; glossary terms (turn/step metrics, TTFT, decode time, TPS, metrics bubble); trim handoff.
-rw-r--r--.dispatch/package-agent.md3
-rw-r--r--.dispatch/transport-contract.reference.md47
-rw-r--r--.dispatch/wire.reference.md119
-rw-r--r--GLOSSARY.md6
-rw-r--r--backend-handoff.md92
-rw-r--r--scripts/live-probe.ts81
-rw-r--r--src/app/App.svelte2
-rw-r--r--src/app/store.svelte.ts14
-rw-r--r--src/core/metrics/format.test.ts199
-rw-r--r--src/core/metrics/format.ts69
-rw-r--r--src/core/metrics/index.ts17
-rw-r--r--src/core/metrics/place.test.ts469
-rw-r--r--src/core/metrics/place.ts151
-rw-r--r--src/core/metrics/reducer.test.ts368
-rw-r--r--src/core/metrics/reducer.ts239
-rw-r--r--src/core/metrics/types.ts68
-rw-r--r--src/features/chat/index.ts3
-rw-r--r--src/features/chat/ports.ts9
-rw-r--r--src/features/chat/store.svelte.ts29
-rw-r--r--src/features/chat/store.test.ts308
-rw-r--r--src/features/chat/test-helpers.ts40
-rw-r--r--src/features/chat/ui.test.ts219
-rw-r--r--src/features/chat/ui/ChatView.svelte54
23 files changed, 2495 insertions, 111 deletions
diff --git a/.dispatch/package-agent.md b/.dispatch/package-agent.md
index 73e960c..5c1a54d 100644
--- a/.dispatch/package-agent.md
+++ b/.dispatch/package-agent.md
@@ -26,7 +26,8 @@ it, test it, and write a report — nothing else. If no single unit is named, st
- **The contracts you consume:** reproduced IN-REPO under `.dispatch/*.reference.md` — read THOSE:
- `.dispatch/ui-contract.reference.md` — `@dispatch/ui-contract` (surfaces + surface WS protocol).
- `.dispatch/wire.reference.md` — `@dispatch/wire` (`Chunk`/`StoredChunk`+`seq`/`ChatMessage`/
- `AgentEvent`/`TurnSealedEvent`/`Usage` — the chat wire types).
+ `AgentEvent`/`TurnSealedEvent`/`Usage` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`,
+ `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs` — the chat wire types).
- `.dispatch/transport-contract.reference.md` — `@dispatch/transport-contract` (HTTP endpoints +
`ChatRequest`/`ModelsResponse`/`ConversationHistoryResponse` + WS chat ops + the unified
`WsClientMessage`/`WsServerMessage` unions).
diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md
index fcc2cbf..d06a7b4 100644
--- a/.dispatch/transport-contract.reference.md
+++ b/.dispatch/transport-contract.reference.md
@@ -5,17 +5,21 @@
> hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally —
> this file is for READING only.
>
-> **Orchestrator:** SNAPSHOT of `[email protected]`. Regenerate whenever it changes.
-> Depends on `@dispatch/[email protected]` (see `wire.reference.md`) + `@dispatch/ui-contract`
-> (see `ui-contract.reference.md`).
+> **Orchestrator:** SNAPSHOT of `[email protected]` (committed, backend `6db12ff`; the metrics
+> endpoint shipped + version-bumped + LIVE-VERIFIED). Depends on `@dispatch/[email protected]` (see
+> `wire.reference.md`) + `@dispatch/ui-contract` (see `ui-contract.reference.md`).
>
-> **0.2.0 change (step grouping):** no shape change HERE — this contract's own types are
-> identical. It only re-exports the bumped `@dispatch/wire`, whose `AgentEvent` tool variants
-> now carry a required `stepId` and whose tool `Chunk`s carry an optional `stepId`. The
-> `chat.delta` events streamed over WS and the `ConversationHistoryResponse.chunks` you already
-> consume therefore now carry the step grouping key (see `wire.reference.md`).
+> **0.3.0 change (token + timing metrics):** adds the durable metrics READ endpoint
+> `GET /conversations/:id/metrics` → `ConversationMetricsResponse` (`{ turns: TurnMetrics[] }`), and
+> re-exports `StepMetrics` / `TurnMetrics` from `@dispatch/wire`. This is a SEPARATE read axis from
+> the seq-cursor history (`GET /conversations/:id`): metrics are keyed PER TURN (not per chunk), so
+> they get their own route. `turns` is every SEALED turn's `TurnMetrics` in turn order (an in-flight
+> turn is absent until its metrics persist post-seal). The live `usage`/`step-complete`/`done`
+> packets it mirrors are transient (NOT persisted) and ride the `chat.delta`/NDJSON `AgentEvent`
+> stream you already consume — see `wire.reference.md`. The contract's OWN chat/history shapes are
+> otherwise unchanged from 0.2.0.
-## Endpoints (backend, confirmed live — CORS wildcard `*`, HTTP port 24203, WS port 24205)
+## Endpoints (backend — CORS wildcard `*`, HTTP port 24203, WS port 24205)
- `POST /chat` — body `ChatRequest` (JSON); response NDJSON stream, one `AgentEvent` per line;
resolved id also in `X-Conversation-Id` header.
@@ -23,6 +27,8 @@
- `GET /conversations/:id?sinceSeq=<n>` — `ConversationHistoryResponse`: RAW, append-order,
seq-ordered slice with `seq > n` (NOT reconciled — dangling tool-calls returned as-is).
`latestSeq` = last chunk's `seq`, or the requested `sinceSeq` when caught up (empty `chunks`).
+- `GET /conversations/:id/metrics` — `ConversationMetricsResponse`: every SEALED turn's `TurnMetrics`
+ in turn order (per-turn token + timing; NOT seq-filtered). IMPLEMENTED + LIVE-VERIFIED (probe 17/17).
- WebSocket on :24205 — ONE path-agnostic socket multiplexes surface ops
(`@dispatch/ui-contract`) + chat ops (below). Open once, send `WsClientMessage`, receive
`WsServerMessage`. Live `AgentEvent` deltas carry `conversationId`+`turnId` but **no `seq`**
@@ -42,9 +48,9 @@
*/
import type { SurfaceClientMessage, SurfaceServerMessage } from "@dispatch/ui-contract";
-import type { AgentEvent, StoredChunk } from "@dispatch/wire";
+import type { AgentEvent, StoredChunk, TurnMetrics } from "@dispatch/wire";
-export type { AgentEvent, StoredChunk } from "@dispatch/wire";
+export type { AgentEvent, StepMetrics, StoredChunk, TurnMetrics } from "@dispatch/wire";
/**
* Request body for `POST /chat` (sent as JSON).
@@ -88,6 +94,25 @@ export interface ConversationHistoryResponse {
readonly latestSeq: number;
}
+/**
+ * Response body for `GET /conversations/:id/metrics` — the persisted per-turn
+ * (and per-step) token + timing metrics for a conversation, for a client
+ * reopening a past conversation to render historical usage/latency.
+ *
+ * This is a SEPARATE axis from the two other read concerns and is deliberately
+ * its own endpoint: the live `usage`/`step-complete`/`done` events are transient
+ * (not persisted), and `ConversationHistoryResponse` carries seq-cursor chunk
+ * CONTENT. Metrics are keyed per TURN (not per chunk) and so are not seq-filtered
+ * — hence a sibling route rather than a field on the history response.
+ *
+ * `turns` is every SEALED turn's `TurnMetrics` in turn order. A turn appears only
+ * after its metrics were persisted (post-seal); an in-flight or unsealed turn is
+ * absent until then.
+ */
+export interface ConversationMetricsResponse {
+ readonly turns: readonly TurnMetrics[];
+}
+
// ─── WebSocket chat ops ───────────────────────────────────────────────────────
// The persistent WS connection multiplexes chat ops (below) with surface ops
// (`@dispatch/ui-contract`). Chat `type`s are namespaced (`chat.*`) so they
diff --git a/.dispatch/wire.reference.md b/.dispatch/wire.reference.md
index ed95351..ee5488c 100644
--- a/.dispatch/wire.reference.md
+++ b/.dispatch/wire.reference.md
@@ -4,13 +4,27 @@
> types WITHOUT following the `file:` dep symlink out of this repo (which hangs on a permission
> prompt). Your CODE still imports `@dispatch/wire` normally — this file is for READING only.
>
-> **Orchestrator:** SNAPSHOT of `[email protected]`. Regenerate whenever `@dispatch/wire` changes.
+> **Orchestrator:** SNAPSHOT of `[email protected]` (committed, backend `6db12ff`; the metrics types below
+> shipped + version-bumped). Regenerate whenever `@dispatch/wire` changes.
>
-> **0.2.0 change (step grouping):** `ToolCallChunk`/`ToolResultChunk` gained an OPTIONAL
-> `stepId?: StepId`; `TurnToolCallEvent`/`TurnToolResultEvent` gained a REQUIRED `stepId: StepId`.
-> A `StepId` is the per-step grouping key for batched/parallel tool calls — group by equality.
-> Live: read `event.stepId`. Replay: read `storedChunk.chunk.stepId` (NOT the envelope; absent on
-> pre-0.2.0 rows / non-tool chunks — tolerate absence). `StoredChunk` envelope is UNCHANGED.
+> **0.3.0 changes (token + timing metrics):**
+> - **Live per-step/per-turn telemetry on the event stream** (transient — NOT persisted):
+> `TurnUsageEvent` gained an OPTIONAL `stepId?` (attribute tokens per step). A NEW
+> `TurnStepCompleteEvent` (`type: "step-complete"`, REQUIRED `stepId`) carries the per-step
+> generation timing `ttftMs?` / `decodeMs?` / `genTotalMs?` (all optional — present only when the
+> runtime had a clock; `ttftMs`/`decodeMs` additionally require a first content token). `TurnDoneEvent`
+> gained an OPTIONAL `durationMs?` (total turn wall-clock) + OPTIONAL `usage?` (aggregate across
+> steps). `TurnToolResultEvent` gained an OPTIONAL `durationMs?` (tool execution time).
+> - **Durable, replayable metrics** (persisted, keyed per turn): NEW `StepMetrics` + `TurnMetrics`
+> — the persisted counterparts of the live `usage` + `step-complete` + `done` packets. Served by
+> `GET /conversations/:id/metrics` (see `transport-contract.reference.md`). Build the SAME
+> `TurnMetrics` shape from the live events for the in-flight turn; the durable endpoint supplies it
+> for sealed turns. TPS is derived (`usage.outputTokens / (genTotalMs / 1000)`), not on the wire.
+> - **0.2.0 (still current — step grouping):** `ToolCallChunk`/`ToolResultChunk` carry an OPTIONAL
+> `stepId?: StepId`; `TurnToolCallEvent`/`TurnToolResultEvent` carry a REQUIRED `stepId: StepId`.
+> Group batched/parallel tool calls by `stepId` equality. Live: read `event.stepId`. Replay: read
+> `storedChunk.chunk.stepId` (NOT the envelope; tolerate absence). `StoredChunk` envelope is
+> UNCHANGED (`{ seq, role, chunk }` — carries NO `turnId`).
```ts
/**
@@ -168,6 +182,47 @@ export interface Usage {
readonly cacheWriteTokens?: number;
}
+// ─── Persisted metrics ───────────────────────────────────────────────────────
+
+/**
+ * Durable per-step metrics for a completed step — the persisted, replayable
+ * counterpart of the live `usage` + `step-complete` events. Combines the step's
+ * token usage with its generation timing so a client reopening a past
+ * conversation renders the same per-step token/latency breakdown it would have
+ * seen live. Built from the turn's events, stored by `conversation-store`, and
+ * served by `GET /conversations/:id/metrics`.
+ */
+export interface StepMetrics {
+ readonly stepId: StepId;
+ /** The step's token usage (all four counters; cache fields optional per `Usage`). */
+ readonly usage: Usage;
+ /** Time to first token (stream start → first text/reasoning delta). Optional — see `TurnStepCompleteEvent.ttftMs`. */
+ readonly ttftMs?: number;
+ /** Decode time (first token → stream end). Optional — see `TurnStepCompleteEvent.decodeMs`. */
+ readonly decodeMs?: number;
+ /** Total generation time for the step (stream start → stream end). Optional: present only when a clock was available. */
+ readonly genTotalMs?: number;
+}
+
+/**
+ * Durable per-turn metrics for a completed (sealed) turn — the persisted,
+ * replayable counterpart of the live `done` event's aggregate `usage` +
+ * `durationMs`, plus the per-step breakdown. `usage` is the aggregate across all
+ * steps; `steps` carries each step's `StepMetrics` in step order. Stored by
+ * `conversation-store` keyed by `turnId` and served by
+ * `GET /conversations/:id/metrics`. (`turnId` is the plain wire string carried
+ * on every `AgentEvent`, the join key to the live stream.)
+ */
+export interface TurnMetrics {
+ readonly turnId: string;
+ /** Aggregate token usage across all steps in the turn. */
+ readonly usage: Usage;
+ /** Total wall-clock duration of the turn (turn start → turn end). Optional: present only when a clock was available. */
+ readonly durationMs?: number;
+ /** Per-step metrics in step order. */
+ readonly steps: readonly StepMetrics[];
+}
+
// ─── Outward events ─────────────────────────────────────────────────────────
/**
@@ -183,6 +238,7 @@ export type AgentEvent =
| TurnToolResultEvent
| TurnToolOutputEvent
| TurnUsageEvent
+ | TurnStepCompleteEvent
| TurnErrorEvent
| TurnDoneEvent
| TurnSealedEvent;
@@ -251,6 +307,12 @@ export interface TurnToolResultEvent {
readonly toolName: string;
readonly content: string;
readonly isError: boolean;
+ /**
+ * How long the tool took to execute (dispatch → result), in milliseconds —
+ * the backend's authoritative execution time, distinct from any client-side
+ * wall-clock. Optional: present only when the runtime was given a clock.
+ */
+ readonly durationMs?: number;
}
/** Streaming output from a tool execution (e.g. shell stdout/stderr). */
@@ -268,9 +330,43 @@ export interface TurnUsageEvent {
readonly type: "usage";
readonly conversationId: string;
readonly turnId: string;
+ /**
+ * The step this usage report belongs to, so a consumer can attribute tokens
+ * per step (and join with the matching `step-complete` timing by `stepId`).
+ * Optional: absent when the runtime had no step context, and on usage emitted
+ * before this field existed.
+ */
+ readonly stepId?: StepId;
readonly usage: Usage;
}
+/**
+ * A step (one LLM round-trip) has completed — the authoritative per-step metrics
+ * packet, emitted once at the step's end (after the generation stream finishes),
+ * so its timing is final (unlike `usage`, which may arrive mid-stream). Carries
+ * the step's generation timing; join to the step's tokens via `stepId` on the
+ * `usage` event. All timing fields are optional: present only when the runtime
+ * was given a clock, and `ttftMs`/`decodeMs` additionally require that a first
+ * content token (text or reasoning) was observed this step.
+ */
+export interface TurnStepCompleteEvent {
+ readonly type: "step-complete";
+ readonly conversationId: string;
+ readonly turnId: string;
+ readonly stepId: StepId;
+ /** Time to first token: stream start → first text/reasoning delta. */
+ readonly ttftMs?: number;
+ /** Decode time: first token → stream end (generation total − TTFT). */
+ readonly decodeMs?: number;
+ /**
+ * Total generation time for the step: stream start → stream end. Present
+ * whenever a clock was available, even if no first token was seen (in which
+ * case `ttftMs`/`decodeMs` are absent). When a first token was seen,
+ * `genTotalMs === ttftMs + decodeMs`.
+ */
+ readonly genTotalMs?: number;
+}
+
/** An error occurred during the turn. */
export interface TurnErrorEvent {
readonly type: "error";
@@ -286,6 +382,17 @@ export interface TurnDoneEvent {
readonly conversationId: string;
readonly turnId: string;
readonly reason: string;
+ /**
+ * Total wall-clock duration of the turn (turn start → turn end), in
+ * milliseconds. Optional: present only when the runtime was given a clock.
+ */
+ readonly durationMs?: number;
+ /**
+ * Aggregate token usage across all steps in the turn — a convenience total so
+ * a consumer need not sum the per-step `usage` events. Optional (absent if the
+ * provider reported no usage).
+ */
+ readonly usage?: Usage;
}
/**
diff --git a/GLOSSARY.md b/GLOSSARY.md
index 2d25fd3..538ba7e 100644
--- a/GLOSSARY.md
+++ b/GLOSSARY.md
@@ -15,6 +15,10 @@
| **AgentEvent** | An outward event the runtime emits during a turn (text-delta, tool-call, usage, done, turn-sealed, …). | — |
| **model name** | The selectable id in `<credentialName>/<model>` form. | model id, model reference |
| **model catalog** | The list of available model names. | model list |
+| **turn metrics** | The durable, replayable per-turn metrics record for a sealed turn: aggregate `Usage` (tokens) + turn `durationMs` + its per-step `StepMetrics` (`TurnMetrics`). Persisted backend-side keyed by `turnId`, served by `GET /conversations/:id/metrics`. The persisted counterpart of the live `done` event's metrics; the FE folds the SAME shape from the live `usage`/`step-complete`/`done` events for the in-flight turn. | usage record, turn stats |
+| **step metrics** | The durable per-step metrics within a `TurnMetrics`: the step's `Usage` (tokens) + `ttftMs`/`decodeMs`/`genTotalMs` timing, keyed by `stepId` (`StepMetrics`). The persisted counterpart of the live `usage` + `step-complete` events. | step stats |
+| **TTFT** (time to first token) | Per-step latency: generation stream start → first content token (text or reasoning). One per step (each step re-prefills). On the wire as `step-complete.ttftMs` / `StepMetrics.ttftMs` (optional). | time-to-first-byte |
+| **decode time** | Per-step generation time after the first token (first token → stream end = `genTotalMs − ttftMs`). On the wire as `step-complete.decodeMs` / `StepMetrics.decodeMs` (optional). | — |
## Frontend-specific
| Term | Meaning | Aliases to avoid |
@@ -29,3 +33,5 @@
| **feature module** | A self-contained FE feature (chat, history explorer, …); feature-as-a-library, composed at the root. | — |
| **composition root** | The single place (`src/app/`) that imports + wires feature modules + the surface host. | — |
| **surface interpreter** | The generic renderer: field kind → component. Knows kinds, never surface ids. | — |
+| **metrics bubble** | The FE chat element that renders a turn's **turn metrics** (one per-turn total) and **step metrics** (one per step) as muted system-style bubbles at a turn's tail. UI presentation of `TurnMetrics`/`StepMetrics`; never a surface. | telemetry bubble, usage bubble, stats bubble |
+| **TPS** (tokens per second) | A FE-DERIVED decode rate: `outputTokens / (decodeMs / 1000)` (per step; per turn over Σ `decodeMs`), falling back to `genTotalMs` when `decodeMs` is absent. The backend-recommended basis (excludes first-token latency). Not carried on the wire; omitted when timing is absent. | throughput |
diff --git a/backend-handoff.md b/backend-handoff.md
index b0b0494..df6a618 100644
--- a/backend-handoff.md
+++ b/backend-handoff.md
@@ -5,93 +5,33 @@
> **From:** dispatch-web orchestrator · **To:** arch-rewrite orchestrator · **Courier:** the user.
> `lsp` does NOT span the repos (ORCHESTRATOR §5) — every cross-repo ask flows through here.
-_Last updated: 2026-06-06 — Slice 3 (tabs + model selector + DaisyUI) FE-complete; no new backend asks._
+_Last updated: 2026-06-10 — metrics display slice FE-complete + live-verified (probe 17/17). No open backend asks._
---
-## 1. Current FE status
+## 1. Pinned backend contracts (consumed by the FE)
-| Slice | State |
-|---|---|
-| **Slice 1** — surface system + WS + composition root | ✅ DONE, committed, green. |
-| **Slice 2** — conversation transcript: cache + delta streaming (design §6) | ✅ DONE + **LIVE-VERIFIED** — live e2e probe **9/9** against `bin/up` (see §6). |
-| **Slice 3** — tabs (multi-conversation) + model selector + DaisyUI/dracula | ✅ FE-COMPLETE — svelte-check 0/0, **281 vitest**, biome clean, build ok. Per-tab chat stores, one WS routed by `conversationId`, local-forget on tab close, tabs persisted to localStorage. No backend change needed. |
-
-**Slice 2 units built** (all pure-core / injected-shell, single-owner): `core/chunks` (the one
-transcript reducer) · `core/wire` (contract-conformance drift guard) · `adapters/ws` (now multiplexes
-`chat.send`/`chat.delta` on the one socket) · `features/conversation-cache` (pure reconcile/evict +
-`ConversationChunkStore` port) · `adapters/idb` (IndexedDB impl) · `features/chat` (runes view-model
-+ `ChatView`/`Composer`) · `app` (one socket for surface+chat, host-relative HTTP `:24203` history
-sync, IndexedDB cache, renders the chat). Consumes ONLY the pinned `@0.1.0` contracts — no backend
-change was needed.
-
-## 2. Pinned backend contracts (consumed by the FE)
-
-All three pinned as `file:` deps at **`@0.1.0`** and live-verified consumable (import smoke-test passes):
+Pinned as `file:` deps: **`[email protected]`; `[email protected]`; `[email protected]`**.
| Package | Used for |
|---|---|
-| `@dispatch/ui-contract` | surfaces + surface WS protocol (Slice 1) |
-| `@dispatch/wire` | chat wire types: `Chunk`/`StoredChunk`(+`seq`)/`ChatMessage`/`AgentEvent`/`TurnSealedEvent`/`Usage` |
-| `@dispatch/transport-contract` | HTTP endpoints + `ChatRequest`/`ModelsResponse`/`ConversationHistoryResponse` + WS chat ops + unified `WsClientMessage`/`WsServerMessage` |
-
-Backend endpoints in use (port **24203** HTTP, **24205** WS, CORS wildcard `*` — all confirmed live):
-`POST /chat` (NDJSON), `GET /models`, `GET /conversations/:id?sinceSeq=<n>`, WS `chat.send`→`chat.delta`.
-Confirmed invariants C1–C4 (raw seq-ordered history slice · one path-agnostic WS multiplexing surface+chat · `turn-sealed` fires post-persist = cache-commit · live deltas carry no `seq`).
-
-Mirrored in-repo for headless agents: `.dispatch/ui-contract.reference.md`, `.dispatch/wire.reference.md`,
-`.dispatch/transport-contract.reference.md` (regenerated on any contract bump).
-
-## 3. Open items FOR THE BACKEND
+| `@dispatch/ui-contract` | surfaces + surface WS protocol |
+| `@dispatch/wire` | `Chunk`/`StoredChunk`(+`seq`)/`ChatMessage`/`AgentEvent`/`TurnSealedEvent`/`Usage`/`StepId` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`, `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs` |
+| `@dispatch/transport-contract` | `ChatRequest`/`ModelsResponse`/`ConversationHistoryResponse`/`ConversationMetricsResponse` + WS chat ops + `WsClientMessage`/`WsServerMessage` |
-### 3.1 Resolved / answered
-- ✅ Wire-types split, per-chunk `seq`, history endpoint, WS chat multiplexing, CORS — all delivered
- (backend commit `812621c`).
+Endpoints in use (HTTP **24203**, WS **24205**, CORS `*`):
+`POST /chat` (NDJSON) · `GET /models` · `GET /conversations/:id?sinceSeq=<n>` ·
+`GET /conversations/:id/metrics` · WS `chat.send`→`chat.delta`.
-### 3.2a Finding — model is NOT persisted/exposed per conversation (FE handled it)
-`model` is a per-turn `ChatRequest` field only; `session-orchestrator` resolves it per `handleMessage`
-and never stores it, and `conversation-store`/`ConversationHistoryResponse` carry no model. So the FE
-**persists the selected model per tab** (localStorage). No action needed. OPTIONAL future nicety: if
-you ever persist + expose a per-conversation "last model" (e.g. on the `GET /conversations` list when
-it lands), the FE could seed a reopened tab's model from the server instead of localStorage.
+Mirrored in-repo for headless agents: `.dispatch/{ui-contract,wire,transport-contract}.reference.md`
+(regenerate on any contract bump).
-### 3.2 FYI — non-blocking gotcha (no action required unless you publish externally)
-- **`workspace:*` breaks external `file:` consumption under bun.** `transport-contract`'s deps are
- `@dispatch/ui-contract`/`@dispatch/wire` at `workspace:*`; `bun install` from dispatch-web could not
- resolve them ("Workspace dependency not found"). **Worked around FE-side** with a `package.json`
- `overrides` block mapping both to their `file:` paths — no backend change needed now. If you ever
- publish these to a registry, prefer real semver ranges over `workspace:*` for out-of-monorepo
- consumers.
+## 2. Open asks FOR THE BACKEND
-### 3.3 Pending asks / roadblocks
-- _(none open)_ — Slice 2 needed no backend change. One coordination item below (§6).
+- _(none open)_
-## 6. LIVE end-to-end probe — DONE ✅ (9/9, against `bin/up`)
+## 3. Likely NEXT backend asks (heads-up, not yet requested)
-Ran `bun scripts/live-probe.ts` (drives the FE's REAL network-facing stack — `adapters/ws` socket,
-`core/chunks` reducer, `conversation-cache` + `adapters/idb`, and the HTTP history endpoint — against
-the running backend). **All 9 checks passed:**
-- one WS (`:24205`) delivered the surface `catalog` AND the chat stream;
-- `chat.send` → ~33 `chat.delta` events (incl. `text-delta`) → folded to the expected assistant text
- → `turn-sealed`;
-- post-seal `GET :24203/conversations/:id?sinceSeq=0` → 3 seq-monotonic `StoredChunk`s
- (`latestSeq=3`); `applyHistory` superseded the provisional turn (`sealedTurnId` cleared);
-- IndexedDB cache persisted the sealed turn; committed transcript shows the assistant text.
-
-**No backend mismatch found — every confirmed invariant (C1–C4) held live.** One FE-internal note
-(not a backend matter): the idb adapter relies on the global `IDBKeyRange` (fine in a browser; the
-probe needed `fake-indexeddb/auto` to supply it under Bun).
-
-Also caught + fixed during browser bring-up (FE-only bug, not backend): a BLANK page on plain-HTTP
-non-localhost origins (`http://arch-razer:24204`) because `crypto.randomUUID()` is secure-context-only
-— now replaced with a `getRandomValues`-based fallback.
-
-## 4. (history) Slice 2 unit map — delivered, see §1.
-
-## 5. Likely NEXT backend asks (heads-up, not yet requested)
-
-These belong to **later** FE slices (design §7 "later slice") — flagged early so they're on your radar:
-- `GET /conversations` — conversation list / sidebar (FE history explorer / conversation switcher).
+- `GET /conversations` — conversation list / sidebar (history explorer / switcher); could also expose a
+ per-conversation "last model" so a reopened tab seeds its model from the server instead of localStorage.
- `POST /conversations/:id/cancel` — "stop generating".
-
-When the FE reaches those slices, the concrete request will be filed here in §3.3.
diff --git a/scripts/live-probe.ts b/scripts/live-probe.ts
index 2c4dfb9..2b2880b 100644
--- a/scripts/live-probe.ts
+++ b/scripts/live-probe.ts
@@ -30,6 +30,7 @@ import type {
ChatDeltaMessage,
ChatErrorMessage,
ConversationHistoryResponse,
+ ConversationMetricsResponse,
} from "@dispatch/transport-contract";
import type { SurfaceServerMessage } from "@dispatch/ui-contract";
import { createIdbChunkStore } from "../src/adapters/idb/index.ts";
@@ -43,6 +44,13 @@ import {
selectMessages,
type TranscriptState,
} from "../src/core/chunks/index.ts";
+import {
+ applyDurableMetrics,
+ foldMetricsEvent,
+ initialMetricsState,
+ type MetricsState,
+ selectOrderedTurnMetrics,
+} from "../src/core/metrics/index.ts";
import { createConversationCache } from "../src/features/conversation-cache/index.ts";
const WS_URL = process.env.PROBE_WS ?? "ws://localhost:24205";
@@ -74,6 +82,15 @@ async function historySync(id: string, sinceSeq: number): Promise<ConversationHi
return (await res.json()) as ConversationHistoryResponse;
}
+/** Durable metrics fetch — returns the response, or the HTTP status when not OK
+ * (the endpoint is being implemented backend-side; the FE tolerates a 404). */
+async function metricsSync(id: string): Promise<ConversationMetricsResponse | { status: number }> {
+ const url = `${HTTP_BASE}/conversations/${encodeURIComponent(id)}/metrics`;
+ const res = await fetch(url, { headers: { Origin: "http://localhost:24204" } });
+ if (!res.ok) return { status: res.status };
+ return (await res.json()) as ConversationMetricsResponse;
+}
+
type ChatMsg = ChatDeltaMessage | ChatErrorMessage;
type Socket = ReturnType<typeof createSurfaceSocket>;
@@ -87,8 +104,15 @@ async function runTurn(
socket: Socket,
conversationId: string,
prompt: string,
-): Promise<{ state: TranscriptState; deltas: number; sealed: boolean; error: string | null }> {
+): Promise<{
+ state: TranscriptState;
+ metrics: MetricsState;
+ deltas: number;
+ sealed: boolean;
+ error: string | null;
+}> {
let state = initialState();
+ let metrics = initialMetricsState();
let deltas = 0;
let sealed = false;
let error: string | null = null;
@@ -102,6 +126,7 @@ async function runTurn(
}
deltas++;
state = foldEvent(state, msg.event);
+ metrics = foldMetricsEvent(metrics, msg.event);
if (msg.event.type === "turn-sealed") {
sealed = true;
done.resolve();
@@ -113,7 +138,7 @@ async function runTurn(
await done.promise;
clearTimeout(timeout);
handlers.delete(conversationId);
- return { state, deltas, sealed, error };
+ return { state, metrics, deltas, sealed, error };
}
function toolChunksOf(state: TranscriptState) {
@@ -178,6 +203,58 @@ async function main() {
.join("");
record("turn 1 committed transcript has assistant text", committedText.length > 0);
+ // ─── Metrics: LIVE token + timing ([email protected] usage/step-complete/done) ──────
+ const liveTurns = selectOrderedTurnMetrics(t1.metrics);
+ const m1 = liveTurns[0];
+ record(
+ "turn 1 LIVE metrics: a turn with output tokens",
+ m1 !== undefined && m1.usage.outputTokens > 0,
+ m1
+ ? `in=${m1.usage.inputTokens} out=${m1.usage.outputTokens} steps=${m1.steps.length}`
+ : "no turn",
+ );
+ if (m1 !== undefined) {
+ const anyGen = m1.steps.some((s) => s.genTotalMs !== undefined);
+ const anyTtft = m1.steps.some((s) => s.ttftMs !== undefined);
+ note(
+ `live timing: durationMs=${m1.durationMs ?? "—"}, ` +
+ `genTotalMs present=${anyGen}, ttftMs present=${anyTtft}`,
+ );
+ record(
+ "turn 1 LIVE metrics carries timing (durationMs or step genTotalMs)",
+ m1.durationMs !== undefined || anyGen,
+ "requires the backend runtime to have a clock",
+ );
+ }
+
+ // ─── Metrics: DURABLE endpoint (GET /conversations/:id/metrics) ──────────────
+ const dm = await metricsSync(textConv);
+ if ("status" in dm) {
+ note(
+ `durable /metrics not available yet (HTTP ${dm.status}) — FE degrades to live-only, as designed`,
+ );
+ record(
+ "durable /metrics is implemented OR gracefully absent (404)",
+ dm.status === 404 || dm.status === 405,
+ `HTTP ${dm.status}`,
+ );
+ } else {
+ record(
+ "durable /metrics returned TurnMetrics[]",
+ Array.isArray(dm.turns),
+ `${dm.turns.length} turn(s)`,
+ );
+ const durableMerged = selectOrderedTurnMetrics(
+ applyDurableMetrics(initialMetricsState(), dm.turns),
+ );
+ const d1 = durableMerged[0];
+ record(
+ "durable /metrics turn has token usage",
+ d1 !== undefined && d1.usage.outputTokens > 0,
+ d1 ? `out=${d1.usage.outputTokens} steps=${d1.steps.length}` : "no turn",
+ );
+ }
+
// ─── Turn 2: tool-call batching ([email protected] stepId) ─────────────────────────
console.log(`\n[live-probe] TURN 2 (tools): "${TOOL_PROMPT}"`);
const toolConv = crypto.randomUUID();
diff --git a/src/app/App.svelte b/src/app/App.svelte
index 61b4cb9..857a1e5 100644
--- a/src/app/App.svelte
+++ b/src/app/App.svelte
@@ -62,7 +62,7 @@
<div class="flex-1 overflow-y-auto">
{#key store.activeConversationId}
- <ChatView chunks={store.activeChat.chunks} />
+ <ChatView chunks={store.activeChat.chunks} turnMetrics={store.activeChat.turnMetrics} />
{/key}
</div>
diff --git a/src/app/store.svelte.ts b/src/app/store.svelte.ts
index 760c390..fe3c55c 100644
--- a/src/app/store.svelte.ts
+++ b/src/app/store.svelte.ts
@@ -2,6 +2,7 @@ import type {
ChatDeltaMessage,
ChatErrorMessage,
ConversationHistoryResponse,
+ ConversationMetricsResponse,
ModelsResponse,
} from "@dispatch/transport-contract";
import type { SurfaceServerMessage, SurfaceSpec } from "@dispatch/ui-contract";
@@ -17,7 +18,7 @@ import {
subscribe as protocolSubscribe,
unsubscribe as protocolUnsubscribe,
} from "../core/protocol";
-import type { ChatStore } from "../features/chat";
+import type { ChatStore, MetricsSync } from "../features/chat";
import { createChatStore } from "../features/chat";
import type { ConversationCache } from "../features/conversation-cache";
import { createConversationCache } from "../features/conversation-cache";
@@ -73,6 +74,15 @@ function createHistorySync(
};
}
+function createMetricsSync(httpBase: string, fetchImpl: typeof fetch): MetricsSync {
+ return async (conversationId: string) => {
+ const url = `${httpBase}/conversations/${encodeURIComponent(conversationId)}/metrics`;
+ const res = await fetchImpl(url);
+ if (!res.ok) return { turns: [] };
+ return (await res.json()) as ConversationMetricsResponse;
+ };
+}
+
export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
let protocol = $state<ProtocolState>(protocolInitialState());
let selectedId = $state<string | null>(null);
@@ -112,6 +122,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
);
const historySync = createHistorySync(httpBase, fetchImpl);
+ const metricsSync = createMetricsSync(httpBase, fetchImpl);
const chatStores = new Map<string, ChatStore>();
@@ -125,6 +136,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
},
},
historySync,
+ metricsSync,
cache,
});
}
diff --git a/src/core/metrics/format.test.ts b/src/core/metrics/format.test.ts
new file mode 100644
index 0000000..9881e50
--- /dev/null
+++ b/src/core/metrics/format.test.ts
@@ -0,0 +1,199 @@
+import type { StepId, StepMetrics, TurnMetrics } from "@dispatch/wire";
+import { describe, expect, it } from "vitest";
+import { computeTps, viewStepMetrics, viewTurnMetrics } from "./format";
+
+describe("computeTps", () => {
+ it("null when elapsed missing", () => {
+ expect(computeTps(100, undefined)).toBeNull();
+ });
+
+ it("null when elapsed is zero", () => {
+ expect(computeTps(100, 0)).toBeNull();
+ });
+
+ it("null when elapsed is negative", () => {
+ expect(computeTps(100, -100)).toBeNull();
+ });
+
+ it("computes tokens per second", () => {
+ expect(computeTps(1000, 2000)).toBe(500);
+ });
+
+ it("computes fractional tps", () => {
+ expect(computeTps(100, 3000)).toBeCloseTo(33.33, 1);
+ });
+});
+
+describe("viewStepMetrics", () => {
+ it("formats tokens with thousands separator, tps, and durations", () => {
+ const step: StepMetrics = {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 1234, outputTokens: 567 },
+ ttftMs: 820,
+ decodeMs: 1200,
+ genTotalMs: 2020,
+ };
+ const view = viewStepMetrics(step, 0);
+ expect(view.label).toBe("step 1");
+ expect(view.tokensLabel).toBe("1,801 tok");
+ expect(view.tps).toBe("473 tok/s");
+ expect(view.ttft).toBe("820ms");
+ expect(view.decode).toBe("1.2s");
+ expect(view.genTotal).toBe("2.0s");
+ });
+
+ it("handles missing timing fields", () => {
+ const step: StepMetrics = {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ };
+ const view = viewStepMetrics(step, 0);
+ expect(view.tps).toBeNull();
+ expect(view.ttft).toBeNull();
+ expect(view.decode).toBeNull();
+ expect(view.genTotal).toBeNull();
+ });
+
+ it("formats duration < 1s as ms", () => {
+ const step: StepMetrics = {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 10, outputTokens: 5 },
+ ttftMs: 42,
+ };
+ const view = viewStepMetrics(step, 0);
+ expect(view.ttft).toBe("42ms");
+ });
+
+ it("formats duration >= 1s as seconds", () => {
+ const step: StepMetrics = {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 10, outputTokens: 5 },
+ genTotalMs: 3200,
+ };
+ const view = viewStepMetrics(step, 0);
+ expect(view.genTotal).toBe("3.2s");
+ });
+
+ it("uses step index for label", () => {
+ const step: StepMetrics = {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 10, outputTokens: 5 },
+ };
+ expect(viewStepMetrics(step, 2).label).toBe("step 3");
+ });
+
+ it("tps uses decodeMs (not genTotalMs)", () => {
+ const step: StepMetrics = {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ decodeMs: 500,
+ genTotalMs: 800,
+ };
+ const view = viewStepMetrics(step, 0);
+ // 50 / (500/1000) = 100 tok/s, NOT 50/(800/1000)=62.5
+ expect(view.tps).toBe("100 tok/s");
+ });
+
+ it("tps falls back to genTotalMs when decodeMs absent", () => {
+ const step: StepMetrics = {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ genTotalMs: 800,
+ };
+ const view = viewStepMetrics(step, 0);
+ // 50 / (800/1000) = 62.5 → rounds to 63
+ expect(view.tps).toBe("63 tok/s");
+ });
+});
+
+describe("viewTurnMetrics", () => {
+ it("formats total tokens and breakdown", () => {
+ const turn: TurnMetrics = {
+ turnId: "t1",
+ usage: { inputTokens: 1000, outputTokens: 234 },
+ durationMs: 5000,
+ steps: [
+ {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 1000, outputTokens: 234 },
+ decodeMs: 3000,
+ genTotalMs: 4000,
+ },
+ ],
+ };
+ const view = viewTurnMetrics(turn);
+ expect(view.tokensLabel).toBe("1,234 tok");
+ expect(view.breakdown).toBe("1,000 in / 234 out");
+ expect(view.tps).toBe("78 tok/s");
+ expect(view.duration).toBe("5.0s");
+ });
+
+ it("breakdown includes cache only when present", () => {
+ const turn: TurnMetrics = {
+ turnId: "t1",
+ usage: { inputTokens: 1000, outputTokens: 234, cacheReadTokens: 500 },
+ steps: [],
+ };
+ const view = viewTurnMetrics(turn);
+ expect(view.breakdown).toBe("1,000 in / 234 out / 500 cache");
+ });
+
+ it("breakdown omits cache when not present", () => {
+ const turn: TurnMetrics = {
+ turnId: "t1",
+ usage: { inputTokens: 100, outputTokens: 50 },
+ steps: [],
+ };
+ const view = viewTurnMetrics(turn);
+ expect(view.breakdown).toBe("100 in / 50 out");
+ });
+
+ it("tps is null when no step has decodeMs or genTotalMs", () => {
+ const turn: TurnMetrics = {
+ turnId: "t1",
+ usage: { inputTokens: 100, outputTokens: 50 },
+ steps: [
+ {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ },
+ ],
+ };
+ const view = viewTurnMetrics(turn);
+ expect(view.tps).toBeNull();
+ });
+
+ it("duration is null when durationMs absent", () => {
+ const turn: TurnMetrics = {
+ turnId: "t1",
+ usage: { inputTokens: 100, outputTokens: 50 },
+ steps: [],
+ };
+ const view = viewTurnMetrics(turn);
+ expect(view.duration).toBeNull();
+ });
+
+ it("sums decodeMs across steps (fallback genTotalMs per step) for tps", () => {
+ const turn: TurnMetrics = {
+ turnId: "t1",
+ usage: { inputTokens: 300, outputTokens: 150 },
+ steps: [
+ {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ decodeMs: 800,
+ genTotalMs: 1000,
+ },
+ {
+ stepId: "s2" as StepId,
+ usage: { inputTokens: 200, outputTokens: 100 },
+ genTotalMs: 2000,
+ },
+ ],
+ };
+ const view = viewTurnMetrics(turn);
+ // step1 uses decodeMs=800, step2 falls back to genTotalMs=2000 → total=2800ms
+ // 150 / (2800/1000) = 53.57 → rounds to 54
+ expect(view.tps).toBe("54 tok/s");
+ });
+});
diff --git a/src/core/metrics/format.ts b/src/core/metrics/format.ts
new file mode 100644
index 0000000..3a4078c
--- /dev/null
+++ b/src/core/metrics/format.ts
@@ -0,0 +1,69 @@
+import type { StepMetrics, TurnMetrics, Usage } from "@dispatch/wire";
+import type { StepMetricsView, TurnMetricsView } from "./types";
+
+function formatTokens(n: number): string {
+ return n.toLocaleString("en-US");
+}
+
+function formatDuration(ms: number | undefined): string | null {
+ if (ms === undefined || ms <= 0) return null;
+ if (ms < 1000) return `${Math.round(ms)}ms`;
+ return `${(ms / 1000).toFixed(1)}s`;
+}
+
+function formatTps(tps: number | null): string | null {
+ if (tps === null) return null;
+ if (tps < 10) return `${tps.toFixed(1)} tok/s`;
+ return `${Math.round(tps)} tok/s`;
+}
+
+/** Compute tokens-per-second. Returns null when elapsed time is absent or zero. */
+export function computeTps(outputTokens: number, elapsedMs: number | undefined): number | null {
+ if (elapsedMs === undefined || elapsedMs <= 0) return null;
+ return outputTokens / (elapsedMs / 1000);
+}
+
+function totalTokens(u: Usage): number {
+ return u.inputTokens + u.outputTokens;
+}
+
+function formatBreakdown(u: Usage): string {
+ let s = `${formatTokens(u.inputTokens)} in / ${formatTokens(u.outputTokens)} out`;
+ if (u.cacheReadTokens !== undefined && u.cacheReadTokens > 0) {
+ s += ` / ${formatTokens(u.cacheReadTokens)} cache`;
+ }
+ return s;
+}
+
+/** Build a formatted view of a single step's metrics. */
+export function viewStepMetrics(step: StepMetrics, index: number): StepMetricsView {
+ const total = totalTokens(step.usage);
+ const tps = computeTps(step.usage.outputTokens, step.decodeMs ?? step.genTotalMs);
+ return {
+ label: `step ${index + 1}`,
+ tokensLabel: `${formatTokens(total)} tok`,
+ tps: formatTps(tps),
+ ttft: formatDuration(step.ttftMs),
+ decode: formatDuration(step.decodeMs),
+ genTotal: formatDuration(step.genTotalMs),
+ };
+}
+
+/** Build a formatted view of a turn's aggregate metrics. */
+export function viewTurnMetrics(turn: TurnMetrics): TurnMetricsView {
+ const total = totalTokens(turn.usage);
+ let totalGenMs: number | undefined;
+ for (const step of turn.steps) {
+ const stepMs = step.decodeMs ?? step.genTotalMs;
+ if (stepMs !== undefined) {
+ totalGenMs = (totalGenMs ?? 0) + stepMs;
+ }
+ }
+ const tps = computeTps(turn.usage.outputTokens, totalGenMs);
+ return {
+ tokensLabel: `${formatTokens(total)} tok`,
+ breakdown: formatBreakdown(turn.usage),
+ tps: formatTps(tps),
+ duration: formatDuration(turn.durationMs),
+ };
+}
diff --git a/src/core/metrics/index.ts b/src/core/metrics/index.ts
new file mode 100644
index 0000000..72d825d
--- /dev/null
+++ b/src/core/metrics/index.ts
@@ -0,0 +1,17 @@
+export { computeTps, viewStepMetrics, viewTurnMetrics } from "./format";
+export { interleaveTurnMetrics } from "./place";
+export {
+ applyDurableMetrics,
+ foldMetricsEvent,
+ initialMetricsState,
+ selectOrderedTurnMetrics,
+} from "./reducer";
+export type {
+ MetricsRow,
+ MetricsState,
+ StepMetrics,
+ StepMetricsView,
+ TurnMetrics,
+ TurnMetricsEntry,
+ TurnMetricsView,
+} from "./types";
diff --git a/src/core/metrics/place.test.ts b/src/core/metrics/place.test.ts
new file mode 100644
index 0000000..b6cb877
--- /dev/null
+++ b/src/core/metrics/place.test.ts
@@ -0,0 +1,469 @@
+import type { StepId, StepMetrics, TurnMetrics } from "@dispatch/wire";
+import { describe, expect, it } from "vitest";
+import type { RenderGroup } from "../chunks";
+import { interleaveTurnMetrics } from "./place";
+import type { TurnMetricsEntry } from "./types";
+
+function userGroup(seq: number, text: string): RenderGroup {
+ return {
+ kind: "single",
+ chunk: {
+ seq,
+ role: "user",
+ chunk: { type: "text", text },
+ provisional: false,
+ },
+ };
+}
+
+function assistantGroup(seq: number, text: string): RenderGroup {
+ return {
+ kind: "single",
+ chunk: {
+ seq,
+ role: "assistant",
+ chunk: { type: "text", text },
+ provisional: false,
+ },
+ };
+}
+
+function toolCallGroup(seq: number, stepId: string, toolCallId: string): RenderGroup {
+ return {
+ kind: "single",
+ chunk: {
+ seq,
+ role: "assistant",
+ chunk: {
+ type: "tool-call",
+ toolCallId,
+ toolName: "test",
+ input: {},
+ stepId: stepId as StepId,
+ },
+ provisional: false,
+ },
+ };
+}
+
+function toolResultGroup(seq: number, stepId: string, toolCallId: string): RenderGroup {
+ return {
+ kind: "single",
+ chunk: {
+ seq,
+ role: "tool",
+ chunk: {
+ type: "tool-result",
+ toolCallId,
+ toolName: "test",
+ content: "",
+ isError: false,
+ stepId: stepId as StepId,
+ },
+ provisional: false,
+ },
+ };
+}
+
+function toolBatchGroup(stepId: string, toolCallIds: string[]): RenderGroup {
+ return {
+ kind: "tool-batch",
+ stepId,
+ entries: toolCallIds.map((id) => ({
+ call: {
+ type: "tool-call" as const,
+ toolCallId: id,
+ toolName: "test",
+ input: {},
+ stepId: stepId as StepId,
+ },
+ result: null,
+ })),
+ provisional: false,
+ };
+}
+
+function makeStep(stepId: string, inputTokens: number, outputTokens: number): StepMetrics {
+ return {
+ stepId: stepId as StepId,
+ usage: { inputTokens, outputTokens },
+ };
+}
+
+function makeTurn(
+ turnId: string,
+ inputTokens: number,
+ outputTokens: number,
+ steps: StepMetrics[] = [],
+): TurnMetrics {
+ return {
+ turnId,
+ usage: { inputTokens, outputTokens },
+ steps,
+ };
+}
+
+function makeEntry(
+ turnId: string,
+ inputTokens: number,
+ outputTokens: number,
+ steps: StepMetrics[] = [],
+): TurnMetricsEntry {
+ return {
+ turnId,
+ steps,
+ total: makeTurn(turnId, inputTokens, outputTokens, steps),
+ };
+}
+
+function makeProgressiveEntry(turnId: string, steps: StepMetrics[]): TurnMetricsEntry {
+ return {
+ turnId,
+ steps,
+ total: null,
+ };
+}
+
+function expectGroupAt(
+ rows: readonly { readonly kind: string }[],
+ index: number,
+ expected: RenderGroup,
+): void {
+ const row = rows[index];
+ expect(row?.kind).toBe("group");
+ expect((row as { readonly group: RenderGroup } | undefined)?.group).toBe(expected);
+}
+
+function expectStepMetricsAt(
+ rows: readonly { readonly kind: string }[],
+ index: number,
+ expectedStepId: string,
+ expectedIndex: number,
+): void {
+ const row = rows[index];
+ expect(row?.kind).toBe("step-metrics");
+ const sm = row as { readonly step: StepMetrics; readonly index: number } | undefined;
+ expect(sm?.step.stepId).toBe(expectedStepId);
+ expect(sm?.index).toBe(expectedIndex);
+}
+
+function expectTurnMetricsAt(
+ rows: readonly { readonly kind: string }[],
+ index: number,
+ expectedTurnId: string,
+): void {
+ const row = rows[index];
+ expect(row?.kind).toBe("turn-metrics");
+ expect((row as { readonly turn: TurnMetrics } | undefined)?.turn.turnId).toBe(expectedTurnId);
+}
+
+describe("interleaveTurnMetrics", () => {
+ it("no metrics: rows are all groups, unchanged order", () => {
+ const g1 = userGroup(1, "q");
+ const g2 = assistantGroup(2, "a");
+ const rows = interleaveTurnMetrics([g1, g2], []);
+ expect(rows).toHaveLength(2);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ });
+
+ it("head-aligned: segment i gets entries[i]", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = assistantGroup(2, "a1");
+ const g3 = userGroup(3, "q2");
+ const g4 = assistantGroup(4, "a2");
+ const step1 = makeStep("s1", 100, 50);
+ const step2 = makeStep("s2", 200, 80);
+ const rows = interleaveTurnMetrics(
+ [g1, g2, g3, g4],
+ [makeEntry("t1", 100, 50, [step1]), makeEntry("t2", 200, 80, [step2])],
+ );
+
+ expect(rows).toHaveLength(8);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "s1", 0);
+ expectTurnMetricsAt(rows, 3, "t1");
+ expectGroupAt(rows, 4, g3);
+ expectGroupAt(rows, 5, g4);
+ expectStepMetricsAt(rows, 6, "s2", 0);
+ expectTurnMetricsAt(rows, 7, "t2");
+ });
+
+ it("a trailing segment with no entry (in-flight turn) renders no metrics", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = assistantGroup(2, "a1");
+ const g3 = userGroup(3, "q2");
+ const g4 = assistantGroup(4, "a2");
+ const step = makeStep("s1", 100, 50);
+ const rows = interleaveTurnMetrics([g1, g2, g3, g4], [makeEntry("t1", 100, 50, [step])]);
+
+ expect(rows).toHaveLength(6);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "s1", 0);
+ expectTurnMetricsAt(rows, 3, "t1");
+ expectGroupAt(rows, 4, g3);
+ expectGroupAt(rows, 5, g4);
+ });
+
+ it("single text-only turn: step row + turn-metrics both at tail", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = assistantGroup(2, "a1");
+ const step = makeStep("s1", 100, 50);
+ const turn = makeEntry("t1", 100, 50, [step]);
+ const rows = interleaveTurnMetrics([g1, g2], [turn]);
+
+ expect(rows).toHaveLength(4);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "s1", 0);
+ expectTurnMetricsAt(rows, 3, "t1");
+ });
+
+ it("tool step anchors inline after its tool-batch group", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = toolBatchGroup("t#0", ["c1", "c2"]);
+ const g3 = assistantGroup(3, "a1");
+ const step0 = makeStep("t#0", 100, 50);
+ const step1 = makeStep("t#1", 200, 80);
+ const turn = makeEntry("t1", 300, 130, [step0, step1]);
+ const rows = interleaveTurnMetrics([g1, g2, g3], [turn]);
+
+ expect(rows).toHaveLength(6);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "t#0", 0);
+ expectGroupAt(rows, 3, g3);
+ expectStepMetricsAt(rows, 4, "t#1", 1);
+ expectTurnMetricsAt(rows, 5, "t1");
+ });
+
+ it("single tool-call group anchors its step", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = toolCallGroup(2, "s1", "c1");
+ const g3 = assistantGroup(3, "a1");
+ const step = makeStep("s1", 100, 50);
+ const turn = makeEntry("t1", 100, 50, [step]);
+ const rows = interleaveTurnMetrics([g1, g2, g3], [turn]);
+
+ expect(rows).toHaveLength(5);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "s1", 0);
+ expectGroupAt(rows, 3, g3);
+ expectTurnMetricsAt(rows, 4, "t1");
+ });
+
+ it("single tool-result group anchors its step", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = toolResultGroup(2, "s1", "c1");
+ const g3 = assistantGroup(3, "a1");
+ const step = makeStep("s1", 100, 50);
+ const turn = makeEntry("t1", 100, 50, [step]);
+ const rows = interleaveTurnMetrics([g1, g2, g3], [turn]);
+
+ expect(rows).toHaveLength(5);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "s1", 0);
+ expectGroupAt(rows, 3, g3);
+ expectTurnMetricsAt(rows, 4, "t1");
+ });
+
+ it("multi-step: each tool step inline, final step + total at tail", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = toolBatchGroup("t#0", ["c1"]);
+ const g3 = assistantGroup(2, "thinking");
+ const g4 = toolBatchGroup("t#1", ["c2", "c3"]);
+ const g5 = assistantGroup(3, "a1");
+ const step0 = makeStep("t#0", 100, 50);
+ const step1 = makeStep("t#1", 200, 80);
+ const step2 = makeStep("t#2", 50, 20);
+ const turn = makeEntry("t1", 350, 150, [step0, step1, step2]);
+ const rows = interleaveTurnMetrics([g1, g2, g3, g4, g5], [turn]);
+
+ expect(rows).toHaveLength(9);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "t#0", 0);
+ expectGroupAt(rows, 3, g3);
+ expectGroupAt(rows, 4, g4);
+ expectStepMetricsAt(rows, 5, "t#1", 1);
+ expectGroupAt(rows, 6, g5);
+ expectStepMetricsAt(rows, 7, "t#2", 2);
+ expectTurnMetricsAt(rows, 8, "t1");
+ });
+
+ it("multiple turns head-aligned with inline steps", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = toolBatchGroup("s1", ["c1"]);
+ const g3 = assistantGroup(2, "a1");
+ const g4 = userGroup(3, "q2");
+ const g5 = assistantGroup(4, "a2");
+ const step1 = makeStep("s1", 100, 50);
+ const step2 = makeStep("s2", 200, 80);
+ const rows = interleaveTurnMetrics(
+ [g1, g2, g3, g4, g5],
+ [makeEntry("t1", 100, 50, [step1]), makeEntry("t2", 200, 80, [step2])],
+ );
+
+ expect(rows).toHaveLength(9);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "s1", 0);
+ expectGroupAt(rows, 3, g3);
+ expectTurnMetricsAt(rows, 4, "t1");
+ expectGroupAt(rows, 5, g4);
+ expectGroupAt(rows, 6, g5);
+ expectStepMetricsAt(rows, 7, "s2", 0);
+ expectTurnMetricsAt(rows, 8, "t2");
+ });
+
+ it("unanchored step (stepId not in groups) falls back to tail before turn-metrics", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = assistantGroup(2, "a1");
+ const step0 = makeStep("orphan", 100, 50);
+ const turn = makeEntry("t1", 100, 50, [step0]);
+ const rows = interleaveTurnMetrics([g1, g2], [turn]);
+
+ expect(rows).toHaveLength(4);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "orphan", 0);
+ expectTurnMetricsAt(rows, 3, "t1");
+ });
+
+ it("fewer metrics than segments: trailing segments are bare", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = assistantGroup(2, "a1");
+ const g3 = userGroup(3, "q2");
+ const g4 = assistantGroup(4, "a2");
+ const g5 = userGroup(5, "q3");
+ const g6 = assistantGroup(6, "a3");
+ const step = makeStep("s1", 300, 120);
+ const rows = interleaveTurnMetrics(
+ [g1, g2, g3, g4, g5, g6],
+ [makeEntry("t1", 300, 120, [step])],
+ );
+
+ expect(rows).toHaveLength(8);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "s1", 0);
+ expectTurnMetricsAt(rows, 3, "t1");
+ expectGroupAt(rows, 4, g3);
+ expectGroupAt(rows, 5, g4);
+ expectGroupAt(rows, 6, g5);
+ expectGroupAt(rows, 7, g6);
+ });
+
+ it("in-flight turn (no durationMs) still produces step + turn rows", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = assistantGroup(2, "a1");
+ const step = makeStep("s1", 100, 50);
+ const turn: TurnMetricsEntry = {
+ turnId: "t1",
+ steps: [step],
+ total: {
+ turnId: "t1",
+ usage: { inputTokens: 100, outputTokens: 50 },
+ steps: [step],
+ },
+ };
+ const rows = interleaveTurnMetrics([g1, g2], [turn]);
+
+ expect(rows).toHaveLength(4);
+ expectStepMetricsAt(rows, 2, "s1", 0);
+ expectTurnMetricsAt(rows, 3, "t1");
+ const metricsRow = rows[3] as { readonly turn: TurnMetrics } | undefined;
+ expect(metricsRow?.turn.durationMs).toBeUndefined();
+ });
+
+ it("leading non-turn groups emit as plain group rows", () => {
+ const g0 = assistantGroup(1, "system msg");
+ const g1 = userGroup(2, "q1");
+ const g2 = assistantGroup(3, "a1");
+ const step = makeStep("s1", 100, 50);
+ const rows = interleaveTurnMetrics([g0, g1, g2], [makeEntry("t1", 100, 50, [step])]);
+
+ expect(rows).toHaveLength(5);
+ expectGroupAt(rows, 0, g0);
+ expect(rows[1]?.kind).toBe("group");
+ expect(rows[2]?.kind).toBe("group");
+ expectStepMetricsAt(rows, 3, "s1", 0);
+ expectTurnMetricsAt(rows, 4, "t1");
+ });
+
+ it("more metrics than segments: only T entries placed (extra ignored)", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = assistantGroup(2, "a1");
+ const step1 = makeStep("s1", 100, 50);
+ const step2 = makeStep("s2", 200, 80);
+ const rows = interleaveTurnMetrics(
+ [g1, g2],
+ [makeEntry("t1", 100, 50, [step1]), makeEntry("t2", 200, 80, [step2])],
+ );
+
+ expect(rows).toHaveLength(4);
+ expectStepMetricsAt(rows, 2, "s1", 0);
+ expectTurnMetricsAt(rows, 3, "t1");
+ });
+
+ it("turn with no steps emits only turn-metrics (no step-metrics)", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = assistantGroup(2, "a1");
+ const rows = interleaveTurnMetrics([g1, g2], [makeEntry("t1", 100, 50)]);
+
+ expect(rows).toHaveLength(3);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectTurnMetricsAt(rows, 2, "t1");
+ });
+
+ it("progressive: entry with steps but total=null emits step rows and NO turn-metrics row", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = toolBatchGroup("s1", ["c1"]);
+ const g3 = assistantGroup(2, "a1");
+ const step1 = makeStep("s1", 100, 50);
+ const entry = makeProgressiveEntry("t1", [step1]);
+ const rows = interleaveTurnMetrics([g1, g2, g3], [entry]);
+
+ expect(rows).toHaveLength(4);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "s1", 0);
+ expectGroupAt(rows, 3, g3);
+ });
+
+ it("entry with total emits step rows + a turn-metrics row", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = toolBatchGroup("s1", ["c1"]);
+ const g3 = assistantGroup(2, "a1");
+ const step1 = makeStep("s1", 100, 50);
+ const entry = makeEntry("t1", 100, 50, [step1]);
+ const rows = interleaveTurnMetrics([g1, g2, g3], [entry]);
+
+ expect(rows).toHaveLength(5);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "s1", 0);
+ expectGroupAt(rows, 3, g3);
+ expectTurnMetricsAt(rows, 4, "t1");
+ });
+
+ it("progressive multi-step: unanchored steps at tail, no turn-metrics", () => {
+ const g1 = userGroup(1, "q1");
+ const g2 = assistantGroup(2, "a1");
+ const step0 = makeStep("s1", 100, 50);
+ const step1 = makeStep("s2", 200, 80);
+ const entry = makeProgressiveEntry("t1", [step0, step1]);
+ const rows = interleaveTurnMetrics([g1, g2], [entry]);
+
+ expect(rows).toHaveLength(4);
+ expectGroupAt(rows, 0, g1);
+ expectGroupAt(rows, 1, g2);
+ expectStepMetricsAt(rows, 2, "s1", 0);
+ expectStepMetricsAt(rows, 3, "s2", 1);
+ });
+});
diff --git a/src/core/metrics/place.ts b/src/core/metrics/place.ts
new file mode 100644
index 0000000..2481a16
--- /dev/null
+++ b/src/core/metrics/place.ts
@@ -0,0 +1,151 @@
+import type { RenderGroup } from "../chunks";
+import type { MetricsRow, TurnMetricsEntry } from "./types";
+
+function groupStepId(g: RenderGroup): string | undefined {
+ if (g.kind === "tool-batch") return g.stepId;
+ const c = g.chunk.chunk;
+ return c.type === "tool-call" || c.type === "tool-result" ? c.stepId : undefined;
+}
+
+/**
+ * Interleave turn metrics into the rendered transcript.
+ *
+ * Splits groups into per-turn segments: a new segment begins at each `single`
+ * group with `group.chunk.role === "user"`. Head-aligns: segment `i` receives
+ * `entries[i]` (the first `min(K, T)` segments get the first `min(K, T)` entries).
+ *
+ * Within a segment that has an aligned turn entry, each completed step's metrics
+ * are placed INLINE right after the last group bearing that step's `stepId` (tool-call/
+ * tool-result chunks and tool-batch groups carry `stepId`). Steps whose `stepId` does
+ * not appear in any group ("unanchored") fall back to the segment tail, before the
+ * turn-metrics row (if present).
+ *
+ * A `turn-metrics` row is emitted ONLY when `entry.total !== null` (i.e. the turn
+ * is finalized via `done` or durable data). A still-generating turn emits its
+ * completed step rows but NO turn-total row.
+ *
+ * Head-alignment is stable: the durable `/metrics` endpoint returns every
+ * SEALED turn in turn order (a contiguous prefix from turn 0), and we append
+ * only the just-finished live turn — so `entries[i]` is turn `i`, and existing
+ * turns never move when a new turn is appended.
+ */
+export function interleaveTurnMetrics(
+ groups: readonly RenderGroup[],
+ entries: readonly TurnMetricsEntry[],
+): readonly MetricsRow[] {
+ if (entries.length === 0) {
+ return groups.map((g) => ({ kind: "group" as const, group: g }));
+ }
+
+ const segmentStarts: number[] = [];
+ for (let i = 0; i < groups.length; i++) {
+ const g = groups[i];
+ if (g !== undefined && g.kind === "single" && g.chunk.role === "user") {
+ segmentStarts.push(i);
+ }
+ }
+
+ const T = segmentStarts.length;
+
+ if (T === 0) {
+ return groups.map((g) => ({ kind: "group" as const, group: g }));
+ }
+
+ const K = entries.length;
+ const matched = Math.min(K, T);
+
+ // Head-alignment: segment i ↔ entries[i] for i in [0, matched).
+ // A trailing segment with no corresponding entry renders no metrics.
+ const segmentEntries = new Map<number, TurnMetricsEntry>();
+ for (let i = 0; i < matched; i++) {
+ const entry = entries[i];
+ if (entry !== undefined) {
+ segmentEntries.set(i, entry);
+ }
+ }
+
+ const rows: MetricsRow[] = [];
+
+ const firstUserIdx = segmentStarts[0] ?? 0;
+ for (let i = 0; i < firstUserIdx; i++) {
+ const g = groups[i];
+ if (g !== undefined) {
+ rows.push({ kind: "group", group: g });
+ }
+ }
+
+ for (let seg = 0; seg < T; seg++) {
+ const start = segmentStarts[seg] ?? 0;
+ const end = seg + 1 < T ? (segmentStarts[seg + 1] ?? groups.length) : groups.length;
+
+ const entry = segmentEntries.get(seg);
+
+ if (entry === undefined) {
+ for (let i = start; i < end; i++) {
+ const g = groups[i];
+ if (g !== undefined) {
+ rows.push({ kind: "group", group: g });
+ }
+ }
+ continue;
+ }
+
+ // Build anchor map: for each stepId, the LAST group index in this segment.
+ const anchorByStepId = new Map<string, number>();
+ for (let i = start; i < end; i++) {
+ const g = groups[i];
+ if (g === undefined) continue;
+ const sid = groupStepId(g);
+ if (sid !== undefined) {
+ anchorByStepId.set(sid, i);
+ }
+ }
+
+ // Classify each step as anchored (at a group index) or unanchored.
+ const anchored: Map<number, { stepIndex: number; step: (typeof entry.steps)[number] }[]> =
+ new Map();
+ const unanchored: { stepIndex: number; step: (typeof entry.steps)[number] }[] = [];
+
+ for (let i = 0; i < entry.steps.length; i++) {
+ const step = entry.steps[i];
+ if (step === undefined) continue;
+ const anchorGroupIdx = anchorByStepId.get(step.stepId);
+ if (anchorGroupIdx !== undefined) {
+ let arr = anchored.get(anchorGroupIdx);
+ if (arr === undefined) {
+ arr = [];
+ anchored.set(anchorGroupIdx, arr);
+ }
+ arr.push({ stepIndex: i, step });
+ } else {
+ unanchored.push({ stepIndex: i, step });
+ }
+ }
+
+ // Emit groups; after each anchored group, emit its step-metrics rows.
+ for (let i = start; i < end; i++) {
+ const g = groups[i];
+ if (g !== undefined) {
+ rows.push({ kind: "group", group: g });
+ }
+ const stepsHere = anchored.get(i);
+ if (stepsHere !== undefined) {
+ stepsHere.sort((a, b) => a.stepIndex - b.stepIndex);
+ for (const { step, stepIndex } of stepsHere) {
+ rows.push({ kind: "step-metrics", step, index: stepIndex });
+ }
+ }
+ }
+
+ // Segment tail: unanchored steps, then turn-metrics (only when total is present).
+ unanchored.sort((a, b) => a.stepIndex - b.stepIndex);
+ for (const { step, stepIndex } of unanchored) {
+ rows.push({ kind: "step-metrics", step, index: stepIndex });
+ }
+ if (entry.total !== null) {
+ rows.push({ kind: "turn-metrics", turn: entry.total });
+ }
+ }
+
+ return rows;
+}
diff --git a/src/core/metrics/reducer.test.ts b/src/core/metrics/reducer.test.ts
new file mode 100644
index 0000000..16c88b3
--- /dev/null
+++ b/src/core/metrics/reducer.test.ts
@@ -0,0 +1,368 @@
+import type { StepId, TurnDoneEvent, TurnStepCompleteEvent, TurnUsageEvent } from "@dispatch/wire";
+import { describe, expect, it } from "vitest";
+import {
+ applyDurableMetrics,
+ foldMetricsEvent,
+ initialMetricsState,
+ selectOrderedTurnMetrics,
+} from "./reducer";
+
+const usageEvent = (
+ turnId: string,
+ inputTokens: number,
+ outputTokens: number,
+ stepId?: string,
+): TurnUsageEvent => {
+ const base = {
+ type: "usage" as const,
+ conversationId: "c1",
+ turnId,
+ usage: { inputTokens, outputTokens },
+ };
+ if (stepId !== undefined) {
+ return { ...base, stepId: stepId as StepId };
+ }
+ return base;
+};
+
+const stepCompleteEvent = (
+ turnId: string,
+ stepId: string,
+ timing: { ttftMs?: number; decodeMs?: number; genTotalMs?: number } = {},
+): TurnStepCompleteEvent => ({
+ type: "step-complete",
+ conversationId: "c1",
+ turnId,
+ stepId: stepId as StepId,
+ ...timing,
+});
+
+const doneEvent = (
+ turnId: string,
+ extra: { durationMs?: number; usage?: { inputTokens: number; outputTokens: number } } = {},
+): TurnDoneEvent => ({
+ type: "done",
+ conversationId: "c1",
+ turnId,
+ reason: "stop",
+ ...extra,
+});
+
+describe("initialMetricsState", () => {
+ it("starts empty", () => {
+ const s = initialMetricsState();
+ expect(s.live.size).toBe(0);
+ expect(s.liveOrder).toEqual([]);
+ expect(s.durable.size).toBe(0);
+ expect(s.durableOrder).toEqual([]);
+ });
+});
+
+describe("foldMetricsEvent", () => {
+ it("folds per-step usage by stepId into a turn", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2"));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(1);
+ expect(ordered[0]?.turnId).toBe("t1");
+ expect(ordered[0]?.steps).toHaveLength(2);
+ expect(ordered[0]?.steps[0]?.stepId).toBe("s1");
+ expect(ordered[0]?.steps[0]?.usage).toEqual({ inputTokens: 100, outputTokens: 50 });
+ expect(ordered[0]?.steps[1]?.stepId).toBe("s2");
+ expect(ordered[0]?.steps[1]?.usage).toEqual({ inputTokens: 200, outputTokens: 80 });
+ });
+
+ it("folds step-complete timing and merges with same-step usage", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(
+ s,
+ stepCompleteEvent("t1", "s1", { ttftMs: 200, decodeMs: 800, genTotalMs: 1000 }),
+ );
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(1);
+ const step = ordered[0]?.steps[0];
+ expect(step?.usage).toEqual({ inputTokens: 100, outputTokens: 50 });
+ expect(step?.ttftMs).toBe(200);
+ expect(step?.decodeMs).toBe(800);
+ expect(step?.genTotalMs).toBe(1000);
+ });
+
+ it("step-complete before usage defaults usage to zeros", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 500 }));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ const step = ordered[0]?.steps[0];
+ expect(step?.usage).toEqual({ inputTokens: 0, outputTokens: 0 });
+ expect(step?.genTotalMs).toBe(500);
+ });
+
+ it("done sets durationMs and aggregate usage", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(
+ s,
+ doneEvent("t1", {
+ durationMs: 5000,
+ usage: { inputTokens: 300, outputTokens: 150 },
+ }),
+ );
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered[0]?.total?.durationMs).toBe(5000);
+ expect(ordered[0]?.total?.usage).toEqual({ inputTokens: 300, outputTokens: 150 });
+ });
+
+ it("aggregate usage sums steps when done.usage absent", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2"));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered[0]?.total?.usage).toEqual({ inputTokens: 300, outputTokens: 130 });
+ });
+
+ it("aggregate usage includes cache only when a step had cache", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, {
+ type: "usage",
+ conversationId: "c1",
+ turnId: "t1",
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50, cacheReadTokens: 30 },
+ });
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2"));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered[0]?.total?.usage.cacheReadTokens).toBe(30);
+ expect(ordered[0]?.total?.usage.cacheWriteTokens).toBeUndefined();
+ });
+
+ it("tolerates missing clock (no genTotalMs/ttft/decode)", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ const step = ordered[0]?.steps[0];
+ expect(step?.ttftMs).toBeUndefined();
+ expect(step?.decodeMs).toBeUndefined();
+ expect(step?.genTotalMs).toBeUndefined();
+ expect(ordered[0]?.total?.durationMs).toBeUndefined();
+ });
+
+ it("usage without stepId does not create a turn", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(0);
+ });
+
+ it("ignores non-metrics events", () => {
+ const s = initialMetricsState();
+ const next = foldMetricsEvent(s, {
+ type: "status",
+ conversationId: "c1",
+ status: "running",
+ });
+ expect(next).toBe(s);
+ });
+
+ it("preserves first-seen order of steps", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 10, 5, "s2"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2"));
+ s = foldMetricsEvent(s, usageEvent("t1", 20, 8, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered[0]?.steps[0]?.stepId).toBe("s2");
+ expect(ordered[0]?.steps[1]?.stepId).toBe("s1");
+ });
+
+ it("preserves first-seen order of turns", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t2", 10, 5, "s1"));
+ s = foldMetricsEvent(s, usageEvent("t1", 20, 8, "s1"));
+ s = foldMetricsEvent(s, doneEvent("t2"));
+ s = foldMetricsEvent(s, doneEvent("t1"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered[0]?.turnId).toBe("t2");
+ expect(ordered[1]?.turnId).toBe("t1");
+ });
+});
+
+describe("selectOrderedTurnMetrics", () => {
+ it("durable wins over live by turnId, live-done appended last", () => {
+ let s = initialMetricsState();
+
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, usageEvent("t2", 200, 80, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t2", "s1"));
+ s = foldMetricsEvent(s, doneEvent("t2"));
+
+ s = applyDurableMetrics(s, [
+ {
+ turnId: "t1",
+ usage: { inputTokens: 999, outputTokens: 999 },
+ durationMs: 3000,
+ steps: [
+ {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 999, outputTokens: 999 },
+ genTotalMs: 3000,
+ },
+ ],
+ },
+ ]);
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(2);
+ expect(ordered[0]?.turnId).toBe("t1");
+ expect(ordered[0]?.total?.usage.inputTokens).toBe(999);
+ expect(ordered[0]?.total?.durationMs).toBe(3000);
+ expect(ordered[1]?.turnId).toBe("t2");
+ expect(ordered[1]?.total?.durationMs).toBeUndefined();
+ });
+
+ it("empty state returns empty", () => {
+ const s = initialMetricsState();
+ expect(selectOrderedTurnMetrics(s)).toEqual([]);
+ });
+
+ it("selectOrderedTurnMetrics: in-flight turn exposes only completed steps and total=null", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 1000 }));
+ s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(1);
+ expect(ordered[0]?.turnId).toBe("t1");
+ expect(ordered[0]?.steps).toHaveLength(1);
+ expect(ordered[0]?.steps[0]?.stepId).toBe("s1");
+ expect(ordered[0]?.total).toBeNull();
+ });
+
+ it("selectOrderedTurnMetrics: a turn with no complete step and not done is omitted", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2"));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(0);
+ });
+
+ it("selectOrderedTurnMetrics: after done, total is present", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 1000 }));
+ s = foldMetricsEvent(s, doneEvent("t1", { durationMs: 2000 }));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(1);
+ expect(ordered[0]?.turnId).toBe("t1");
+ expect(ordered[0]?.total?.durationMs).toBe(2000);
+ expect(ordered[0]?.steps).toHaveLength(1);
+ });
+
+ it("step-complete marks the step complete", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1", { genTotalMs: 500 }));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(1);
+ expect(ordered[0]?.steps).toHaveLength(1);
+ expect(ordered[0]?.steps[0]?.stepId).toBe("s1");
+ expect(ordered[0]?.steps[0]?.genTotalMs).toBe(500);
+ });
+
+ it("selectOrderedTurnMetrics: durable turn → steps + total present", () => {
+ let s = initialMetricsState();
+ s = applyDurableMetrics(s, [
+ {
+ turnId: "t1",
+ usage: { inputTokens: 300, outputTokens: 150 },
+ durationMs: 5000,
+ steps: [
+ {
+ stepId: "s1" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ genTotalMs: 1000,
+ },
+ {
+ stepId: "s2" as StepId,
+ usage: { inputTokens: 200, outputTokens: 100 },
+ genTotalMs: 2000,
+ },
+ ],
+ },
+ ]);
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered).toHaveLength(1);
+ expect(ordered[0]?.turnId).toBe("t1");
+ expect(ordered[0]?.steps).toHaveLength(2);
+ expect(ordered[0]?.steps[0]?.stepId).toBe("s1");
+ expect(ordered[0]?.steps[1]?.stepId).toBe("s2");
+ expect(ordered[0]?.total?.usage.inputTokens).toBe(300);
+ expect(ordered[0]?.total?.durationMs).toBe(5000);
+ });
+});
+
+describe("applyDurableMetrics", () => {
+ it("stores durable turns in order", () => {
+ let s = initialMetricsState();
+ s = applyDurableMetrics(s, [
+ { turnId: "t1", usage: { inputTokens: 10, outputTokens: 5 }, steps: [] },
+ { turnId: "t2", usage: { inputTokens: 20, outputTokens: 8 }, steps: [] },
+ ]);
+ expect(s.durableOrder).toEqual(["t1", "t2"]);
+ expect(s.durable.size).toBe(2);
+ });
+
+ it("is idempotent for same turnId", () => {
+ let s = initialMetricsState();
+ const turn = {
+ turnId: "t1",
+ usage: { inputTokens: 10, outputTokens: 5 },
+ steps: [],
+ };
+ s = applyDurableMetrics(s, [turn]);
+ s = applyDurableMetrics(s, [turn]);
+ expect(s.durableOrder).toEqual(["t1"]);
+ expect(s.durable.size).toBe(1);
+ });
+
+ it("overwrites durable turn data for same turnId", () => {
+ let s = initialMetricsState();
+ s = applyDurableMetrics(s, [
+ { turnId: "t1", usage: { inputTokens: 10, outputTokens: 5 }, steps: [] },
+ ]);
+ s = applyDurableMetrics(s, [
+ { turnId: "t1", usage: { inputTokens: 99, outputTokens: 99 }, steps: [] },
+ ]);
+ expect(s.durable.get("t1")?.usage.inputTokens).toBe(99);
+ });
+});
diff --git a/src/core/metrics/reducer.ts b/src/core/metrics/reducer.ts
new file mode 100644
index 0000000..d36dba1
--- /dev/null
+++ b/src/core/metrics/reducer.ts
@@ -0,0 +1,239 @@
+import type { AgentEvent, StepId, StepMetrics, TurnMetrics, Usage } from "@dispatch/wire";
+import type { BuildingStep, LiveTurn, MetricsState, TurnMetricsEntry } from "./types";
+
+function sumStepUsages(steps: readonly BuildingStep[]): Usage {
+ let inputTokens = 0;
+ let outputTokens = 0;
+ let hasCacheRead = false;
+ let hasCacheWrite = false;
+ let cacheReadTokens = 0;
+ let cacheWriteTokens = 0;
+
+ for (const step of steps) {
+ if (step.usage === undefined) continue;
+ inputTokens += step.usage.inputTokens;
+ outputTokens += step.usage.outputTokens;
+ if (step.usage.cacheReadTokens !== undefined && step.usage.cacheReadTokens > 0) {
+ hasCacheRead = true;
+ cacheReadTokens += step.usage.cacheReadTokens;
+ }
+ if (step.usage.cacheWriteTokens !== undefined && step.usage.cacheWriteTokens > 0) {
+ hasCacheWrite = true;
+ cacheWriteTokens += step.usage.cacheWriteTokens;
+ }
+ }
+
+ const base: Usage = { inputTokens, outputTokens };
+ if (hasCacheRead) {
+ (base as { cacheReadTokens?: number }).cacheReadTokens = cacheReadTokens;
+ }
+ if (hasCacheWrite) {
+ (base as { cacheWriteTokens?: number }).cacheWriteTokens = cacheWriteTokens;
+ }
+ return base;
+}
+
+function buildingStepToMetrics(bs: BuildingStep): StepMetrics {
+ const usage: Usage = bs.usage ?? { inputTokens: 0, outputTokens: 0 };
+ const base: StepMetrics = { stepId: bs.stepId as StepId, usage };
+ if (bs.ttftMs !== undefined) {
+ (base as { ttftMs?: number }).ttftMs = bs.ttftMs;
+ }
+ if (bs.decodeMs !== undefined) {
+ (base as { decodeMs?: number }).decodeMs = bs.decodeMs;
+ }
+ if (bs.genTotalMs !== undefined) {
+ (base as { genTotalMs?: number }).genTotalMs = bs.genTotalMs;
+ }
+ return base;
+}
+
+function getStep(lt: LiveTurn, id: string): BuildingStep {
+ const step = lt.stepMap.get(id);
+ if (step === undefined) throw new Error(`Missing step ${id} in live turn`);
+ return step;
+}
+
+function liveTurnToMetrics(lt: LiveTurn): TurnMetrics {
+ const buildingSteps = lt.stepOrder.map((id) => getStep(lt, id));
+ const steps = buildingSteps.map((bs) => buildingStepToMetrics(bs));
+ const usage = lt.doneUsage ?? sumStepUsages(buildingSteps);
+ const base: TurnMetrics = { turnId: lt.turnId, usage, steps };
+ if (lt.durationMs !== undefined) {
+ (base as { durationMs?: number }).durationMs = lt.durationMs;
+ }
+ return base;
+}
+
+function ensureLiveTurn(state: MetricsState, turnId: string): [MetricsState, LiveTurn] {
+ const existing = state.live.get(turnId);
+ if (existing !== undefined) return [state, existing];
+
+ const newTurn: LiveTurn = {
+ turnId,
+ done: false,
+ durationMs: undefined,
+ doneUsage: undefined,
+ stepMap: new Map(),
+ stepOrder: [],
+ };
+ const newLive = new Map(state.live);
+ newLive.set(turnId, newTurn);
+ return [{ ...state, live: newLive, liveOrder: [...state.liveOrder, turnId] }, newTurn];
+}
+
+function upsertStep(lt: LiveTurn, stepId: string, update: Partial<BuildingStep>): LiveTurn {
+ const existing = lt.stepMap.get(stepId);
+ if (existing !== undefined) {
+ const merged: BuildingStep = {
+ stepId,
+ usage: update.usage ?? existing.usage,
+ ttftMs: update.ttftMs ?? existing.ttftMs,
+ decodeMs: update.decodeMs ?? existing.decodeMs,
+ genTotalMs: update.genTotalMs ?? existing.genTotalMs,
+ complete: update.complete ?? existing.complete,
+ };
+ const newMap = new Map(lt.stepMap);
+ newMap.set(stepId, merged);
+ return { ...lt, stepMap: newMap };
+ }
+
+ const fresh: BuildingStep = {
+ stepId,
+ usage: update.usage,
+ ttftMs: update.ttftMs,
+ decodeMs: update.decodeMs,
+ genTotalMs: update.genTotalMs,
+ complete: update.complete ?? false,
+ };
+ const newMap = new Map(lt.stepMap);
+ newMap.set(stepId, fresh);
+ return { ...lt, stepMap: newMap, stepOrder: [...lt.stepOrder, stepId] };
+}
+
+/** The initial empty metrics state. */
+export function initialMetricsState(): MetricsState {
+ return {
+ live: new Map(),
+ liveOrder: [],
+ durable: new Map(),
+ durableOrder: [],
+ };
+}
+
+/**
+ * Fold one live AgentEvent into the metrics state.
+ *
+ * - `usage` with `stepId`: upsert that step's usage.
+ * - `usage` without `stepId`: ignored.
+ * - `step-complete`: upsert that step's timing; default usage to zeros if absent.
+ * - `done`: set turn's `durationMs` and optional aggregate `usage`.
+ * - All other event types: return state unchanged.
+ */
+export function foldMetricsEvent(state: MetricsState, event: AgentEvent): MetricsState {
+ switch (event.type) {
+ case "usage": {
+ if (event.stepId === undefined) return state;
+ const [s1, lt] = ensureLiveTurn(state, event.turnId);
+ const updated = upsertStep(lt, event.stepId, { usage: event.usage });
+ const newLive = new Map(s1.live);
+ newLive.set(event.turnId, updated);
+ return { ...s1, live: newLive };
+ }
+
+ case "step-complete": {
+ const [s1, lt] = ensureLiveTurn(state, event.turnId);
+ const updated = upsertStep(lt, event.stepId, {
+ ttftMs: event.ttftMs,
+ decodeMs: event.decodeMs,
+ genTotalMs: event.genTotalMs,
+ complete: true,
+ });
+ const newLive = new Map(s1.live);
+ newLive.set(event.turnId, updated);
+ return { ...s1, live: newLive };
+ }
+
+ case "done": {
+ const [s1, lt] = ensureLiveTurn(state, event.turnId);
+ const updated: LiveTurn = {
+ ...lt,
+ done: true,
+ durationMs: event.durationMs ?? lt.durationMs,
+ doneUsage: event.usage ?? lt.doneUsage,
+ };
+ const newLive = new Map(s1.live);
+ newLive.set(event.turnId, updated);
+ return { ...s1, live: newLive };
+ }
+
+ default:
+ return state;
+ }
+}
+
+/**
+ * Store durable (sealed) metrics from the backend. These win over live data
+ * for any shared `turnId`.
+ */
+export function applyDurableMetrics(
+ state: MetricsState,
+ turns: readonly TurnMetrics[],
+): MetricsState {
+ const newDurable = new Map(state.durable);
+ const newDurableOrder = [...state.durableOrder];
+ for (const turn of turns) {
+ if (!newDurable.has(turn.turnId)) {
+ newDurableOrder.push(turn.turnId);
+ }
+ newDurable.set(turn.turnId, turn);
+ }
+ return {
+ ...state,
+ durable: newDurable,
+ durableOrder: newDurableOrder,
+ };
+}
+
+/**
+ * Select the merged ordered list of turn metrics entries.
+ * Durable turns come first (in their order), then any live turns whose
+ * `turnId` is not in durable (in live first-seen order).
+ *
+ * Each entry contains the completed steps so far and an optional total
+ * (null until the turn is finalized via `done` or durable data).
+ * Live turns with no completed steps and not done are omitted.
+ */
+export function selectOrderedTurnMetrics(state: MetricsState): readonly TurnMetricsEntry[] {
+ const result: TurnMetricsEntry[] = [];
+ const seen = new Set<string>();
+
+ for (const turnId of state.durableOrder) {
+ const tm = state.durable.get(turnId);
+ if (tm !== undefined) {
+ result.push({ turnId, steps: tm.steps, total: tm });
+ seen.add(turnId);
+ }
+ }
+
+ for (const turnId of state.liveOrder) {
+ if (seen.has(turnId)) continue;
+ const lt = state.live.get(turnId);
+ if (lt === undefined) continue;
+
+ const completeSteps = lt.stepOrder
+ .map((id) => lt.stepMap.get(id))
+ .filter((s): s is BuildingStep => s?.complete === true)
+ .map((s) => buildingStepToMetrics(s));
+
+ if (completeSteps.length === 0 && !lt.done) continue;
+
+ result.push({
+ turnId,
+ steps: completeSteps,
+ total: lt.done ? liveTurnToMetrics(lt) : null,
+ });
+ }
+
+ return result;
+}
diff --git a/src/core/metrics/types.ts b/src/core/metrics/types.ts
new file mode 100644
index 0000000..2b26e8d
--- /dev/null
+++ b/src/core/metrics/types.ts
@@ -0,0 +1,68 @@
+import type { StepMetrics, TurnMetrics, Usage } from "@dispatch/wire";
+import type { RenderGroup } from "../chunks";
+
+export type { StepMetrics, TurnMetrics };
+
+/** A step being built from live events (may be incomplete). */
+export interface BuildingStep {
+ readonly stepId: string;
+ readonly usage: Usage | undefined;
+ readonly ttftMs: number | undefined;
+ readonly decodeMs: number | undefined;
+ readonly genTotalMs: number | undefined;
+ readonly complete: boolean;
+}
+
+/** A turn being built from live events (in-flight). */
+export interface LiveTurn {
+ readonly turnId: string;
+ readonly done: boolean;
+ readonly durationMs: number | undefined;
+ readonly doneUsage: Usage | undefined;
+ readonly stepMap: ReadonlyMap<string, BuildingStep>;
+ readonly stepOrder: readonly string[];
+}
+
+/**
+ * Reducer state for per-turn / per-step token + timing metrics.
+ *
+ * - `live`: in-flight turns keyed by `turnId` in FIRST-SEEN order.
+ * - `durable`: sealed turns keyed by `turnId` in the order they arrived.
+ */
+export interface MetricsState {
+ readonly live: ReadonlyMap<string, LiveTurn>;
+ readonly liveOrder: readonly string[];
+ readonly durable: ReadonlyMap<string, TurnMetrics>;
+ readonly durableOrder: readonly string[];
+}
+
+/** Per-turn placement entry: completed steps so far + optional turn total. */
+export interface TurnMetricsEntry {
+ readonly turnId: string;
+ readonly steps: readonly StepMetrics[];
+ readonly total: TurnMetrics | null;
+}
+
+/** A row in the interleaved transcript: a render group, per-step metrics, or turn metrics. */
+export type MetricsRow =
+ | { readonly kind: "group"; readonly group: RenderGroup }
+ | { readonly kind: "step-metrics"; readonly step: StepMetrics; readonly index: number }
+ | { readonly kind: "turn-metrics"; readonly turn: TurnMetrics };
+
+/** Formatted per-step view for display. */
+export interface StepMetricsView {
+ readonly label: string;
+ readonly tokensLabel: string;
+ readonly tps: string | null;
+ readonly ttft: string | null;
+ readonly decode: string | null;
+ readonly genTotal: string | null;
+}
+
+/** Formatted per-turn view for display. */
+export interface TurnMetricsView {
+ readonly tokensLabel: string;
+ readonly breakdown: string;
+ readonly tps: string | null;
+ readonly duration: string | null;
+}
diff --git a/src/features/chat/index.ts b/src/features/chat/index.ts
index 4f2091a..ae3e1f8 100644
--- a/src/features/chat/index.ts
+++ b/src/features/chat/index.ts
@@ -1,6 +1,7 @@
export type { RenderedChunk, RenderGroup, ToolBatchEntry } from "../../core/chunks";
export { groupRenderedChunks } from "../../core/chunks";
-export type { ChatTransport, HistorySync } from "./ports";
+export type { TurnMetricsEntry } from "../../core/metrics";
+export type { ChatTransport, HistorySync, MetricsSync } from "./ports";
export type { ChatStore, ChatStoreDependencies } from "./store.svelte";
export { createChatStore } from "./store.svelte";
export { default as ChatView } from "./ui/ChatView.svelte";
diff --git a/src/features/chat/ports.ts b/src/features/chat/ports.ts
index 07943c7..e28ebf6 100644
--- a/src/features/chat/ports.ts
+++ b/src/features/chat/ports.ts
@@ -1,4 +1,8 @@
-import type { ChatSendMessage, ConversationHistoryResponse } from "@dispatch/transport-contract";
+import type {
+ ChatSendMessage,
+ ConversationHistoryResponse,
+ ConversationMetricsResponse,
+} from "@dispatch/transport-contract";
/** Injected transport port — sends chat messages to the server. */
export interface ChatTransport {
@@ -10,3 +14,6 @@ export type HistorySync = (
conversationId: string,
sinceSeq: number,
) => Promise<ConversationHistoryResponse>;
+
+/** Injected metrics-sync port — fetches persisted per-turn metrics from the server. */
+export type MetricsSync = (conversationId: string) => Promise<ConversationMetricsResponse>;
diff --git a/src/features/chat/store.svelte.ts b/src/features/chat/store.svelte.ts
index 1d8ab17..f4ad07b 100644
--- a/src/features/chat/store.svelte.ts
+++ b/src/features/chat/store.svelte.ts
@@ -13,20 +13,29 @@ import {
selectChunks,
selectMessages,
} from "../../core/chunks";
+import type { MetricsState, TurnMetricsEntry } from "../../core/metrics";
+import {
+ applyDurableMetrics,
+ foldMetricsEvent,
+ initialMetricsState,
+ selectOrderedTurnMetrics,
+} from "../../core/metrics";
import type { ConversationCache } from "../conversation-cache";
-import type { ChatTransport, HistorySync } from "./ports";
+import type { ChatTransport, HistorySync, MetricsSync } from "./ports";
export interface ChatStoreDependencies {
readonly conversationId: string;
readonly model?: string;
readonly transport: ChatTransport;
readonly historySync: HistorySync;
+ readonly metricsSync: MetricsSync;
readonly cache: ConversationCache;
}
export interface ChatStore {
readonly messages: readonly ChatMessage[];
readonly chunks: readonly RenderedChunk[];
+ readonly turnMetrics: readonly TurnMetricsEntry[];
readonly pendingSync: boolean;
readonly error: string | null;
readonly model: string | undefined;
@@ -39,6 +48,7 @@ export interface ChatStore {
export function createChatStore(deps: ChatStoreDependencies): ChatStore {
let transcript = $state<TranscriptState>(initialState());
+ let metrics = $state<MetricsState>(initialMetricsState());
let _pendingSync = $state(false);
let _error = $state<string | null>(null);
let _model = $state<string | undefined>(deps.model);
@@ -60,6 +70,17 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
}
}
+ async function syncMetrics(): Promise<void> {
+ if (disposed) return;
+ try {
+ const res = await deps.metricsSync(deps.conversationId);
+ metrics = applyDurableMetrics(metrics, res.turns);
+ } catch {
+ // Metrics fetch failure must not block history sync or throw;
+ // live-folded metrics remain intact.
+ }
+ }
+
return {
get messages(): readonly ChatMessage[] {
return selectMessages(transcript);
@@ -67,6 +88,9 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
get chunks(): readonly RenderedChunk[] {
return selectChunks(transcript);
},
+ get turnMetrics(): readonly TurnMetricsEntry[] {
+ return selectOrderedTurnMetrics(metrics);
+ },
get pendingSync(): boolean {
return _pendingSync;
},
@@ -89,8 +113,10 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
return;
}
transcript = foldEvent(transcript, msg.event);
+ metrics = foldMetricsEvent(metrics, msg.event);
if (transcript.sealedTurnId !== null) {
void syncTail();
+ void syncMetrics();
}
},
@@ -115,6 +141,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
transcript = applyHistory(transcript, cached);
}
await syncTail();
+ await syncMetrics();
},
dispose(): void {
diff --git a/src/features/chat/store.test.ts b/src/features/chat/store.test.ts
index 71781ac..1c99e7c 100644
--- a/src/features/chat/store.test.ts
+++ b/src/features/chat/store.test.ts
@@ -1,7 +1,12 @@
import type { AgentEvent, StepId, StoredChunk } from "@dispatch/wire";
import { describe, expect, it, vi } from "vitest";
import { createChatStore } from "./store.svelte";
-import { createFakeCache, createFakeHistorySync, createFakeTransport } from "./test-helpers";
+import {
+ createFakeCache,
+ createFakeHistorySync,
+ createFakeMetricsSync,
+ createFakeTransport,
+} from "./test-helpers";
const CONV_ID = "test-conv-1";
@@ -21,11 +26,13 @@ describe("createChatStore", () => {
it("folding a chat.delta updates messages", () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -51,11 +58,13 @@ describe("createChatStore", () => {
it("turn-sealed triggers a history sync, commits to cache, and applies merged history", async () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -92,11 +101,13 @@ describe("createChatStore", () => {
it("send posts a chat.send with conversationId", () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -114,12 +125,14 @@ describe("createChatStore", () => {
it("send posts a chat.send with model when set", () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
model: "openai/gpt-4",
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -134,11 +147,13 @@ describe("createChatStore", () => {
it("chat.error sets error", () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -154,6 +169,7 @@ describe("createChatStore", () => {
it("load hydrates from cache then syncs the tail", async () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
// Pre-populate cache
@@ -166,6 +182,7 @@ describe("createChatStore", () => {
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -184,6 +201,7 @@ describe("createChatStore", () => {
it("load with empty cache still syncs", async () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
historySync.returnChunks = [makeStoredChunk(1, "assistant")];
@@ -192,6 +210,7 @@ describe("createChatStore", () => {
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -206,11 +225,13 @@ describe("createChatStore", () => {
it("error is cleared on successful sync", async () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -236,11 +257,13 @@ describe("createChatStore", () => {
it("dispose prevents further syncs", async () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -262,6 +285,7 @@ describe("createChatStore", () => {
it("overlapping syncs are guarded", async () => {
const transport = createFakeTransport();
const _historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
// Make the first sync slow
@@ -283,6 +307,7 @@ describe("createChatStore", () => {
conversationId: CONV_ID,
transport: transport.impl,
historySync: slowHistorySync,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -310,11 +335,13 @@ describe("createChatStore", () => {
it("handles tool-call and tool-result chunks", () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -353,12 +380,14 @@ describe("createChatStore", () => {
it("setModel changes the model used by the next send", () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
model: "openai/gpt-4",
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -375,11 +404,13 @@ describe("createChatStore", () => {
it("setModel from undefined to a model", () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -396,11 +427,13 @@ describe("createChatStore", () => {
it("handleDelta ignores a chat.delta for a different conversationId", () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -424,11 +457,13 @@ describe("createChatStore", () => {
it("handleDelta ignores a chat.error for a different conversationId", () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -442,11 +477,13 @@ describe("createChatStore", () => {
it("send optimistically shows the user message immediately", () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -464,11 +501,13 @@ describe("createChatStore", () => {
it("the optimistic user message is replaced after turn-sealed + history sync", async () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
const cache = createFakeCache();
const store = createChatStore({
conversationId: CONV_ID,
transport: transport.impl,
historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
cache: cache.impl,
});
@@ -496,4 +535,271 @@ describe("createChatStore", () => {
store.dispose();
});
+
+ it("folding usage/step-complete/done deltas exposes turnMetrics", () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache();
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ });
+
+ expect(store.turnMetrics).toHaveLength(0);
+
+ store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" }));
+ store.handleDelta(
+ deltaEvent({
+ type: "usage",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ }),
+ );
+ store.handleDelta(
+ deltaEvent({
+ type: "step-complete",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ stepId: "t1#0" as StepId,
+ ttftMs: 200,
+ genTotalMs: 800,
+ }),
+ );
+ store.handleDelta(
+ deltaEvent({
+ type: "done",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ reason: "end-turn",
+ durationMs: 1200,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ }),
+ );
+
+ expect(store.turnMetrics).toHaveLength(1);
+ const entry = store.turnMetrics[0];
+ expect(entry?.turnId).toBe("t1");
+ expect(entry?.steps).toHaveLength(1);
+ expect(entry?.steps[0]?.stepId).toBe("t1#0" as StepId);
+ expect(entry?.steps[0]?.usage.inputTokens).toBe(100);
+ expect(entry?.steps[0]?.genTotalMs).toBe(800);
+ expect(entry?.total).not.toBeNull();
+ expect(entry?.total?.usage.inputTokens).toBe(100);
+ expect(entry?.total?.usage.outputTokens).toBe(50);
+ expect(entry?.total?.durationMs).toBe(1200);
+
+ store.dispose();
+ });
+
+ it("turnMetrics entry has total: null before done (progressive turn)", () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache();
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ });
+
+ store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" }));
+ store.handleDelta(
+ deltaEvent({
+ type: "usage",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ }),
+ );
+ store.handleDelta(
+ deltaEvent({
+ type: "step-complete",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ stepId: "t1#0" as StepId,
+ ttftMs: 200,
+ genTotalMs: 800,
+ }),
+ );
+
+ expect(store.turnMetrics).toHaveLength(1);
+ const entry = store.turnMetrics[0];
+ expect(entry?.turnId).toBe("t1");
+ expect(entry?.steps).toHaveLength(1);
+ expect(entry?.steps[0]?.stepId).toBe("t1#0" as StepId);
+ expect(entry?.total).toBeNull();
+
+ store.dispose();
+ });
+
+ it("metricsSync durable result overrides live by turnId", async () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache();
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ });
+
+ // Live fold gives some metrics
+ store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" }));
+ store.handleDelta(
+ deltaEvent({
+ type: "usage",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ }),
+ );
+ store.handleDelta(
+ deltaEvent({
+ type: "done",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ reason: "end-turn",
+ usage: { inputTokens: 100, outputTokens: 50 },
+ }),
+ );
+
+ expect(store.turnMetrics).toHaveLength(1);
+ expect(store.turnMetrics[0]?.total?.usage.outputTokens).toBe(50);
+
+ // Durable sync returns different numbers for the same turnId
+ metricsSync.returnTurns = [
+ {
+ turnId: "t1",
+ usage: { inputTokens: 200, outputTokens: 80 },
+ durationMs: 500,
+ steps: [
+ {
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 200, outputTokens: 80 },
+ genTotalMs: 400,
+ },
+ ],
+ },
+ ];
+
+ // Trigger metrics sync via turn-sealed
+ historySync.returnChunks = [];
+ store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t1" }));
+
+ await vi.waitFor(() => {
+ expect(metricsSync.calls).toHaveLength(1);
+ });
+
+ // Durable should now override live (syncMetrics is async, wait for it)
+ await vi.waitFor(() => {
+ expect(store.turnMetrics[0]?.total?.usage.outputTokens).toBe(80);
+ });
+
+ expect(store.turnMetrics).toHaveLength(1);
+ expect(store.turnMetrics[0]?.total?.durationMs).toBe(500);
+
+ store.dispose();
+ });
+
+ it("rejected metricsSync leaves live metrics intact and does not throw", async () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache();
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ });
+
+ // Live fold some metrics
+ store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" }));
+ store.handleDelta(
+ deltaEvent({
+ type: "usage",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ }),
+ );
+ store.handleDelta(
+ deltaEvent({
+ type: "done",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ reason: "end-turn",
+ usage: { inputTokens: 100, outputTokens: 50 },
+ }),
+ );
+
+ expect(store.turnMetrics).toHaveLength(1);
+
+ // Make the metrics sync reject
+ metricsSync.nextError = "metrics endpoint unavailable";
+
+ historySync.returnChunks = [];
+ store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t1" }));
+
+ await vi.waitFor(() => {
+ expect(metricsSync.calls).toHaveLength(1);
+ });
+
+ // Live metrics should still be intact
+ expect(store.turnMetrics).toHaveLength(1);
+ expect(store.turnMetrics[0]?.total?.usage.outputTokens).toBe(50);
+
+ // No error should have been thrown to the store
+ expect(store.error).toBeNull();
+
+ store.dispose();
+ });
+
+ it("load calls metricsSync after history sync", async () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache();
+
+ metricsSync.returnTurns = [
+ {
+ turnId: "t1",
+ usage: { inputTokens: 300, outputTokens: 100 },
+ durationMs: 900,
+ steps: [],
+ },
+ ];
+
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ });
+
+ await store.load();
+
+ expect(historySync.calls).toHaveLength(1);
+ expect(metricsSync.calls).toHaveLength(1);
+ expect(metricsSync.calls[0]).toBe(CONV_ID);
+ expect(store.turnMetrics).toHaveLength(1);
+ expect(store.turnMetrics[0]?.total?.usage.inputTokens).toBe(300);
+
+ store.dispose();
+ });
});
diff --git a/src/features/chat/test-helpers.ts b/src/features/chat/test-helpers.ts
index d37b59e..07dad26 100644
--- a/src/features/chat/test-helpers.ts
+++ b/src/features/chat/test-helpers.ts
@@ -1,6 +1,6 @@
import type { StoredChunk } from "@dispatch/wire";
import type { ConversationCache } from "../conversation-cache";
-import type { ChatTransport, HistorySync } from "./ports";
+import type { ChatTransport, HistorySync, MetricsSync } from "./ports";
export interface FakeTransport {
readonly sent: import("@dispatch/transport-contract").ChatSendMessage[];
@@ -46,6 +46,44 @@ export function createFakeHistorySync(): FakeHistorySync {
};
}
+export interface FakeMetricsSync {
+ readonly calls: string[];
+ returnTurns: import("@dispatch/wire").TurnMetrics[];
+ /** If set, the next call will reject with this error. */
+ nextError: string | undefined;
+ readonly impl: MetricsSync;
+}
+
+export function createFakeMetricsSync(): FakeMetricsSync {
+ const calls: string[] = [];
+ let returnTurns: import("@dispatch/wire").TurnMetrics[] = [];
+ let nextError: string | undefined;
+ return {
+ calls,
+ get returnTurns() {
+ return returnTurns;
+ },
+ set returnTurns(v: import("@dispatch/wire").TurnMetrics[]) {
+ returnTurns = v;
+ },
+ get nextError() {
+ return nextError;
+ },
+ set nextError(v: string | undefined) {
+ nextError = v;
+ },
+ impl: async (conversationId) => {
+ calls.push(conversationId);
+ if (nextError !== undefined) {
+ const err = nextError;
+ nextError = undefined;
+ throw new Error(err);
+ }
+ return { turns: returnTurns };
+ },
+ };
+}
+
export interface FakeCache {
readonly store: Map<string, StoredChunk[]>;
readonly impl: ConversationCache;
diff --git a/src/features/chat/ui.test.ts b/src/features/chat/ui.test.ts
index b31cbf1..4abf717 100644
--- a/src/features/chat/ui.test.ts
+++ b/src/features/chat/ui.test.ts
@@ -3,6 +3,7 @@ import { render, screen } from "@testing-library/svelte";
import userEvent from "@testing-library/user-event";
import { describe, expect, it, vi } from "vitest";
import type { RenderedChunk } from "../../core/chunks";
+import type { TurnMetricsEntry } from "../../core/metrics";
import ChatView from "./ui/ChatView.svelte";
import Composer from "./ui/Composer.svelte";
import ModelSelector from "./ui/ModelSelector.svelte";
@@ -278,6 +279,224 @@ describe("ChatView", () => {
expect(screen.getByRole("checkbox", { name: "Toggle thoughts" })).toBeChecked();
expect(container).toHaveTextContent("hmm, all done");
});
+
+ it("renders step and turn metrics as separate rows", () => {
+ const chunks: RenderedChunk[] = [
+ { seq: 1, role: "user", chunk: { type: "text", text: "Hi" }, provisional: false },
+ {
+ seq: 2,
+ role: "assistant",
+ chunk: { type: "text", text: "Hello!" },
+ provisional: false,
+ },
+ ];
+
+ const turnMetrics: TurnMetricsEntry[] = [
+ {
+ turnId: "t1",
+ steps: [
+ {
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ genTotalMs: 800,
+ },
+ ],
+ total: {
+ turnId: "t1",
+ usage: { inputTokens: 100, outputTokens: 50 },
+ durationMs: 1200,
+ steps: [
+ {
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ genTotalMs: 800,
+ },
+ ],
+ },
+ },
+ ];
+
+ render(ChatView, { props: { chunks, turnMetrics } });
+
+ expect(screen.getByText("Hi")).toBeInTheDocument();
+ expect(screen.getByText("Hello!")).toBeInTheDocument();
+ expect(screen.getByText(/step 1/)).toBeInTheDocument();
+ expect(screen.getAllByText(/150 tok/)).toHaveLength(2);
+ expect(screen.getByText(/turn · 150 tok \(100 in \/ 50 out\)/)).toBeInTheDocument();
+ expect(screen.getByText(/1\.2s/)).toBeInTheDocument();
+ });
+
+ it("renders step-metrics inline after tool group", () => {
+ const chunks: RenderedChunk[] = [
+ { seq: 1, role: "user", chunk: { type: "text", text: "Run it" }, provisional: false },
+ {
+ seq: 2,
+ role: "assistant",
+ chunk: {
+ type: "tool-call",
+ toolCallId: "tc1",
+ toolName: "bash",
+ input: { command: "ls" },
+ stepId: "t1#0" as StepId,
+ },
+ provisional: false,
+ },
+ {
+ seq: 3,
+ role: "tool",
+ chunk: {
+ type: "tool-result",
+ toolCallId: "tc1",
+ toolName: "bash",
+ content: "file.txt",
+ isError: false,
+ stepId: "t1#0" as StepId,
+ },
+ provisional: false,
+ },
+ {
+ seq: 4,
+ role: "assistant",
+ chunk: { type: "text", text: "Done!" },
+ provisional: false,
+ },
+ ];
+
+ const turnMetrics: TurnMetricsEntry[] = [
+ {
+ turnId: "t1",
+ steps: [
+ {
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 80, outputTokens: 20 },
+ genTotalMs: 300,
+ },
+ ],
+ total: {
+ turnId: "t1",
+ usage: { inputTokens: 80, outputTokens: 20 },
+ durationMs: 500,
+ steps: [
+ {
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 80, outputTokens: 20 },
+ genTotalMs: 300,
+ },
+ ],
+ },
+ },
+ ];
+
+ render(ChatView, { props: { chunks, turnMetrics } });
+
+ // Both step-metrics and turn-metrics render
+ expect(screen.getByText(/step 1/)).toBeInTheDocument();
+ expect(screen.getByText(/turn · 100 tok/)).toBeInTheDocument();
+
+ // They are in separate elements (different rows)
+ const stepEl = screen.getByText(/step 1 · 100 tok/).closest("div");
+ const turnEl = screen.getByText(/turn · 100 tok/).closest("div");
+ expect(stepEl).not.toBe(turnEl);
+ });
+
+ it("renders no metrics bubble when turnMetrics is empty", () => {
+ const chunks: RenderedChunk[] = [
+ { seq: 1, role: "user", chunk: { type: "text", text: "Hi" }, provisional: false },
+ {
+ seq: 2,
+ role: "assistant",
+ chunk: { type: "text", text: "Hello!" },
+ provisional: false,
+ },
+ ];
+
+ render(ChatView, { props: { chunks, turnMetrics: [] } });
+
+ expect(screen.getByText("Hi")).toBeInTheDocument();
+ expect(screen.getByText("Hello!")).toBeInTheDocument();
+ expect(screen.queryByText(/step 1/)).toBeNull();
+ expect(screen.queryByText(/^turn/)).toBeNull();
+ });
+
+ it("omits null view values from metrics bubbles", () => {
+ const chunks: RenderedChunk[] = [
+ { seq: 1, role: "user", chunk: { type: "text", text: "Test" }, provisional: false },
+ {
+ seq: 2,
+ role: "assistant",
+ chunk: { type: "text", text: "Response" },
+ provisional: false,
+ },
+ ];
+
+ const turnMetrics: TurnMetricsEntry[] = [
+ {
+ turnId: "t1",
+ steps: [
+ {
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 10, outputTokens: 5 },
+ },
+ ],
+ total: {
+ turnId: "t1",
+ usage: { inputTokens: 10, outputTokens: 5 },
+ steps: [
+ {
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 10, outputTokens: 5 },
+ },
+ ],
+ },
+ },
+ ];
+
+ render(ChatView, { props: { chunks, turnMetrics } });
+
+ // Step metrics rendered
+ expect(screen.getByText(/step 1/)).toBeInTheDocument();
+ expect(screen.getAllByText(/15 tok/)).toHaveLength(2);
+ // Turn metrics rendered
+ expect(screen.getByText(/turn · 15 tok \(10 in \/ 5 out\)/)).toBeInTheDocument();
+ // No "null" or "undefined" in the DOM
+ expect(screen.queryByText("null")).toBeNull();
+ expect(screen.queryByText("undefined")).toBeNull();
+ });
+
+ it("renders step text but no turn total for a progressive turn (total: null)", () => {
+ const chunks: RenderedChunk[] = [
+ { seq: 1, role: "user", chunk: { type: "text", text: "Hi" }, provisional: false },
+ {
+ seq: 2,
+ role: "assistant",
+ chunk: { type: "text", text: "Hello!" },
+ provisional: false,
+ },
+ ];
+
+ const turnMetrics: TurnMetricsEntry[] = [
+ {
+ turnId: "t1",
+ steps: [
+ {
+ stepId: "t1#0" as StepId,
+ usage: { inputTokens: 100, outputTokens: 50 },
+ genTotalMs: 800,
+ },
+ ],
+ total: null,
+ },
+ ];
+
+ render(ChatView, { props: { chunks, turnMetrics } });
+
+ // Step metrics should render
+ expect(screen.getByText(/step 1/)).toBeInTheDocument();
+ expect(screen.getByText(/150 tok/)).toBeInTheDocument();
+
+ // Turn total should NOT render (total is null — turn still in progress)
+ expect(screen.queryByText(/^turn/)).toBeNull();
+ });
});
describe("Composer", () => {
diff --git a/src/features/chat/ui/ChatView.svelte b/src/features/chat/ui/ChatView.svelte
index 3a078fb..ba6e961 100644
--- a/src/features/chat/ui/ChatView.svelte
+++ b/src/features/chat/ui/ChatView.svelte
@@ -1,17 +1,33 @@
<script lang="ts">
import { groupRenderedChunks, type RenderedChunk } from "../index";
+ import { interleaveTurnMetrics, viewStepMetrics, viewTurnMetrics, type TurnMetricsEntry } from "../../../core/metrics";
- let { chunks }: { chunks: readonly RenderedChunk[] } = $props();
+ let {
+ chunks,
+ turnMetrics = [],
+ }: {
+ chunks: readonly RenderedChunk[];
+ turnMetrics?: readonly TurnMetricsEntry[];
+ } = $props();
const groups = $derived(groupRenderedChunks(chunks));
+ const rows = $derived(interleaveTurnMetrics(groups, turnMetrics));
+
// Stable per-row keys. Thinking blocks get an ordinal key (`think<n>`) that
// survives the provisional→committed (seq null → seq N) transition, so the
// collapse's open/close state is NOT lost when a turn seals. (App isolates
// these keys per conversation via {#key}.)
- const rows = $derived.by(() => {
+ const keyedRows = $derived.by(() => {
let thinking = 0;
- return groups.map((group, i) => {
+ return rows.map((row, i) => {
+ if (row.kind === "step-metrics") {
+ return { row, key: `s${row.step.stepId}` };
+ }
+ if (row.kind === "turn-metrics") {
+ return { row, key: `m${row.turn.turnId}` };
+ }
+ const group = row.group;
let key: string;
if (group.kind === "tool-batch") {
key = `b${group.stepId}`;
@@ -22,7 +38,7 @@
} else {
key = `p${i}`;
}
- return { group, key };
+ return { row, key };
});
});
</script>
@@ -102,9 +118,31 @@
{/snippet}
<div class="flex flex-col gap-2 p-4 pl-6" role="log" aria-live="polite">
- {#each rows as { group, key } (key)}
- {#if group.kind === "single"}
- {@render chunkRow(group.chunk)}
+ {#each keyedRows as { row, key } (key)}
+ {#if row.kind === "step-metrics"}
+ {@const sv = viewStepMetrics(row.step, row.index)}
+ <div class="chat chat-start">
+ <div class="chat-bubble w-full max-w-5xl bg-transparent p-0">
+ <div class="text-xs opacity-70">
+ {sv.label} · {sv.tokensLabel}
+ {#if sv.tps} · {sv.tps}{/if}
+ {#if sv.genTotal} · {sv.genTotal}{/if}
+ </div>
+ </div>
+ </div>
+ {:else if row.kind === "turn-metrics"}
+ {@const turnView = viewTurnMetrics(row.turn)}
+ <div class="chat chat-start">
+ <div class="chat-bubble w-full max-w-5xl bg-transparent p-0">
+ <div class="text-xs opacity-70">
+ turn · {turnView.tokensLabel} ({turnView.breakdown})
+ {#if turnView.tps} · {turnView.tps}{/if}
+ {#if turnView.duration} · {turnView.duration}{/if}
+ </div>
+ </div>
+ </div>
+ {:else if row.group.kind === "single"}
+ {@render chunkRow(row.group.chunk)}
{:else}
<!-- Batched tool calls (one step): a single bubble holding a DaisyUI list,
one row per call paired with its result. Same chat-start grid shim as
@@ -112,7 +150,7 @@
<div class="chat chat-start [&>.chat-bubble]:max-w-full [&>.chat-bubble]:p-0">
<div class="chat-bubble bg-transparent">
<ul class="list w-fit max-w-full rounded-box bg-base-200 text-sm">
- {#each group.entries as entry (entry.call.toolCallId)}
+ {#each row.group.entries as entry (entry.call.toolCallId)}
<li class="list-row">
<div>
<strong>{entry.call.toolName}</strong>