From 80f8a219c89a963c485da0f40dc428bf688fedb7 Mon Sep 17 00:00:00 2001 From: Adam Malczewski Date: Sun, 7 Jun 2026 18:52:13 +0900 Subject: Revert "feat(chat): live turn metrics — telemetry reducer + rendering" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 48c6d85c3cc5a57a729f14068e2346b17ed62088. --- .dispatch/transport-contract.reference.md | 14 +- .dispatch/wire.reference.md | 113 ++++++-------- scripts/live-probe.ts | 77 +-------- src/app/App.svelte | 12 +- src/core/chunks/reducer.ts | 4 - src/core/telemetry/index.ts | 14 -- src/core/telemetry/reducer.test.ts | 252 ------------------------------ src/core/telemetry/reducer.ts | 122 --------------- src/core/telemetry/selectors.ts | 95 ----------- src/core/telemetry/types.ts | 35 ----- src/core/wire/conformance.test.ts | 14 +- src/core/wire/conformance.ts | 2 - src/features/chat/index.ts | 2 - src/features/chat/store.svelte.ts | 12 -- src/features/chat/store.test.ts | 46 ------ src/features/chat/ui.test.ts | 150 ++---------------- src/features/chat/ui/ChatView.svelte | 93 +++-------- src/features/chat/ui/TurnSummary.svelte | 75 --------- 18 files changed, 98 insertions(+), 1034 deletions(-) delete mode 100644 src/core/telemetry/index.ts delete mode 100644 src/core/telemetry/reducer.test.ts delete mode 100644 src/core/telemetry/reducer.ts delete mode 100644 src/core/telemetry/selectors.ts delete mode 100644 src/core/telemetry/types.ts delete mode 100644 src/features/chat/ui/TurnSummary.svelte diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md index ef0235a..fcc2cbf 100644 --- a/.dispatch/transport-contract.reference.md +++ b/.dispatch/transport-contract.reference.md @@ -5,15 +5,15 @@ > hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally — > this file is for READING only. > -> **Orchestrator:** SNAPSHOT of `transport-contract@0.3.0`. Regenerate whenever it changes. -> Depends on `@dispatch/wire@0.3.0` (see `wire.reference.md`) + `@dispatch/ui-contract` +> **Orchestrator:** SNAPSHOT of `transport-contract@0.2.0`. Regenerate whenever it changes. +> Depends on `@dispatch/wire@0.2.0` (see `wire.reference.md`) + `@dispatch/ui-contract` > (see `ui-contract.reference.md`). > -> **0.3.0 change (live metrics):** no shape change HERE — this contract's own types are identical. -> It re-exports the bumped `@dispatch/wire`, whose `AgentEvent` union gained a `step-complete` -> variant and timing fields on `usage`/`tool-result`/`done`. So the `chat.delta` events you stream -> over WS now also carry the live metrics. See `frontend-metrics-handoff.md` for the full guide. -> (0.2.0: tool-call `stepId` grouping.) +> **0.2.0 change (step grouping):** no shape change HERE — this contract's own types are +> identical. It only re-exports the bumped `@dispatch/wire`, whose `AgentEvent` tool variants +> now carry a required `stepId` and whose tool `Chunk`s carry an optional `stepId`. The +> `chat.delta` events streamed over WS and the `ConversationHistoryResponse.chunks` you already +> consume therefore now carry the step grouping key (see `wire.reference.md`). ## Endpoints (backend, confirmed live — CORS wildcard `*`, HTTP port 24203, WS port 24205) diff --git a/.dispatch/wire.reference.md b/.dispatch/wire.reference.md index 7814bc3..ed95351 100644 --- a/.dispatch/wire.reference.md +++ b/.dispatch/wire.reference.md @@ -4,14 +4,13 @@ > types WITHOUT following the `file:` dep symlink out of this repo (which hangs on a permission > prompt). Your CODE still imports `@dispatch/wire` normally — this file is for READING only. > -> **Orchestrator:** SNAPSHOT of `wire@0.3.0`. Regenerate whenever `@dispatch/wire` changes. +> **Orchestrator:** SNAPSHOT of `wire@0.2.0`. Regenerate whenever `@dispatch/wire` changes. > -> **0.3.0 change (live metrics — see `frontend-metrics-handoff.md` for the full guide):** new -> `TurnStepCompleteEvent` (`type:"step-complete"`) in the `AgentEvent` union with per-step -> `ttftMs?`/`decodeMs?`/`genTotalMs?`; `TurnUsageEvent` gained `stepId?`; `TurnToolResultEvent` -> gained `durationMs?` (tool exec time); `TurnDoneEvent` gained `durationMs?` (turn wall-clock) + -> `usage?` (turn total). All additive/optional — existing handling is unaffected. (0.2.0 added -> `stepId` for tool-call grouping.) +> **0.2.0 change (step grouping):** `ToolCallChunk`/`ToolResultChunk` gained an OPTIONAL +> `stepId?: StepId`; `TurnToolCallEvent`/`TurnToolResultEvent` gained a REQUIRED `stepId: StepId`. +> A `StepId` is the per-step grouping key for batched/parallel tool calls — group by equality. +> Live: read `event.stepId`. Replay: read `storedChunk.chunk.stepId` (NOT the envelope; absent on +> pre-0.2.0 rows / non-tool chunks — tolerate absence). `StoredChunk` envelope is UNCHANGED. ```ts /** @@ -76,7 +75,17 @@ export interface ToolCallChunk { readonly toolCallId: string; readonly toolName: string; readonly input: unknown; - /** Step grouping key (generation provenance). Optional — tolerate absence. */ + /** + * The step that produced this call — generation provenance stamped by the + * runtime when the model emits the call (NOT storage metadata like `seq`, + * which is why it lives on the chunk and travels with it through persistence + * and replay). Tool calls a model batches together in one step share the same + * `stepId`: the grouping key for rendering a parallel batch as one unit, and + * equal to the `stepId` on the matching `tool-call` AgentEvent. Optional: + * absent on chunks reconstructed outside a turn and on rows persisted before + * this field existed, so a consumer must tolerate its absence (render + * ungrouped). + */ readonly stepId?: StepId; } @@ -91,7 +100,14 @@ export interface ToolResultChunk { readonly toolName: string; readonly content: string; readonly isError: boolean; - /** Step grouping key — equals the originating call's. Optional. */ + /** + * The step that produced the originating call — equal to the `stepId` on the + * matching `tool-call` chunk (same `toolCallId`) and on the `tool-result` + * AgentEvent, so a consumer groups a step's calls with their results. + * Generation provenance, not storage metadata (see `ToolCallChunk.stepId`). + * Optional for the same reasons; `reconcile` copies it from the originating + * call onto a synthesized (interrupted) result. + */ readonly stepId?: StepId; } @@ -122,10 +138,16 @@ export interface ChatMessage { } /** - * A persisted chunk plus its sync metadata: `{ seq, role, chunk }`. `seq` is the - * per-conversation sync cursor (envelope); a tool chunk's `stepId` rides on - * `chunk` (generation provenance). NOTE: usage/timing metrics are NOT persisted — - * they exist only on the live stream (see `frontend-metrics-handoff.md`). + * A persisted chunk plus its sync metadata. The append-only conversation log + * stamps every chunk with a monotonic, gap-free, per-conversation `seq` (the + * sync cursor, assigned in append order) and records the `role` of the message + * it belongs to. This makes a flat seq-ordered stream both incrementally + * syncable ("give me chunks after seq N") and regroupable into messages by the + * client. `chunk` is the content unit — `Chunk` carries no storage/sync cursor + * (`seq` lives here on the envelope, not on the chunk, since it is assigned by + * the store and the provider has no use for it). A chunk MAY still carry + * generation provenance assigned at production time (e.g. a tool chunk's + * `stepId`), which is intrinsic to the content and so travels with it. */ export interface StoredChunk { readonly seq: number; @@ -161,7 +183,6 @@ export type AgentEvent = | TurnToolResultEvent | TurnToolOutputEvent | TurnUsageEvent - | TurnStepCompleteEvent | TurnErrorEvent | TurnDoneEvent | TurnSealedEvent; @@ -201,7 +222,13 @@ export interface TurnToolCallEvent { readonly type: "tool-call"; readonly conversationId: string; readonly turnId: string; - /** Step grouping key (matches the tool-result event + persisted chunk). */ + /** + * The step that produced this call. Tool calls a model batches together in + * one step share the same `stepId` — the grouping key for rendering a + * parallel batch as one unit. Matches the `stepId` on the matching + * `tool-result` event and on the persisted tool chunk + * (`StoredChunk.chunk.stepId`). + */ readonly stepId: StepId; readonly toolCallId: string; readonly toolName: string; @@ -213,18 +240,17 @@ export interface TurnToolResultEvent { readonly type: "tool-result"; readonly conversationId: string; readonly turnId: string; - /** Step grouping key — equals the matching tool-call's. */ + /** + * The step that produced the originating call. Equal to the `stepId` on the + * matching `tool-call` event (same `toolCallId`) and on the persisted tool + * chunk (`StoredChunk.chunk.stepId`), so a client groups a step's calls with + * their results. + */ readonly stepId: StepId; readonly toolCallId: string; readonly toolName: string; readonly content: string; readonly isError: boolean; - /** - * How long the tool took to execute (dispatch → result), in milliseconds — - * the backend's authoritative execution time, distinct from any client-side - * wall-clock. Optional: present only when the runtime was given a clock. - */ - readonly durationMs?: number; } /** Streaming output from a tool execution (e.g. shell stdout/stderr). */ @@ -242,42 +268,9 @@ export interface TurnUsageEvent { readonly type: "usage"; readonly conversationId: string; readonly turnId: string; - /** - * The step this usage report belongs to, so a consumer can attribute tokens - * per step (and join with the matching `step-complete` timing by `stepId`). - * Optional: absent when the runtime had no step context. - */ - readonly stepId?: StepId; readonly usage: Usage; } -/** - * A step (one LLM round-trip) has completed — the authoritative per-step metrics - * packet, emitted once at the step's end (after the generation stream finishes), - * so its timing is final (unlike `usage`, which may arrive mid-stream). Carries - * the step's generation timing; join to the step's tokens via `stepId` on the - * `usage` event. All timing fields are optional: present only when the runtime - * was given a clock, and `ttftMs`/`decodeMs` additionally require that a first - * content token (text or reasoning) was observed this step. - */ -export interface TurnStepCompleteEvent { - readonly type: "step-complete"; - readonly conversationId: string; - readonly turnId: string; - readonly stepId: StepId; - /** Time to first token: stream start → first text/reasoning delta. */ - readonly ttftMs?: number; - /** Decode time: first token → stream end (generation total − TTFT). */ - readonly decodeMs?: number; - /** - * Total generation time for the step: stream start → stream end. Present - * whenever a clock was available, even if no first token was seen (then - * `ttftMs`/`decodeMs` are absent). When a first token was seen, - * `genTotalMs === ttftMs + decodeMs`. - */ - readonly genTotalMs?: number; -} - /** An error occurred during the turn. */ export interface TurnErrorEvent { readonly type: "error"; @@ -293,16 +286,6 @@ export interface TurnDoneEvent { readonly conversationId: string; readonly turnId: string; readonly reason: string; - /** - * Total wall-clock duration of the turn (turn start → turn end), in - * milliseconds. Optional: present only when the runtime was given a clock. - */ - readonly durationMs?: number; - /** - * Aggregate token usage across all steps in the turn — a convenience total so - * a consumer need not sum the per-step `usage` events. Optional. - */ - readonly usage?: Usage; } /** diff --git a/scripts/live-probe.ts b/scripts/live-probe.ts index f38c907..2c4dfb9 100644 --- a/scripts/live-probe.ts +++ b/scripts/live-probe.ts @@ -43,13 +43,6 @@ import { selectMessages, type TranscriptState, } from "../src/core/chunks/index.ts"; -import { - foldMetricEvent, - stepMetrics, - type TelemetryState, - initialState as telemetryInitialState, - turnMetrics, -} from "../src/core/telemetry/index.ts"; import { createConversationCache } from "../src/features/conversation-cache/index.ts"; const WS_URL = process.env.PROBE_WS ?? "ws://localhost:24205"; @@ -94,15 +87,8 @@ async function runTurn( socket: Socket, conversationId: string, prompt: string, -): Promise<{ - state: TranscriptState; - telemetry: TelemetryState; - deltas: number; - sealed: boolean; - error: string | null; -}> { +): Promise<{ state: TranscriptState; deltas: number; sealed: boolean; error: string | null }> { let state = initialState(); - let telemetry = telemetryInitialState(); let deltas = 0; let sealed = false; let error: string | null = null; @@ -116,7 +102,6 @@ async function runTurn( } deltas++; state = foldEvent(state, msg.event); - telemetry = foldMetricEvent(telemetry, msg.event); if (msg.event.type === "turn-sealed") { sealed = true; done.resolve(); @@ -128,7 +113,7 @@ async function runTurn( await done.promise; clearTimeout(timeout); handlers.delete(conversationId); - return { state, telemetry, deltas, sealed, error }; + return { state, deltas, sealed, error }; } function toolChunksOf(state: TranscriptState) { @@ -193,44 +178,6 @@ async function main() { .join(""); record("turn 1 committed transcript has assistant text", committedText.length > 0); - // ─── Turn 1 telemetry: verify step metrics populated ─────────────────────── - const t1Turn = turnMetrics(t1.telemetry, textConv); - const t1StepCount = t1Turn?.steps.length ?? 0; - record("turn 1 telemetry accumulated steps", t1StepCount > 0, `${t1StepCount} step(s)`); - if (t1StepCount > 0) { - const s0 = stepMetrics(t1.telemetry, textConv, 0); - const hasTiming = s0?.genTotalMs !== undefined || s0?.ttftMs !== undefined; - if (hasTiming) { - record( - "turn 1 step 0 has timing metrics", - true, - `ttftMs=${s0?.ttftMs ?? "–"} decodeMs=${s0?.decodeMs ?? "–"} genTotalMs=${s0?.genTotalMs ?? "–"}`, - ); - } else { - note( - "turn 1 step 0 has no timing (backend may not have a clock) — telemetry path verified but no timing to assert", - ); - } - const hasTokens = s0?.usage?.outputTokens !== undefined; - if (hasTokens) { - record( - "turn 1 step 0 has token usage", - true, - `in=${s0?.usage?.inputTokens ?? "–"} out=${s0?.usage?.outputTokens ?? "–"}`, - ); - } else { - note( - "turn 1 step 0 has no usage (stepId may not have been on the usage event) — telemetry path verified", - ); - } - } - const t1Done = t1Turn?.wallMs; - if (t1Done !== undefined) { - record("turn 1 done event recorded wall-clock", true, `${t1Done}ms`); - } else { - note("turn 1 done.durationMs absent (backend clock unavailable)"); - } - // ─── Turn 2: tool-call batching (wire@0.2.0 stepId) ───────────────────────── console.log(`\n[live-probe] TURN 2 (tools): "${TOOL_PROMPT}"`); const toolConv = crypto.randomUUID(); @@ -238,26 +185,6 @@ async function main() { if (t2.error !== null) record("turn 2 had no chat.error", false, t2.error); record("turn 2 reached turn-sealed", t2.sealed); - // ─── Turn 2 telemetry: verify step + tool metrics ────────────────────────── - const t2Turn = turnMetrics(t2.telemetry, toolConv); - const t2StepCount = t2Turn?.steps.length ?? 0; - record("turn 2 telemetry accumulated steps", t2StepCount > 0, `${t2StepCount} step(s)`); - if (t2StepCount > 0) { - const s0 = stepMetrics(t2.telemetry, toolConv, 0); - if (s0?.toolDurationMs !== undefined && s0.toolDurationMs > 0) { - record("turn 2 step 0 has tool execution time", true, `toolDurationMs=${s0.toolDurationMs}`); - } else { - note("turn 2 step 0 has no toolDurationMs (tool-result.durationMs may be absent)"); - } - if (s0?.genTotalMs !== undefined) { - record("turn 2 step 0 has generation timing", true, `genTotalMs=${s0.genTotalMs}`); - } - } - const t2Done = t2Turn?.wallMs; - if (t2Done !== undefined) { - record("turn 2 done event recorded wall-clock", true, `${t2Done}ms`); - } - const liveTool = toolChunksOf(t2.state); const liveCalls = liveTool.filter((c) => c.chunk.type === "tool-call"); diff --git a/src/app/App.svelte b/src/app/App.svelte index e1d59f9..61b4cb9 100644 --- a/src/app/App.svelte +++ b/src/app/App.svelte @@ -1,6 +1,6 @@ -{#snippet chunkRow(rendered: RenderedChunk, sIdx: number)} +{#snippet chunkRow(rendered: RenderedChunk)} {#if rendered.role === "user"} +
{#if rendered.chunk.type === "text"} @@ -52,6 +38,9 @@
{:else if rendered.chunk.type === "thinking"} +
@@ -69,18 +58,14 @@
{:else if rendered.chunk.type === "tool-call" || rendered.chunk.type === "tool-result"} - {@const step = currentTurnId ? stepMetrics(telemetry, currentTurnId, sIdx) : undefined} - {@const toolDur = step?.toolDurationMs} +
{#if rendered.chunk.type === "tool-call"}
-
- {rendered.chunk.toolName} - {#if toolDur !== undefined && toolDur > 0} - {formatMs(toolDur)} - {/if} -
+ {rendered.chunk.toolName}
{JSON.stringify(rendered.chunk.input, null, 2)}
{:else} @@ -88,43 +73,19 @@ class="w-fit max-w-full rounded-box bg-base-200 p-3 text-sm" class:text-error={rendered.chunk.isError} > -
- {rendered.chunk.toolName} - {#if toolDur !== undefined && toolDur > 0} - {formatMs(toolDur)} - {/if} -
+ {rendered.chunk.toolName}
{rendered.chunk.content}
{/if}
{:else} - {@const step = currentTurnId ? stepMetrics(telemetry, currentTurnId, sIdx) : undefined} - {@const tps = step ? stepTps(step) : undefined} +
{#if rendered.chunk.type === "text"} -
    -
  • -

    {rendered.chunk.text}

    -
  • - {#if step && (step.genTotalMs !== undefined || tps !== undefined || step.usage?.outputTokens !== undefined)} -
  • - {#if step.genTotalMs !== undefined} - {formatMs(step.genTotalMs)} - {/if} - · - {#if tps !== undefined} - {Math.round(tps)} t/s - {/if} - · - {#if step.usage?.outputTokens !== undefined} - {step.usage.outputTokens} tok - {/if} -
  • - {/if} -
+

{rendered.chunk.text}

{:else if rendered.chunk.type === "error"}