diff options
Diffstat (limited to '.dispatch')
| -rw-r--r-- | .dispatch/package-agent.md | 3 | ||||
| -rw-r--r-- | .dispatch/transport-contract.reference.md | 47 | ||||
| -rw-r--r-- | .dispatch/wire.reference.md | 119 |
3 files changed, 151 insertions, 18 deletions
diff --git a/.dispatch/package-agent.md b/.dispatch/package-agent.md index 73e960c..5c1a54d 100644 --- a/.dispatch/package-agent.md +++ b/.dispatch/package-agent.md @@ -26,7 +26,8 @@ it, test it, and write a report — nothing else. If no single unit is named, st - **The contracts you consume:** reproduced IN-REPO under `.dispatch/*.reference.md` — read THOSE: - `.dispatch/ui-contract.reference.md` — `@dispatch/ui-contract` (surfaces + surface WS protocol). - `.dispatch/wire.reference.md` — `@dispatch/wire` (`Chunk`/`StoredChunk`+`seq`/`ChatMessage`/ - `AgentEvent`/`TurnSealedEvent`/`Usage` — the chat wire types). + `AgentEvent`/`TurnSealedEvent`/`Usage` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`, + `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs` — the chat wire types). - `.dispatch/transport-contract.reference.md` — `@dispatch/transport-contract` (HTTP endpoints + `ChatRequest`/`ModelsResponse`/`ConversationHistoryResponse` + WS chat ops + the unified `WsClientMessage`/`WsServerMessage` unions). diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md index fcc2cbf..d06a7b4 100644 --- a/.dispatch/transport-contract.reference.md +++ b/.dispatch/transport-contract.reference.md @@ -5,17 +5,21 @@ > hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally — > this file is for READING only. > -> **Orchestrator:** SNAPSHOT of `transport-contract@0.2.0`. Regenerate whenever it changes. -> Depends on `@dispatch/wire@0.2.0` (see `wire.reference.md`) + `@dispatch/ui-contract` -> (see `ui-contract.reference.md`). +> **Orchestrator:** SNAPSHOT of `transport-contract@0.3.0` (committed, backend `6db12ff`; the metrics +> endpoint shipped + version-bumped + LIVE-VERIFIED). Depends on `@dispatch/wire@0.3.0` (see +> `wire.reference.md`) + `@dispatch/ui-contract` (see `ui-contract.reference.md`). 
> -> **0.2.0 change (step grouping):** no shape change HERE — this contract's own types are -> identical. It only re-exports the bumped `@dispatch/wire`, whose `AgentEvent` tool variants -> now carry a required `stepId` and whose tool `Chunk`s carry an optional `stepId`. The -> `chat.delta` events streamed over WS and the `ConversationHistoryResponse.chunks` you already -> consume therefore now carry the step grouping key (see `wire.reference.md`). +> **0.3.0 change (token + timing metrics):** adds the durable metrics READ endpoint +> `GET /conversations/:id/metrics` → `ConversationMetricsResponse` (`{ turns: TurnMetrics[] }`), and +> re-exports `StepMetrics` / `TurnMetrics` from `@dispatch/wire`. This is a SEPARATE read axis from +> the seq-cursor history (`GET /conversations/:id`): metrics are keyed PER TURN (not per chunk), so +> they get their own route. `turns` is every SEALED turn's `TurnMetrics` in turn order (an in-flight +> turn is absent until its metrics persist post-seal). The live `usage`/`step-complete`/`done` +> packets it mirrors are transient (NOT persisted) and ride the `chat.delta`/NDJSON `AgentEvent` +> stream you already consume — see `wire.reference.md`. The contract's OWN chat/history shapes are +> otherwise unchanged from 0.2.0. -## Endpoints (backend, confirmed live — CORS wildcard `*`, HTTP port 24203, WS port 24205) +## Endpoints (backend — CORS wildcard `*`, HTTP port 24203, WS port 24205) - `POST /chat` — body `ChatRequest` (JSON); response NDJSON stream, one `AgentEvent` per line; resolved id also in `X-Conversation-Id` header. @@ -23,6 +27,8 @@ - `GET /conversations/:id?sinceSeq=<n>` — `ConversationHistoryResponse`: RAW, append-order, seq-ordered slice with `seq > n` (NOT reconciled — dangling tool-calls returned as-is). `latestSeq` = last chunk's `seq`, or the requested `sinceSeq` when caught up (empty `chunks`). 
+- `GET /conversations/:id/metrics` — `ConversationMetricsResponse`: every SEALED turn's `TurnMetrics` + in turn order (per-turn token + timing; NOT seq-filtered). IMPLEMENTED + LIVE-VERIFIED (probe 17/17). - WebSocket on :24205 — ONE path-agnostic socket multiplexes surface ops (`@dispatch/ui-contract`) + chat ops (below). Open once, send `WsClientMessage`, receive `WsServerMessage`. Live `AgentEvent` deltas carry `conversationId`+`turnId` but **no `seq`** @@ -42,9 +48,9 @@ */ import type { SurfaceClientMessage, SurfaceServerMessage } from "@dispatch/ui-contract"; -import type { AgentEvent, StoredChunk } from "@dispatch/wire"; +import type { AgentEvent, StoredChunk, TurnMetrics } from "@dispatch/wire"; -export type { AgentEvent, StoredChunk } from "@dispatch/wire"; +export type { AgentEvent, StepMetrics, StoredChunk, TurnMetrics } from "@dispatch/wire"; /** * Request body for `POST /chat` (sent as JSON). @@ -88,6 +94,25 @@ export interface ConversationHistoryResponse { readonly latestSeq: number; } +/** + * Response body for `GET /conversations/:id/metrics` — the persisted per-turn + * (and per-step) token + timing metrics for a conversation, for a client + * reopening a past conversation to render historical usage/latency. + * + * This is a SEPARATE axis from the two other read concerns and is deliberately + * its own endpoint: the live `usage`/`step-complete`/`done` events are transient + * (not persisted), and `ConversationHistoryResponse` carries seq-cursor chunk + * CONTENT. Metrics are keyed per TURN (not per chunk) and so are not seq-filtered + * — hence a sibling route rather than a field on the history response. + * + * `turns` is every SEALED turn's `TurnMetrics` in turn order. A turn appears only + * after its metrics were persisted (post-seal); an in-flight or unsealed turn is + * absent until then. 
+ */ +export interface ConversationMetricsResponse { + readonly turns: readonly TurnMetrics[]; +} + // ─── WebSocket chat ops ─────────────────────────────────────────────────────── // The persistent WS connection multiplexes chat ops (below) with surface ops // (`@dispatch/ui-contract`). Chat `type`s are namespaced (`chat.*`) so they diff --git a/.dispatch/wire.reference.md b/.dispatch/wire.reference.md index ed95351..ee5488c 100644 --- a/.dispatch/wire.reference.md +++ b/.dispatch/wire.reference.md @@ -4,13 +4,27 @@ > types WITHOUT following the `file:` dep symlink out of this repo (which hangs on a permission > prompt). Your CODE still imports `@dispatch/wire` normally — this file is for READING only. > -> **Orchestrator:** SNAPSHOT of `wire@0.2.0`. Regenerate whenever `@dispatch/wire` changes. +> **Orchestrator:** SNAPSHOT of `wire@0.3.0` (committed, backend `6db12ff`; the metrics types below +> shipped + version-bumped). Regenerate whenever `@dispatch/wire` changes. > -> **0.2.0 change (step grouping):** `ToolCallChunk`/`ToolResultChunk` gained an OPTIONAL -> `stepId?: StepId`; `TurnToolCallEvent`/`TurnToolResultEvent` gained a REQUIRED `stepId: StepId`. -> A `StepId` is the per-step grouping key for batched/parallel tool calls — group by equality. -> Live: read `event.stepId`. Replay: read `storedChunk.chunk.stepId` (NOT the envelope; absent on -> pre-0.2.0 rows / non-tool chunks — tolerate absence). `StoredChunk` envelope is UNCHANGED. +> **0.3.0 changes (token + timing metrics):** +> - **Live per-step/per-turn telemetry on the event stream** (transient — NOT persisted): +> `TurnUsageEvent` gained an OPTIONAL `stepId?` (attribute tokens per step). A NEW +> `TurnStepCompleteEvent` (`type: "step-complete"`, REQUIRED `stepId`) carries the per-step +> generation timing `ttftMs?` / `decodeMs?` / `genTotalMs?` (all optional — present only when the +> runtime had a clock; `ttftMs`/`decodeMs` additionally require a first content token). 
`TurnDoneEvent` +> gained an OPTIONAL `durationMs?` (total turn wall-clock) + OPTIONAL `usage?` (aggregate across +> steps). `TurnToolResultEvent` gained an OPTIONAL `durationMs?` (tool execution time). +> - **Durable, replayable metrics** (persisted, keyed per turn): NEW `StepMetrics` + `TurnMetrics` +> — the persisted counterparts of the live `usage` + `step-complete` + `done` packets. Served by +> `GET /conversations/:id/metrics` (see `transport-contract.reference.md`). Build the SAME +> `TurnMetrics` shape from the live events for the in-flight turn; the durable endpoint supplies it +> for sealed turns. TPS is derived (`usage.outputTokens / (genTotalMs / 1000)`), not on the wire. +> - **0.2.0 (still current — step grouping):** `ToolCallChunk`/`ToolResultChunk` carry an OPTIONAL +> `stepId?: StepId`; `TurnToolCallEvent`/`TurnToolResultEvent` carry a REQUIRED `stepId: StepId`. +> Group batched/parallel tool calls by `stepId` equality. Live: read `event.stepId`. Replay: read +> `storedChunk.chunk.stepId` (NOT the envelope; tolerate absence). `StoredChunk` envelope is +> UNCHANGED (`{ seq, role, chunk }` — carries NO `turnId`). ```ts /** @@ -168,6 +182,47 @@ export interface Usage { readonly cacheWriteTokens?: number; } +// ─── Persisted metrics ─────────────────────────────────────────────────────── + +/** + * Durable per-step metrics for a completed step — the persisted, replayable + * counterpart of the live `usage` + `step-complete` events. Combines the step's + * token usage with its generation timing so a client reopening a past + * conversation renders the same per-step token/latency breakdown it would have + * seen live. Built from the turn's events, stored by `conversation-store`, and + * served by `GET /conversations/:id/metrics`. + */ +export interface StepMetrics { + readonly stepId: StepId; + /** The step's token usage (all four counters; cache fields optional per `Usage`). 
*/ + readonly usage: Usage; + /** Time to first token (stream start → first text/reasoning delta). Optional — see `TurnStepCompleteEvent.ttftMs`. */ + readonly ttftMs?: number; + /** Decode time (first token → stream end). Optional — see `TurnStepCompleteEvent.decodeMs`. */ + readonly decodeMs?: number; + /** Total generation time for the step (stream start → stream end). Optional: present only when a clock was available. */ + readonly genTotalMs?: number; +} + +/** + * Durable per-turn metrics for a completed (sealed) turn — the persisted, + * replayable counterpart of the live `done` event's aggregate `usage` + + * `durationMs`, plus the per-step breakdown. `usage` is the aggregate across all + * steps; `steps` carries each step's `StepMetrics` in step order. Stored by + * `conversation-store` keyed by `turnId` and served by + * `GET /conversations/:id/metrics`. (`turnId` is the plain wire string carried + * on every `AgentEvent`, the join key to the live stream.) + */ +export interface TurnMetrics { + readonly turnId: string; + /** Aggregate token usage across all steps in the turn. */ + readonly usage: Usage; + /** Total wall-clock duration of the turn (turn start → turn end). Optional: present only when a clock was available. */ + readonly durationMs?: number; + /** Per-step metrics in step order. */ + readonly steps: readonly StepMetrics[]; +} + // ─── Outward events ───────────────────────────────────────────────────────── /** @@ -183,6 +238,7 @@ export type AgentEvent = | TurnToolResultEvent | TurnToolOutputEvent | TurnUsageEvent + | TurnStepCompleteEvent | TurnErrorEvent | TurnDoneEvent | TurnSealedEvent; @@ -251,6 +307,12 @@ export interface TurnToolResultEvent { readonly toolName: string; readonly content: string; readonly isError: boolean; + /** + * How long the tool took to execute (dispatch → result), in milliseconds — + * the backend's authoritative execution time, distinct from any client-side + * wall-clock. 
Optional: present only when the runtime was given a clock. + */ + readonly durationMs?: number; } /** Streaming output from a tool execution (e.g. shell stdout/stderr). */ @@ -268,9 +330,43 @@ export interface TurnUsageEvent { readonly type: "usage"; readonly conversationId: string; readonly turnId: string; + /** + * The step this usage report belongs to, so a consumer can attribute tokens + * per step (and join with the matching `step-complete` timing by `stepId`). + * Optional: absent when the runtime had no step context, and on usage emitted + * before this field existed. + */ + readonly stepId?: StepId; readonly usage: Usage; } +/** + * A step (one LLM round-trip) has completed — the authoritative per-step metrics + * packet, emitted once at the step's end (after the generation stream finishes), + * so its timing is final (unlike `usage`, which may arrive mid-stream). Carries + * the step's generation timing; join to the step's tokens via `stepId` on the + * `usage` event. All timing fields are optional: present only when the runtime + * was given a clock, and `ttftMs`/`decodeMs` additionally require that a first + * content token (text or reasoning) was observed this step. + */ +export interface TurnStepCompleteEvent { + readonly type: "step-complete"; + readonly conversationId: string; + readonly turnId: string; + readonly stepId: StepId; + /** Time to first token: stream start → first text/reasoning delta. */ + readonly ttftMs?: number; + /** Decode time: first token → stream end (generation total − TTFT). */ + readonly decodeMs?: number; + /** + * Total generation time for the step: stream start → stream end. Present + * whenever a clock was available, even if no first token was seen (in which + * case `ttftMs`/`decodeMs` are absent). When a first token was seen, + * `genTotalMs === ttftMs + decodeMs`. + */ + readonly genTotalMs?: number; +} + /** An error occurred during the turn. 
*/ export interface TurnErrorEvent { readonly type: "error"; @@ -286,6 +382,17 @@ export interface TurnDoneEvent { readonly conversationId: string; readonly turnId: string; readonly reason: string; + /** + * Total wall-clock duration of the turn (turn start → turn end), in + * milliseconds. Optional: present only when the runtime was given a clock. + */ + readonly durationMs?: number; + /** + * Aggregate token usage across all steps in the turn — a convenience total so + * a consumer need not sum the per-step `usage` events. Optional (absent if the + * provider reported no usage). + */ + readonly usage?: Usage; } /** |
