summaryrefslogtreecommitdiffhomepage
path: root/.dispatch
diff options
context:
space:
mode:
Diffstat (limited to '.dispatch')
-rw-r--r-- .dispatch/package-agent.md 3
-rw-r--r-- .dispatch/transport-contract.reference.md 47
-rw-r--r-- .dispatch/wire.reference.md 119
3 files changed, 151 insertions, 18 deletions
diff --git a/.dispatch/package-agent.md b/.dispatch/package-agent.md
index 73e960c..5c1a54d 100644
--- a/.dispatch/package-agent.md
+++ b/.dispatch/package-agent.md
@@ -26,7 +26,8 @@ it, test it, and write a report — nothing else. If no single unit is named, st
- **The contracts you consume:** reproduced IN-REPO under `.dispatch/*.reference.md` — read THOSE:
- `.dispatch/ui-contract.reference.md` — `@dispatch/ui-contract` (surfaces + surface WS protocol).
- `.dispatch/wire.reference.md` — `@dispatch/wire` (`Chunk`/`StoredChunk`+`seq`/`ChatMessage`/
- `AgentEvent`/`TurnSealedEvent`/`Usage` — the chat wire types).
+ `AgentEvent`/`TurnSealedEvent`/`Usage` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`,
+ `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs` — the chat wire types).
- `.dispatch/transport-contract.reference.md` — `@dispatch/transport-contract` (HTTP endpoints +
`ChatRequest`/`ModelsResponse`/`ConversationHistoryResponse` + WS chat ops + the unified
`WsClientMessage`/`WsServerMessage` unions).
diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md
index fcc2cbf..d06a7b4 100644
--- a/.dispatch/transport-contract.reference.md
+++ b/.dispatch/transport-contract.reference.md
@@ -5,17 +5,21 @@
> hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally —
> this file is for READING only.
>
-> **Orchestrator:** SNAPSHOT of `transport-contract@0.2.0`. Regenerate whenever it changes.
-> Depends on `@dispatch/wire@0.2.0` (see `wire.reference.md`) + `@dispatch/ui-contract`
-> (see `ui-contract.reference.md`).
+> **Orchestrator:** SNAPSHOT of `transport-contract@0.3.0` (committed, backend `6db12ff`; the metrics
+> endpoint shipped + version-bumped + LIVE-VERIFIED). Depends on `@dispatch/wire@0.3.0` (see
+> `wire.reference.md`) + `@dispatch/ui-contract` (see `ui-contract.reference.md`).
>
-> **0.2.0 change (step grouping):** no shape change HERE — this contract's own types are
-> identical. It only re-exports the bumped `@dispatch/wire`, whose `AgentEvent` tool variants
-> now carry a required `stepId` and whose tool `Chunk`s carry an optional `stepId`. The
-> `chat.delta` events streamed over WS and the `ConversationHistoryResponse.chunks` you already
-> consume therefore now carry the step grouping key (see `wire.reference.md`).
+> **0.3.0 change (token + timing metrics):** adds the durable metrics READ endpoint
+> `GET /conversations/:id/metrics` → `ConversationMetricsResponse` (`{ turns: TurnMetrics[] }`), and
+> re-exports `StepMetrics` / `TurnMetrics` from `@dispatch/wire`. This is a SEPARATE read axis from
+> the seq-cursor history (`GET /conversations/:id`): metrics are keyed PER TURN (not per chunk), so
+> they get their own route. `turns` is every SEALED turn's `TurnMetrics` in turn order (an in-flight
+> turn is absent until its metrics persist post-seal). The live `usage`/`step-complete`/`done`
+> packets it mirrors are transient (NOT persisted) and ride the `chat.delta`/NDJSON `AgentEvent`
+> stream you already consume — see `wire.reference.md`. The contract's OWN chat/history shapes are
+> otherwise unchanged from 0.2.0.
-## Endpoints (backend, confirmed live — CORS wildcard `*`, HTTP port 24203, WS port 24205)
+## Endpoints (backend — CORS wildcard `*`, HTTP port 24203, WS port 24205)
- `POST /chat` — body `ChatRequest` (JSON); response NDJSON stream, one `AgentEvent` per line;
resolved id also in `X-Conversation-Id` header.
@@ -23,6 +27,8 @@
- `GET /conversations/:id?sinceSeq=<n>` — `ConversationHistoryResponse`: RAW, append-order,
seq-ordered slice with `seq > n` (NOT reconciled — dangling tool-calls returned as-is).
`latestSeq` = last chunk's `seq`, or the requested `sinceSeq` when caught up (empty `chunks`).
+- `GET /conversations/:id/metrics` — `ConversationMetricsResponse`: every SEALED turn's `TurnMetrics`
+ in turn order (per-turn token + timing; NOT seq-filtered). IMPLEMENTED + LIVE-VERIFIED (probe 17/17).
- WebSocket on :24205 — ONE path-agnostic socket multiplexes surface ops
(`@dispatch/ui-contract`) + chat ops (below). Open once, send `WsClientMessage`, receive
`WsServerMessage`. Live `AgentEvent` deltas carry `conversationId`+`turnId` but **no `seq`**
@@ -42,9 +48,9 @@
*/
import type { SurfaceClientMessage, SurfaceServerMessage } from "@dispatch/ui-contract";
-import type { AgentEvent, StoredChunk } from "@dispatch/wire";
+import type { AgentEvent, StoredChunk, TurnMetrics } from "@dispatch/wire";
-export type { AgentEvent, StoredChunk } from "@dispatch/wire";
+export type { AgentEvent, StepMetrics, StoredChunk, TurnMetrics } from "@dispatch/wire";
/**
* Request body for `POST /chat` (sent as JSON).
@@ -88,6 +94,25 @@ export interface ConversationHistoryResponse {
readonly latestSeq: number;
}
+/**
+ * Response body for `GET /conversations/:id/metrics` — the persisted per-turn
+ * (and per-step) token + timing metrics for a conversation, for a client
+ * reopening a past conversation to render historical usage/latency.
+ *
+ * This is a SEPARATE axis from the two other read concerns and is deliberately
+ * its own endpoint: the live `usage`/`step-complete`/`done` events are transient
+ * (not persisted), and `ConversationHistoryResponse` carries seq-cursor chunk
+ * CONTENT. Metrics are keyed per TURN (not per chunk) and so are not seq-filtered
+ * — hence a sibling route rather than a field on the history response.
+ *
+ * `turns` is every SEALED turn's `TurnMetrics` in turn order. A turn appears only
+ * after its metrics were persisted (post-seal); an in-flight or unsealed turn is
+ * absent until then.
+ */
+export interface ConversationMetricsResponse {
+ readonly turns: readonly TurnMetrics[];
+}
+
// ─── WebSocket chat ops ───────────────────────────────────────────────────────
// The persistent WS connection multiplexes chat ops (below) with surface ops
// (`@dispatch/ui-contract`). Chat `type`s are namespaced (`chat.*`) so they
diff --git a/.dispatch/wire.reference.md b/.dispatch/wire.reference.md
index ed95351..ee5488c 100644
--- a/.dispatch/wire.reference.md
+++ b/.dispatch/wire.reference.md
@@ -4,13 +4,27 @@
> types WITHOUT following the `file:` dep symlink out of this repo (which hangs on a permission
> prompt). Your CODE still imports `@dispatch/wire` normally — this file is for READING only.
>
-> **Orchestrator:** SNAPSHOT of `wire@0.2.0`. Regenerate whenever `@dispatch/wire` changes.
+> **Orchestrator:** SNAPSHOT of `wire@0.3.0` (committed, backend `6db12ff`; the metrics types below
+> shipped + version-bumped). Regenerate whenever `@dispatch/wire` changes.
>
-> **0.2.0 change (step grouping):** `ToolCallChunk`/`ToolResultChunk` gained an OPTIONAL
-> `stepId?: StepId`; `TurnToolCallEvent`/`TurnToolResultEvent` gained a REQUIRED `stepId: StepId`.
-> A `StepId` is the per-step grouping key for batched/parallel tool calls — group by equality.
-> Live: read `event.stepId`. Replay: read `storedChunk.chunk.stepId` (NOT the envelope; absent on
-> pre-0.2.0 rows / non-tool chunks — tolerate absence). `StoredChunk` envelope is UNCHANGED.
+> **0.3.0 changes (token + timing metrics):**
+> - **Live per-step/per-turn telemetry on the event stream** (transient — NOT persisted):
+> `TurnUsageEvent` gained an OPTIONAL `stepId?` (attribute tokens per step). A NEW
+> `TurnStepCompleteEvent` (`type: "step-complete"`, REQUIRED `stepId`) carries the per-step
+> generation timing `ttftMs?` / `decodeMs?` / `genTotalMs?` (all optional — present only when the
+> runtime had a clock; `ttftMs`/`decodeMs` additionally require a first content token). `TurnDoneEvent`
+> gained an OPTIONAL `durationMs?` (total turn wall-clock) + OPTIONAL `usage?` (aggregate across
+> steps). `TurnToolResultEvent` gained an OPTIONAL `durationMs?` (tool execution time).
+> - **Durable, replayable metrics** (persisted, keyed per turn): NEW `StepMetrics` + `TurnMetrics`
+> — the persisted counterparts of the live `usage` + `step-complete` + `done` packets. Served by
+> `GET /conversations/:id/metrics` (see `transport-contract.reference.md`). Build the SAME
+> `TurnMetrics` shape from the live events for the in-flight turn; the durable endpoint supplies it
+> for sealed turns. TPS is derived (`usage.outputTokens / (genTotalMs / 1000)`), not on the wire.
+> - **0.2.0 (still current — step grouping):** `ToolCallChunk`/`ToolResultChunk` carry an OPTIONAL
+> `stepId?: StepId`; `TurnToolCallEvent`/`TurnToolResultEvent` carry a REQUIRED `stepId: StepId`.
+> Group batched/parallel tool calls by `stepId` equality. Live: read `event.stepId`. Replay: read
+> `storedChunk.chunk.stepId` (NOT the envelope; tolerate absence). `StoredChunk` envelope is
+> UNCHANGED (`{ seq, role, chunk }` — carries NO `turnId`).
```ts
/**
@@ -168,6 +182,47 @@ export interface Usage {
readonly cacheWriteTokens?: number;
}
+// ─── Persisted metrics ───────────────────────────────────────────────────────
+
+/**
+ * Durable per-step metrics for a completed step — the persisted, replayable
+ * counterpart of the live `usage` + `step-complete` events. Combines the step's
+ * token usage with its generation timing so a client reopening a past
+ * conversation renders the same per-step token/latency breakdown it would have
+ * seen live. Built from the turn's events, stored by `conversation-store`, and
+ * served by `GET /conversations/:id/metrics`.
+ */
+export interface StepMetrics {
+ readonly stepId: StepId;
+ /** The step's token usage (all four counters; cache fields optional per `Usage`). */
+ readonly usage: Usage;
+ /** Time to first token (stream start → first text/reasoning delta). Optional — see `TurnStepCompleteEvent.ttftMs`. */
+ readonly ttftMs?: number;
+ /** Decode time (first token → stream end). Optional — see `TurnStepCompleteEvent.decodeMs`. */
+ readonly decodeMs?: number;
+ /** Total generation time for the step (stream start → stream end). Optional: present only when a clock was available. */
+ readonly genTotalMs?: number;
+}
+
+/**
+ * Durable per-turn metrics for a completed (sealed) turn — the persisted,
+ * replayable counterpart of the live `done` event's aggregate `usage` +
+ * `durationMs`, plus the per-step breakdown. `usage` is the aggregate across all
+ * steps; `steps` carries each step's `StepMetrics` in step order. Stored by
+ * `conversation-store` keyed by `turnId` and served by
+ * `GET /conversations/:id/metrics`. (`turnId` is the plain wire string carried
+ * on every `AgentEvent`, the join key to the live stream.)
+ */
+export interface TurnMetrics {
+ readonly turnId: string;
+ /** Aggregate token usage across all steps in the turn. */
+ readonly usage: Usage;
+ /** Total wall-clock duration of the turn (turn start → turn end). Optional: present only when a clock was available. */
+ readonly durationMs?: number;
+ /** Per-step metrics in step order. */
+ readonly steps: readonly StepMetrics[];
+}
+
// ─── Outward events ─────────────────────────────────────────────────────────
/**
@@ -183,6 +238,7 @@ export type AgentEvent =
| TurnToolResultEvent
| TurnToolOutputEvent
| TurnUsageEvent
+ | TurnStepCompleteEvent
| TurnErrorEvent
| TurnDoneEvent
| TurnSealedEvent;
@@ -251,6 +307,12 @@ export interface TurnToolResultEvent {
readonly toolName: string;
readonly content: string;
readonly isError: boolean;
+ /**
+ * How long the tool took to execute (dispatch → result), in milliseconds —
+ * the backend's authoritative execution time, distinct from any client-side
+ * wall-clock. Optional: present only when the runtime was given a clock.
+ */
+ readonly durationMs?: number;
}
/** Streaming output from a tool execution (e.g. shell stdout/stderr). */
@@ -268,9 +330,43 @@ export interface TurnUsageEvent {
readonly type: "usage";
readonly conversationId: string;
readonly turnId: string;
+ /**
+ * The step this usage report belongs to, so a consumer can attribute tokens
+ * per step (and join with the matching `step-complete` timing by `stepId`).
+ * Optional: absent when the runtime had no step context, and on usage emitted
+ * before this field existed.
+ */
+ readonly stepId?: StepId;
readonly usage: Usage;
}
+/**
+ * A step (one LLM round-trip) has completed — the authoritative per-step metrics
+ * packet, emitted once at the step's end (after the generation stream finishes),
+ * so its timing is final (unlike `usage`, which may arrive mid-stream). Carries
+ * the step's generation timing; join to the step's tokens via `stepId` on the
+ * `usage` event. All timing fields are optional: present only when the runtime
+ * was given a clock, and `ttftMs`/`decodeMs` additionally require that a first
+ * content token (text or reasoning) was observed this step.
+ */
+export interface TurnStepCompleteEvent {
+ readonly type: "step-complete";
+ readonly conversationId: string;
+ readonly turnId: string;
+ readonly stepId: StepId;
+ /** Time to first token: stream start → first text/reasoning delta. */
+ readonly ttftMs?: number;
+ /** Decode time: first token → stream end (generation total − TTFT). */
+ readonly decodeMs?: number;
+ /**
+ * Total generation time for the step: stream start → stream end. Present
+ * whenever a clock was available, even if no first token was seen (in which
+ * case `ttftMs`/`decodeMs` are absent). When a first token was seen,
+ * `genTotalMs === ttftMs + decodeMs`.
+ */
+ readonly genTotalMs?: number;
+}
+
/** An error occurred during the turn. */
export interface TurnErrorEvent {
readonly type: "error";
@@ -286,6 +382,17 @@ export interface TurnDoneEvent {
readonly conversationId: string;
readonly turnId: string;
readonly reason: string;
+ /**
+ * Total wall-clock duration of the turn (turn start → turn end), in
+ * milliseconds. Optional: present only when the runtime was given a clock.
+ */
+ readonly durationMs?: number;
+ /**
+ * Aggregate token usage across all steps in the turn — a convenience total so
+ * a consumer need not sum the per-step `usage` events. Optional (absent if the
+ * provider reported no usage).
+ */
+ readonly usage?: Usage;
}
/**