diff options
| author | Adam Malczewski <[email protected]> | 2026-06-12 01:01:32 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-12 01:01:32 +0900 |
| commit | 6bd7b39f6f53dd8f3743347a1cb72c2f74424dd8 (patch) | |
| tree | b41911099883e8386ea8edbd88d42911de401d27 | |
| parent | fd565a6555e8bc9f37f21cf9d900523ef3be531b (diff) | |
| download | dispatch-web-6bd7b39f6f53dd8f3743347a1cb72c2f74424dd8.tar.gz dispatch-web-6bd7b39f6f53dd8f3743347a1cb72c2f74424dd8.zip | |
feat(metrics): consume contextSize — current context-usage readout
Backend context-size handoff: re-pin wire@0.5.0 / transport-contract@0.6.0
(+ re-mirror .dispatch reference snapshots). Thread the optional contextSize
through core/metrics (done fold + durable + selectCurrentContextSize: latest
turn's defined value, undefined=>unknown never 0, durable-wins-over-live).
Chat store exposes currentContextSize; ContextSizeBadge renders
"N tokens in context" / "context size unknown" above the composer.
GLOSSARY: add context size / context window. 533 tests green.
| -rw-r--r-- | .dispatch/transport-contract.reference.md | 13 | ||||
| -rw-r--r-- | .dispatch/wire.reference.md | 36 | ||||
| -rw-r--r-- | GLOSSARY.md | 2 | ||||
| -rw-r--r-- | backend-handoff.md | 29 | ||||
| -rw-r--r-- | src/app/App.svelte | 9 | ||||
| -rw-r--r-- | src/core/metrics/format.test.ts | 15 | ||||
| -rw-r--r-- | src/core/metrics/format.ts | 11 | ||||
| -rw-r--r-- | src/core/metrics/index.ts | 2 | ||||
| -rw-r--r-- | src/core/metrics/reducer.test.ts | 76 | ||||
| -rw-r--r-- | src/core/metrics/reducer.ts | 26 | ||||
| -rw-r--r-- | src/core/metrics/types.ts | 6 | ||||
| -rw-r--r-- | src/features/chat/index.ts | 1 | ||||
| -rw-r--r-- | src/features/chat/store.svelte.ts | 10 | ||||
| -rw-r--r-- | src/features/chat/ui/ContextSizeBadge.svelte | 20 |
14 files changed, 241 insertions, 15 deletions
diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md index c2e2076..40ced1e 100644 --- a/.dispatch/transport-contract.reference.md +++ b/.dispatch/transport-contract.reference.md @@ -5,9 +5,16 @@ > hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally — > this file is for READING only. > -> **Orchestrator:** SNAPSHOT of `[email protected]` (committed, backend `6db12ff`; the metrics -> endpoint shipped + version-bumped + LIVE-VERIFIED). Depends on `@dispatch/[email protected]` (see -> `wire.reference.md`) + `@dispatch/ui-contract` (see `ui-contract.reference.md`). +> **Orchestrator:** SNAPSHOT of `[email protected]` (the metrics endpoint shipped + +> version-bumped + LIVE-VERIFIED). Depends on `@dispatch/[email protected]` (see `wire.reference.md`) + +> `@dispatch/[email protected]` (see `ui-contract.reference.md`). +> +> **2026-06-12 delta (context-size handoff — package bumped `0.5.0` → `0.6.0`, depends on +> `[email protected]`):** no NEW transport shape — the optional `contextSize?: number` rides the +> re-exported `TurnMetrics` (so `ConversationMetricsResponse.turns[].contextSize`) and, live, the +> `TurnDoneEvent.contextSize` on the `done` AgentEvent (`chat.delta` WS / `/chat` NDJSON). On +> (re)hydrate take the LAST `turns[]` element with a defined `contextSize`; live, update on `done`. +> See the `wire.reference.md` context-size delta for the definition. > > **2026-06 delta (cache-warming handoff, additive — package still `0.4.0`):** adds > `POST /chat/warm` (`WarmRequest` → `WarmResponse`) for an on-demand prompt-cache warm, and the diff --git a/.dispatch/wire.reference.md b/.dispatch/wire.reference.md index ee5488c..cf1410a 100644 --- a/.dispatch/wire.reference.md +++ b/.dispatch/wire.reference.md @@ -4,8 +4,15 @@ > types WITHOUT following the `file:` dep symlink out of this repo (which hangs on a permission > prompt). 
Your CODE still imports `@dispatch/wire` normally — this file is for READING only. > -> **Orchestrator:** SNAPSHOT of `[email protected]` (committed, backend `6db12ff`; the metrics types below -> shipped + version-bumped). Regenerate whenever `@dispatch/wire` changes. +> **Orchestrator:** SNAPSHOT of `[email protected]` (the metrics types below shipped + version-bumped). +> Regenerate whenever `@dispatch/wire` changes. +> +> **2026-06-12 delta (context-size handoff — package bumped `0.4.0` → `0.5.0`):** adds an OPTIONAL +> `contextSize?: number` to BOTH `TurnDoneEvent` (live `done`) and `TurnMetrics` (persisted) — the +> turn's FINAL step `inputTokens + outputTokens` (current context occupancy), NOT the aggregate +> `usage` (which overcounts multi-step turns). The two carriers are equal for the same turn. Current +> value = the LATEST turn's `contextSize`; `undefined` ⇒ render "unknown", never `0`. See the field +> doc-comments on `TurnMetrics`/`TurnDoneEvent` below. > > **0.3.0 changes (token + timing metrics):** > - **Live per-step/per-turn telemetry on the event stream** (transient — NOT persisted): @@ -221,6 +228,16 @@ export interface TurnMetrics { readonly durationMs?: number; /** Per-step metrics in step order. */ readonly steps: readonly StepMetrics[]; + /** + * **Context size** — tokens the conversation occupies as of this turn: the + * turn's FINAL step `inputTokens + outputTokens` (the last entry of `steps`), + * NOT the aggregate `usage` (which sums per-step prompts and overcounts a + * multi-step turn). The persisted, replayable counterpart of + * `TurnDoneEvent.contextSize` and equal to it for the same turn. A client + * reopening a past conversation reads the LAST turn's `contextSize` as the + * current context usage. Optional: absent when no per-step usage was available. 
+ */ + readonly contextSize?: number; } // ─── Outward events ───────────────────────────────────────────────────────── @@ -393,6 +410,21 @@ export interface TurnDoneEvent { * provider reported no usage). */ readonly usage?: Usage; + /** + * **Context size** — tokens the conversation occupies right now: the turn's + * FINAL step `inputTokens + outputTokens` (the prompt sent into the last LLM + * round-trip plus that round-trip's output). This is the "tokens in context" + * figure a client renders as the chat's current context usage, and a client + * treats the LATEST turn's value as the live total. + * + * Deliberately NOT the aggregate `usage` above: `usage` SUMS each step's + * `inputTokens`, which overcounts a multi-step / tool-calling turn because every + * step re-prefills the growing prompt — the final step's input already includes + * all prior context, so its input+output is the true occupancy. Optional: absent + * when no per-step usage was observed this turn (mirrors `usage`). A later field + * will carry the model's max context-window LIMIT; this is only the current size. + */ + readonly contextSize?: number; } /** diff --git a/GLOSSARY.md b/GLOSSARY.md index 538ba7e..d632c8d 100644 --- a/GLOSSARY.md +++ b/GLOSSARY.md @@ -19,6 +19,8 @@ | **step metrics** | The durable per-step metrics within a `TurnMetrics`: the step's `Usage` (tokens) + `ttftMs`/`decodeMs`/`genTotalMs` timing, keyed by `stepId` (`StepMetrics`). The persisted counterpart of the live `usage` + `step-complete` events. | step stats | | **TTFT** (time to first token) | Per-step latency: generation stream start → first content token (text or reasoning). One per step (each step re-prefills). On the wire as `step-complete.ttftMs` / `StepMetrics.ttftMs` (optional). | time-to-first-byte | | **decode time** | Per-step generation time after the first token (first token → stream end = `genTotalMs − ttftMs`). On the wire as `step-complete.decodeMs` / `StepMetrics.decodeMs` (optional). 
| — | +| **context size** | The tokens a conversation currently occupies: the most recent turn's FINAL step `inputTokens + outputTokens` (NOT the aggregate per-turn `usage`, which sums per-step prompts and overcounts a multi-step turn). On the wire as `TurnDoneEvent.contextSize` (live `done`) + `TurnMetrics.contextSize` (persisted); the FE reads the LATEST turn's value as current usage, and treats `undefined` as "unknown" (renders a placeholder, never `0`). Mirrors the backend GLOSSARY. | context usage, context length, tokens used (and do NOT call it "context window" — that's the limit) | +| **context window** | The model's MAXIMUM token capacity (the limit a **context size** is measured against). A FUTURE backend field — not on the wire yet; the FE shows context size alone (no `size / limit` denominator) until it ships. | max context, token limit (distinct from **context size**, the current usage) | ## Frontend-specific | Term | Meaning | Aliases to avoid | diff --git a/backend-handoff.md b/backend-handoff.md index 99c2964..e9b128a 100644 --- a/backend-handoff.md +++ b/backend-handoff.md @@ -5,24 +5,33 @@ > **From:** dispatch-web orchestrator · **To:** arch-rewrite orchestrator · **Courier:** the user. > `lsp` does NOT span the repos (ORCHESTRATOR §5) — every cross-repo ask flows through here. -_Last updated: 2026-06-11. **FE is current on `[email protected]`.** All handoffs to date are -consumed: surfaces + WS, conversation transcript/metrics, tabs + model selector, cache-warming (incl. -authoritative timer + retention + cache-rate fix), and **per-conversation cwd + LSP status** (new -`workspace` feature — cwd field in the Model view + a "Language Servers" view; works for drafts too). +_Last updated: 2026-06-12. **FE is current on `[email protected]` / `[email protected]`.** All handoffs +to date are consumed: surfaces + WS, conversation transcript/metrics, tabs + model selector, +cache-warming (incl. 
authoritative timer + retention + cache-rate fix), **per-conversation cwd + LSP +status**, and **context size** (the `contextSize` field — `done` live + `TurnMetrics` persisted — +rendered as a current-usage readout above the composer). **Open asks:** CR-1 (Loaded Extensions as a real table) + CR-2 (optional catalog `scope` flag) below. The cwd/LSP draft-path verification (`backend-handoff-cwd-lsp.md`) came back **all ✅ confirmed** by the backend (answers in their `frontend-lsp-cwd-handoff.md`) — see §2._ +**Context-size handoff (`frontend-context-size-handoff.md`) → CONSUMED ✅.** Re-pinned `[email protected]→0.5.0` ++ `[email protected]→0.6.0`; re-mirrored both `.dispatch/*.reference.md`; added "context size" + +"context window" to FE `GLOSSARY.md`. `core/metrics` now threads `contextSize` through the `done` fold + +durable metrics and exposes `selectCurrentContextSize` (LATEST turn's defined value, `undefined`⇒unknown, +never `0`, durable-wins-over-live); the chat store exposes `currentContextSize`; `ContextSizeBadge` +renders "N tokens in context" / "context size unknown" above the composer. 533 tests green. NO new +backend ask — but the max-limit denominator is now a live FE need; see §3. + --- ## 1. 
Pinned backend contracts (consumed by the FE) | Package | Used for | |---|---| | `@dispatch/ui-contract` | surfaces + surface WS protocol | -| `@dispatch/wire` | `Chunk`/`StoredChunk`(+`seq`)/`ChatMessage`/`AgentEvent`/`TurnSealedEvent`/`Usage`/`StepId` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`, `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs` | +| `@dispatch/wire` | `Chunk`/`StoredChunk`(+`seq`)/`ChatMessage`/`AgentEvent`/`TurnSealedEvent`/`Usage`/`StepId` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`, `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs`, **`done.contextSize`/`TurnMetrics.contextSize`** | | `@dispatch/transport-contract` | `ChatRequest`/`ModelsResponse`/`ConversationHistoryResponse`/`ConversationMetricsResponse` + `WarmRequest`/`WarmResponse` + `CwdResponse`/`SetCwdRequest` + LSP (`LspStatusResponse`/`LspServerInfo`/`LspServerState`) + WS chat ops + `WsClientMessage`/`WsServerMessage` | Endpoints in use (HTTP **24203**, WS **24205**, CORS `*` incl. `PUT`): @@ -31,7 +40,7 @@ Endpoints in use (HTTP **24203**, WS **24205**, CORS `*` incl. `PUT`): `GET /conversations/:id/lsp` · `POST /chat/warm` · WS `chat.send`→`chat.delta`. Mirrored in-repo for headless agents: `.dispatch/{ui-contract,wire,transport-contract}.reference.md` -(regenerate on any contract bump; all current as of `[email protected]`). +(regenerate on any contract bump; all current as of `[email protected]` / `[email protected]`). ## 2. Open asks FOR THE BACKEND @@ -101,6 +110,12 @@ harden `/chat` to treat blank as "not provided" if we ever want it — not neede ## 3. Likely NEXT backend asks (heads-up, not yet requested) +- **Model max context-window LIMIT** (the denominator for context size) — the context-size handoff + flagged this as the separate, later field. The FE now shows current size alone (e.g. 
"34,102 tokens + in context"); once a per-model/per-turn `contextWindow` (max token capacity) ships, the FE can render + `contextSize / limit` (e.g. "34,102 / 200,000") + a usage bar. GLOSSARY term reserved: "context window" + = the limit (distinct from "context size" = current usage). **Likely the next ask** — raise when the + backend can source the model's advertised window. - `GET /conversations` — conversation list / sidebar (history explorer / switcher); could also expose a per-conversation "last model" so a reopened tab seeds its model from the server instead of localStorage. - `POST /conversations/:id/cancel` — "stop generating". diff --git a/src/app/App.svelte b/src/app/App.svelte index daab953..32db54f 100644 --- a/src/app/App.svelte +++ b/src/app/App.svelte @@ -6,7 +6,13 @@ manifest as cacheWarmingManifest, type WarmFeedback, } from "../features/cache-warming"; - import { ChatView, Composer, manifest as chatManifest, ModelSelector } from "../features/chat"; + import { + ChatView, + Composer, + manifest as chatManifest, + ContextSizeBadge, + ModelSelector, + } from "../features/chat"; import { manifest as conversationCacheManifest } from "../features/conversation-cache"; import { manifest as markdownManifest } from "../features/markdown"; import { @@ -211,6 +217,7 @@ <ScrollToBottom show={smartScroll.showButton} onResume={() => smartScroll.resume()} /> </div> + <ContextSizeBadge contextSize={store.activeChat.currentContextSize} /> <Composer onSend={handleSend} /> </div> diff --git a/src/core/metrics/format.test.ts b/src/core/metrics/format.test.ts index 3eec93d..7c143d7 100644 --- a/src/core/metrics/format.test.ts +++ b/src/core/metrics/format.test.ts @@ -4,6 +4,7 @@ import { computeCachePct, computeExpectedCachePct, computeTps, + formatContextSize, viewCacheRate, viewExpectedCache, viewStepMetrics, @@ -308,3 +309,17 @@ describe("viewExpectedCache", () => { expect(v?.isHit).toBe(true); }); }); + +describe("formatContextSize", () => { + it("formats a 
defined count with thousands separators", () => { + expect(formatContextSize(34102)).toBe("34,102 tokens in context"); + }); + + it("renders a placeholder for undefined (never 0)", () => { + expect(formatContextSize(undefined)).toBe("context size unknown"); + }); + + it("renders an explicit 0 as zero tokens (a real reported value)", () => { + expect(formatContextSize(0)).toBe("0 tokens in context"); + }); +}); diff --git a/src/core/metrics/format.ts b/src/core/metrics/format.ts index ee8db60..d8dd2cc 100644 --- a/src/core/metrics/format.ts +++ b/src/core/metrics/format.ts @@ -17,6 +17,17 @@ function formatTps(tps: number | null): string | null { return `${Math.round(tps)} tok/s`; } +/** + * Format the current context size for display. A defined count renders as + * `"<n> tokens in context"` (thousands-separated); `undefined` ("unknown" — no + * per-step usage reported yet) renders the placeholder `"context size unknown"`. + * Never renders `0` for the unknown case. + */ +export function formatContextSize(n: number | undefined): string { + if (n === undefined) return "context size unknown"; + return `${formatTokens(n)} tokens in context`; +} + /** Compute tokens-per-second. Returns null when elapsed time is absent or zero. 
*/ export function computeTps(outputTokens: number, elapsedMs: number | undefined): number | null { if (elapsedMs === undefined || elapsedMs <= 0) return null; diff --git a/src/core/metrics/index.ts b/src/core/metrics/index.ts index 8822159..773d697 100644 --- a/src/core/metrics/index.ts +++ b/src/core/metrics/index.ts @@ -2,6 +2,7 @@ export { computeCachePct, computeExpectedCachePct, computeTps, + formatContextSize, viewCacheRate, viewExpectedCache, viewStepMetrics, @@ -12,6 +13,7 @@ export { applyDurableMetrics, foldMetricsEvent, initialMetricsState, + selectCurrentContextSize, selectOrderedTurnMetrics, } from "./reducer"; export type { diff --git a/src/core/metrics/reducer.test.ts b/src/core/metrics/reducer.test.ts index 16c88b3..cd9f673 100644 --- a/src/core/metrics/reducer.test.ts +++ b/src/core/metrics/reducer.test.ts @@ -4,6 +4,7 @@ import { applyDurableMetrics, foldMetricsEvent, initialMetricsState, + selectCurrentContextSize, selectOrderedTurnMetrics, } from "./reducer"; @@ -39,7 +40,11 @@ const stepCompleteEvent = ( const doneEvent = ( turnId: string, - extra: { durationMs?: number; usage?: { inputTokens: number; outputTokens: number } } = {}, + extra: { + durationMs?: number; + usage?: { inputTokens: number; outputTokens: number }; + contextSize?: number; + } = {}, ): TurnDoneEvent => ({ type: "done", conversationId: "c1", @@ -366,3 +371,72 @@ describe("applyDurableMetrics", () => { expect(s.durable.get("t1")?.usage.inputTokens).toBe(99); }); }); + +describe("contextSize / selectCurrentContextSize", () => { + it("live done carries contextSize onto the turn total", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 1234 })); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.total?.contextSize).toBe(1234); + expect(selectCurrentContextSize(s)).toBe(1234); + }); + + 
it("contextSize is NOT the aggregate usage sum (multi-step turn)", () => { + let s = initialMetricsState(); + // Two steps: usage sums to 300 in / 130 out = 430, but contextSize is the + // backend-stamped final-step occupancy, independent of the sum. + s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1")); + s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2")); + s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2")); + s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 250 })); + + const ordered = selectOrderedTurnMetrics(s); + expect(ordered[0]?.total?.usage).toEqual({ inputTokens: 300, outputTokens: 130 }); + expect(ordered[0]?.total?.contextSize).toBe(250); + expect(selectCurrentContextSize(s)).toBe(250); + }); + + it("persisted (durable) contextSize is preserved and selected", () => { + let s = initialMetricsState(); + s = applyDurableMetrics(s, [ + { turnId: "t1", usage: { inputTokens: 10, outputTokens: 5 }, steps: [], contextSize: 4096 }, + ]); + expect(s.durable.get("t1")?.contextSize).toBe(4096); + expect(selectCurrentContextSize(s)).toBe(4096); + }); + + it("selectCurrentContextSize returns the LATEST turn's value", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 100 })); + s = foldMetricsEvent(s, doneEvent("t2", { contextSize: 900 })); + expect(selectCurrentContextSize(s)).toBe(900); + }); + + it("selectCurrentContextSize skips a later turn that lacks contextSize", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 700 })); + // t2 finishes but the provider reported no per-step usage → no contextSize. 
+ s = foldMetricsEvent(s, doneEvent("t2")); + expect(selectCurrentContextSize(s)).toBe(700); + }); + + it("selectCurrentContextSize is undefined (not 0) when nothing reported", () => { + let s = initialMetricsState(); + expect(selectCurrentContextSize(s)).toBeUndefined(); + s = foldMetricsEvent(s, doneEvent("t1")); + expect(selectCurrentContextSize(s)).toBeUndefined(); + }); + + it("durable contextSize wins over live for a shared turnId", () => { + let s = initialMetricsState(); + s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 111 })); + s = applyDurableMetrics(s, [ + { turnId: "t1", usage: { inputTokens: 1, outputTokens: 1 }, steps: [], contextSize: 222 }, + ]); + expect(selectCurrentContextSize(s)).toBe(222); + }); +}); diff --git a/src/core/metrics/reducer.ts b/src/core/metrics/reducer.ts index d36dba1..1e66cc8 100644 --- a/src/core/metrics/reducer.ts +++ b/src/core/metrics/reducer.ts @@ -62,6 +62,9 @@ function liveTurnToMetrics(lt: LiveTurn): TurnMetrics { if (lt.durationMs !== undefined) { (base as { durationMs?: number }).durationMs = lt.durationMs; } + if (lt.doneContextSize !== undefined) { + (base as { contextSize?: number }).contextSize = lt.doneContextSize; + } return base; } @@ -74,6 +77,7 @@ function ensureLiveTurn(state: MetricsState, turnId: string): [MetricsState, Liv done: false, durationMs: undefined, doneUsage: undefined, + doneContextSize: undefined, stepMap: new Map(), stepOrder: [], }; @@ -127,7 +131,7 @@ export function initialMetricsState(): MetricsState { * - `usage` with `stepId`: upsert that step's usage. * - `usage` without `stepId`: ignored. * - `step-complete`: upsert that step's timing; default usage to zeros if absent. - * - `done`: set turn's `durationMs` and optional aggregate `usage`. + * - `done`: set turn's `durationMs`, optional aggregate `usage`, and optional `contextSize`. * - All other event types: return state unchanged. 
*/ export function foldMetricsEvent(state: MetricsState, event: AgentEvent): MetricsState { @@ -161,6 +165,7 @@ export function foldMetricsEvent(state: MetricsState, event: AgentEvent): Metric done: true, durationMs: event.durationMs ?? lt.durationMs, doneUsage: event.usage ?? lt.doneUsage, + doneContextSize: event.contextSize ?? lt.doneContextSize, }; const newLive = new Map(s1.live); newLive.set(event.turnId, updated); @@ -237,3 +242,22 @@ export function selectOrderedTurnMetrics(state: MetricsState): readonly TurnMetr return result; } + +/** + * Select the conversation's CURRENT context size — the tokens it occupies right + * now. Per the wire contract a client reads the LATEST turn's `contextSize`; we + * scan the merged ordered turns NEWEST → OLDEST and return the first DEFINED + * `contextSize` (a finalized turn whose provider reported per-step usage). + * + * Returns `undefined` ("unknown") when no finalized turn carries a context size — + * the caller renders a placeholder, NEVER `0`. Durable (sealed) data wins over + * live for a shared `turnId` (it is the persisted, authoritative value). + */ +export function selectCurrentContextSize(state: MetricsState): number | undefined { + const ordered = selectOrderedTurnMetrics(state); + for (let i = ordered.length - 1; i >= 0; i--) { + const total = ordered[i]?.total; + if (total?.contextSize !== undefined) return total.contextSize; + } + return undefined; +} diff --git a/src/core/metrics/types.ts b/src/core/metrics/types.ts index f5557f7..c22fd9f 100644 --- a/src/core/metrics/types.ts +++ b/src/core/metrics/types.ts @@ -19,6 +19,12 @@ export interface LiveTurn { readonly done: boolean; readonly durationMs: number | undefined; readonly doneUsage: Usage | undefined; + /** + * Context size carried on the turn's `done` event (the turn's FINAL step + * `inputTokens + outputTokens` — current context occupancy). `undefined` when + * the provider reported no per-step usage; never coerced to `0`. 
+ */ + readonly doneContextSize: number | undefined; readonly stepMap: ReadonlyMap<string, BuildingStep>; readonly stepOrder: readonly string[]; } diff --git a/src/features/chat/index.ts b/src/features/chat/index.ts index 18ed693..adfb670 100644 --- a/src/features/chat/index.ts +++ b/src/features/chat/index.ts @@ -6,6 +6,7 @@ export type { ChatStore, ChatStoreDependencies } from "./store.svelte"; export { createChatStore } from "./store.svelte"; export { default as ChatView } from "./ui/ChatView.svelte"; export { default as Composer } from "./ui/Composer.svelte"; +export { default as ContextSizeBadge } from "./ui/ContextSizeBadge.svelte"; export { default as ModelSelector } from "./ui/ModelSelector.svelte"; /** Public module manifest — aggregated by the shell's "Loaded Modules" view. */ diff --git a/src/features/chat/store.svelte.ts b/src/features/chat/store.svelte.ts index f4ad07b..6344aec 100644 --- a/src/features/chat/store.svelte.ts +++ b/src/features/chat/store.svelte.ts @@ -18,6 +18,7 @@ import { applyDurableMetrics, foldMetricsEvent, initialMetricsState, + selectCurrentContextSize, selectOrderedTurnMetrics, } from "../../core/metrics"; import type { ConversationCache } from "../conversation-cache"; @@ -36,6 +37,12 @@ export interface ChatStore { readonly messages: readonly ChatMessage[]; readonly chunks: readonly RenderedChunk[]; readonly turnMetrics: readonly TurnMetricsEntry[]; + /** + * The conversation's current context size (tokens occupied) — the latest + * finalized turn's `contextSize`, or `undefined` ("unknown") when none is + * known yet. Never `0` for the unknown case. 
+ */ + readonly currentContextSize: number | undefined; readonly pendingSync: boolean; readonly error: string | null; readonly model: string | undefined; @@ -91,6 +98,9 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { get turnMetrics(): readonly TurnMetricsEntry[] { return selectOrderedTurnMetrics(metrics); }, + get currentContextSize(): number | undefined { + return selectCurrentContextSize(metrics); + }, get pendingSync(): boolean { return _pendingSync; }, diff --git a/src/features/chat/ui/ContextSizeBadge.svelte b/src/features/chat/ui/ContextSizeBadge.svelte new file mode 100644 index 0000000..475d54f --- /dev/null +++ b/src/features/chat/ui/ContextSizeBadge.svelte @@ -0,0 +1,20 @@ +<script lang="ts"> + import { formatContextSize } from "../../../core/metrics"; + + let { + contextSize, + }: { + // The conversation's current context size (tokens occupied), or `undefined` + // ("unknown") when no finalized turn has reported one yet. Never `0` for the + // unknown case — `formatContextSize` renders a placeholder instead. + contextSize: number | undefined; + } = $props(); + + const label = $derived(formatContextSize(contextSize)); +</script> + +<div class="px-4 pb-1 text-xs opacity-60" aria-live="polite"> + <span title="The model's max context window is not reported yet — current usage only."> + {label} + </span> +</div> |
