summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--.dispatch/transport-contract.reference.md13
-rw-r--r--.dispatch/wire.reference.md36
-rw-r--r--GLOSSARY.md2
-rw-r--r--backend-handoff.md29
-rw-r--r--src/app/App.svelte9
-rw-r--r--src/core/metrics/format.test.ts15
-rw-r--r--src/core/metrics/format.ts11
-rw-r--r--src/core/metrics/index.ts2
-rw-r--r--src/core/metrics/reducer.test.ts76
-rw-r--r--src/core/metrics/reducer.ts26
-rw-r--r--src/core/metrics/types.ts6
-rw-r--r--src/features/chat/index.ts1
-rw-r--r--src/features/chat/store.svelte.ts10
-rw-r--r--src/features/chat/ui/ContextSizeBadge.svelte20
14 files changed, 241 insertions, 15 deletions
diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md
index c2e2076..40ced1e 100644
--- a/.dispatch/transport-contract.reference.md
+++ b/.dispatch/transport-contract.reference.md
@@ -5,9 +5,16 @@
> hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally —
> this file is for READING only.
>
-> **Orchestrator:** SNAPSHOT of `transport-contract@0.5.0` (committed, backend `6db12ff`; the metrics
-> endpoint shipped + version-bumped + LIVE-VERIFIED). Depends on `@dispatch/wire@0.4.0` (see
-> `wire.reference.md`) + `@dispatch/ui-contract` (see `ui-contract.reference.md`).
+> **Orchestrator:** SNAPSHOT of `transport-contract@0.6.0` (the metrics endpoint shipped +
+> version-bumped + LIVE-VERIFIED). Depends on `@dispatch/wire@0.5.0` (see `wire.reference.md`) +
+> `@dispatch/[email protected]` (see `ui-contract.reference.md`).
+>
+> **2026-06-12 delta (context-size handoff — package bumped `0.5.0` → `0.6.0`, depends on
+> `wire@0.5.0`):** no NEW transport shape — the optional `contextSize?: number` rides the
+> re-exported `TurnMetrics` (so `ConversationMetricsResponse.turns[].contextSize`) and, live, the
+> `TurnDoneEvent.contextSize` on the `done` AgentEvent (`chat.delta` WS / `/chat` NDJSON). On
+> (re)hydrate take the LAST `turns[]` element with a defined `contextSize`; live, update on `done`.
+> See the `wire.reference.md` context-size delta for the definition.
>
> **2026-06 delta (cache-warming handoff, additive — package still `0.4.0`):** adds
> `POST /chat/warm` (`WarmRequest` → `WarmResponse`) for an on-demand prompt-cache warm, and the
diff --git a/.dispatch/wire.reference.md b/.dispatch/wire.reference.md
index ee5488c..cf1410a 100644
--- a/.dispatch/wire.reference.md
+++ b/.dispatch/wire.reference.md
@@ -4,8 +4,15 @@
> types WITHOUT following the `file:` dep symlink out of this repo (which hangs on a permission
> prompt). Your CODE still imports `@dispatch/wire` normally — this file is for READING only.
>
-> **Orchestrator:** SNAPSHOT of `wire@0.4.0` (committed, backend `6db12ff`; the metrics types below
-> shipped + version-bumped). Regenerate whenever `@dispatch/wire` changes.
+> **Orchestrator:** SNAPSHOT of `wire@0.5.0` (the metrics types below shipped + version-bumped).
+> Regenerate whenever `@dispatch/wire` changes.
+>
+> **2026-06-12 delta (context-size handoff — package bumped `0.4.0` → `0.5.0`):** adds an OPTIONAL
+> `contextSize?: number` to BOTH `TurnDoneEvent` (live `done`) and `TurnMetrics` (persisted) — the
+> turn's FINAL step `inputTokens + outputTokens` (current context occupancy), NOT the aggregate
+> `usage` (which overcounts multi-step turns). The two carriers are equal for the same turn. Current
+> value = the LATEST turn's `contextSize`; `undefined` ⇒ render "unknown", never `0`. See the field
+> doc-comments on `TurnMetrics`/`TurnDoneEvent` below.
>
> **0.3.0 changes (token + timing metrics):**
> - **Live per-step/per-turn telemetry on the event stream** (transient — NOT persisted):
@@ -221,6 +228,16 @@ export interface TurnMetrics {
readonly durationMs?: number;
/** Per-step metrics in step order. */
readonly steps: readonly StepMetrics[];
+ /**
+ * **Context size** — tokens the conversation occupies as of this turn: the
+ * turn's FINAL step `inputTokens + outputTokens` (the last entry of `steps`),
+ * NOT the aggregate `usage` (which sums per-step prompts and overcounts a
+ * multi-step turn). The persisted, replayable counterpart of
+ * `TurnDoneEvent.contextSize` and equal to it for the same turn. A client
+ * reopening a past conversation reads the LAST turn's `contextSize` as the
+ * current context usage. Optional: absent when no per-step usage was available.
+ */
+ readonly contextSize?: number;
}
// ─── Outward events ─────────────────────────────────────────────────────────
@@ -393,6 +410,21 @@ export interface TurnDoneEvent {
* provider reported no usage).
*/
readonly usage?: Usage;
+ /**
+ * **Context size** — tokens the conversation occupies right now: the turn's
+ * FINAL step `inputTokens + outputTokens` (the prompt sent into the last LLM
+ * round-trip plus that round-trip's output). This is the "tokens in context"
+ * figure a client renders as the chat's current context usage, and a client
+ * treats the LATEST turn's value as the live total.
+ *
+ * Deliberately NOT the aggregate `usage` above: `usage` SUMS each step's
+ * `inputTokens`, which overcounts a multi-step / tool-calling turn because every
+ * step re-prefills the growing prompt — the final step's input already includes
+ * all prior context, so its input+output is the true occupancy. Optional: absent
+ * when no per-step usage was observed this turn (mirrors `usage`). A later field
+ * will carry the model's max context-window LIMIT; this is only the current size.
+ */
+ readonly contextSize?: number;
}
/**
diff --git a/GLOSSARY.md b/GLOSSARY.md
index 538ba7e..d632c8d 100644
--- a/GLOSSARY.md
+++ b/GLOSSARY.md
@@ -19,6 +19,8 @@
| **step metrics** | The durable per-step metrics within a `TurnMetrics`: the step's `Usage` (tokens) + `ttftMs`/`decodeMs`/`genTotalMs` timing, keyed by `stepId` (`StepMetrics`). The persisted counterpart of the live `usage` + `step-complete` events. | step stats |
| **TTFT** (time to first token) | Per-step latency: generation stream start → first content token (text or reasoning). One per step (each step re-prefills). On the wire as `step-complete.ttftMs` / `StepMetrics.ttftMs` (optional). | time-to-first-byte |
| **decode time** | Per-step generation time after the first token (first token → stream end = `genTotalMs − ttftMs`). On the wire as `step-complete.decodeMs` / `StepMetrics.decodeMs` (optional). | — |
+| **context size** | The tokens a conversation currently occupies: the most recent turn's FINAL step `inputTokens + outputTokens` (NOT the aggregate per-turn `usage`, which sums per-step prompts and overcounts a multi-step turn). On the wire as `TurnDoneEvent.contextSize` (live `done`) + `TurnMetrics.contextSize` (persisted); the FE reads the LATEST turn's value as current usage, and treats `undefined` as "unknown" (renders a placeholder, never `0`). Mirrors the backend GLOSSARY. | context usage, context length, tokens used (and do NOT call it "context window" — that's the limit) |
+| **context window** | The model's MAXIMUM token capacity (the limit a **context size** is measured against). A FUTURE backend field — not on the wire yet; the FE shows context size alone (no `size / limit` denominator) until it ships. | max context, token limit (distinct from **context size**, the current usage) |
## Frontend-specific
| Term | Meaning | Aliases to avoid |
diff --git a/backend-handoff.md b/backend-handoff.md
index 99c2964..e9b128a 100644
--- a/backend-handoff.md
+++ b/backend-handoff.md
@@ -5,24 +5,33 @@
> **From:** dispatch-web orchestrator · **To:** arch-rewrite orchestrator · **Courier:** the user.
> `lsp` does NOT span the repos (ORCHESTRATOR §5) — every cross-repo ask flows through here.
-_Last updated: 2026-06-11. **FE is current on `[email protected]`.** All handoffs to date are
-consumed: surfaces + WS, conversation transcript/metrics, tabs + model selector, cache-warming (incl.
-authoritative timer + retention + cache-rate fix), and **per-conversation cwd + LSP status** (new
-`workspace` feature — cwd field in the Model view + a "Language Servers" view; works for drafts too).
+_Last updated: 2026-06-12. **FE is current on `wire@0.5.0` / `transport-contract@0.6.0`.** All handoffs
+to date are consumed: surfaces + WS, conversation transcript/metrics, tabs + model selector,
+cache-warming (incl. authoritative timer + retention + cache-rate fix), **per-conversation cwd + LSP
+status**, and **context size** (the `contextSize` field — `done` live + `TurnMetrics` persisted —
+rendered as a current-usage readout above the composer).
**Open asks:** CR-1 (Loaded Extensions as a real table) + CR-2 (optional catalog `scope` flag) below.
The cwd/LSP draft-path verification (`backend-handoff-cwd-lsp.md`) came back **all ✅ confirmed** by the
backend (answers in their `frontend-lsp-cwd-handoff.md`) — see §2._
+**Context-size handoff (`frontend-context-size-handoff.md`) → CONSUMED ✅.** Re-pinned `wire@0.4.0→0.5.0`
++ `transport-contract@0.5.0→0.6.0`; re-mirrored both `.dispatch/*.reference.md`; added "context size" +
+"context window" to FE `GLOSSARY.md`. `core/metrics` now threads `contextSize` through the `done` fold +
+durable metrics and exposes `selectCurrentContextSize` (LATEST turn's defined value, `undefined`⇒unknown,
+never `0`, durable-wins-over-live); the chat store exposes `currentContextSize`; `ContextSizeBadge`
+renders "N tokens in context" / "context size unknown" above the composer. 533 tests green. NO new
+backend ask — but the max-limit denominator is now a live FE need; see §3.
+
---
## 1. Pinned backend contracts (consumed by the FE)
-Pinned as `file:` deps: **`[email protected]`; `[email protected]`; `[email protected]`**.
+Pinned as `file:` deps: **`[email protected]`; `[email protected]`; `[email protected]`**.
| Package | Used for |
|---|---|
| `@dispatch/ui-contract` | surfaces + surface WS protocol |
-| `@dispatch/wire` | `Chunk`/`StoredChunk`(+`seq`)/`ChatMessage`/`AgentEvent`/`TurnSealedEvent`/`Usage`/`StepId` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`, `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs` |
+| `@dispatch/wire` | `Chunk`/`StoredChunk`(+`seq`)/`ChatMessage`/`AgentEvent`/`TurnSealedEvent`/`Usage`/`StepId` + metrics: `StepMetrics`/`TurnMetrics`, `usage.stepId`, `step-complete`, `done.durationMs`/`done.usage`, `tool-result.durationMs`, **`done.contextSize`/`TurnMetrics.contextSize`** |
| `@dispatch/transport-contract` | `ChatRequest`/`ModelsResponse`/`ConversationHistoryResponse`/`ConversationMetricsResponse` + `WarmRequest`/`WarmResponse` + `CwdResponse`/`SetCwdRequest` + LSP (`LspStatusResponse`/`LspServerInfo`/`LspServerState`) + WS chat ops + `WsClientMessage`/`WsServerMessage` |
Endpoints in use (HTTP **24203**, WS **24205**, CORS `*` incl. `PUT`):
@@ -31,7 +40,7 @@ Endpoints in use (HTTP **24203**, WS **24205**, CORS `*` incl. `PUT`):
`GET /conversations/:id/lsp` · `POST /chat/warm` · WS `chat.send`→`chat.delta`.
Mirrored in-repo for headless agents: `.dispatch/{ui-contract,wire,transport-contract}.reference.md`
-(regenerate on any contract bump; all current as of `[email protected]`).
+(regenerate on any contract bump; all current as of `wire@0.5.0` / `transport-contract@0.6.0`).
## 2. Open asks FOR THE BACKEND
@@ -101,6 +110,12 @@ harden `/chat` to treat blank as "not provided" if we ever want it — not neede
## 3. Likely NEXT backend asks (heads-up, not yet requested)
+- **Model max context-window LIMIT** (the denominator for context size) — the context-size handoff
+ flagged this as the separate, later field. The FE now shows current size alone (e.g. "34,102 tokens
+ in context"); once a per-model/per-turn `contextWindow` (max token capacity) ships, the FE can render
+ `contextSize / limit` (e.g. "34,102 / 200,000") + a usage bar. GLOSSARY term reserved: "context window"
+ = the limit (distinct from "context size" = current usage). **Likely the next ask** — raise when the
+ backend can source the model's advertised window.
- `GET /conversations` — conversation list / sidebar (history explorer / switcher); could also expose a
per-conversation "last model" so a reopened tab seeds its model from the server instead of localStorage.
- `POST /conversations/:id/cancel` — "stop generating".
diff --git a/src/app/App.svelte b/src/app/App.svelte
index daab953..32db54f 100644
--- a/src/app/App.svelte
+++ b/src/app/App.svelte
@@ -6,7 +6,13 @@
manifest as cacheWarmingManifest,
type WarmFeedback,
} from "../features/cache-warming";
- import { ChatView, Composer, manifest as chatManifest, ModelSelector } from "../features/chat";
+ import {
+ ChatView,
+ Composer,
+ manifest as chatManifest,
+ ContextSizeBadge,
+ ModelSelector,
+ } from "../features/chat";
import { manifest as conversationCacheManifest } from "../features/conversation-cache";
import { manifest as markdownManifest } from "../features/markdown";
import {
@@ -211,6 +217,7 @@
<ScrollToBottom show={smartScroll.showButton} onResume={() => smartScroll.resume()} />
</div>
+ <ContextSizeBadge contextSize={store.activeChat.currentContextSize} />
<Composer onSend={handleSend} />
</div>
diff --git a/src/core/metrics/format.test.ts b/src/core/metrics/format.test.ts
index 3eec93d..7c143d7 100644
--- a/src/core/metrics/format.test.ts
+++ b/src/core/metrics/format.test.ts
@@ -4,6 +4,7 @@ import {
computeCachePct,
computeExpectedCachePct,
computeTps,
+ formatContextSize,
viewCacheRate,
viewExpectedCache,
viewStepMetrics,
@@ -308,3 +309,17 @@ describe("viewExpectedCache", () => {
expect(v?.isHit).toBe(true);
});
});
+
+describe("formatContextSize", () => {
+ it("formats a defined count with thousands separators", () => {
+ expect(formatContextSize(34102)).toBe("34,102 tokens in context");
+ });
+
+ it("renders a placeholder for undefined (never 0)", () => {
+ expect(formatContextSize(undefined)).toBe("context size unknown");
+ });
+
+ it("renders an explicit 0 as zero tokens (a real reported value)", () => {
+ expect(formatContextSize(0)).toBe("0 tokens in context");
+ });
+});
diff --git a/src/core/metrics/format.ts b/src/core/metrics/format.ts
index ee8db60..d8dd2cc 100644
--- a/src/core/metrics/format.ts
+++ b/src/core/metrics/format.ts
@@ -17,6 +17,17 @@ function formatTps(tps: number | null): string | null {
return `${Math.round(tps)} tok/s`;
}
+/**
+ * Build the display label for the conversation's current context size.
+ * `undefined` means "unknown" (no per-step usage reported yet) and yields the
+ * placeholder `"context size unknown"` — never a `0` count. Any defined count
+ * (an explicit `0` included) yields `"<n> tokens in context"`, where `<n>` is
+ * the count as rendered by `formatTokens`.
+ */
+export function formatContextSize(n: number | undefined): string {
+  return n === undefined ? "context size unknown" : `${formatTokens(n)} tokens in context`;
+}
+
/** Compute tokens-per-second. Returns null when elapsed time is absent or zero. */
export function computeTps(outputTokens: number, elapsedMs: number | undefined): number | null {
if (elapsedMs === undefined || elapsedMs <= 0) return null;
diff --git a/src/core/metrics/index.ts b/src/core/metrics/index.ts
index 8822159..773d697 100644
--- a/src/core/metrics/index.ts
+++ b/src/core/metrics/index.ts
@@ -2,6 +2,7 @@ export {
computeCachePct,
computeExpectedCachePct,
computeTps,
+ formatContextSize,
viewCacheRate,
viewExpectedCache,
viewStepMetrics,
@@ -12,6 +13,7 @@ export {
applyDurableMetrics,
foldMetricsEvent,
initialMetricsState,
+ selectCurrentContextSize,
selectOrderedTurnMetrics,
} from "./reducer";
export type {
diff --git a/src/core/metrics/reducer.test.ts b/src/core/metrics/reducer.test.ts
index 16c88b3..cd9f673 100644
--- a/src/core/metrics/reducer.test.ts
+++ b/src/core/metrics/reducer.test.ts
@@ -4,6 +4,7 @@ import {
applyDurableMetrics,
foldMetricsEvent,
initialMetricsState,
+ selectCurrentContextSize,
selectOrderedTurnMetrics,
} from "./reducer";
@@ -39,7 +40,11 @@ const stepCompleteEvent = (
const doneEvent = (
turnId: string,
- extra: { durationMs?: number; usage?: { inputTokens: number; outputTokens: number } } = {},
+ extra: {
+ durationMs?: number;
+ usage?: { inputTokens: number; outputTokens: number };
+ contextSize?: number;
+ } = {},
): TurnDoneEvent => ({
type: "done",
conversationId: "c1",
@@ -366,3 +371,72 @@ describe("applyDurableMetrics", () => {
expect(s.durable.get("t1")?.usage.inputTokens).toBe(99);
});
});
+
+describe("contextSize / selectCurrentContextSize", () => {
+ it("live done carries contextSize onto the turn total", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 1234 }));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered[0]?.total?.contextSize).toBe(1234);
+ expect(selectCurrentContextSize(s)).toBe(1234);
+ });
+
+ it("contextSize is NOT the aggregate usage sum (multi-step turn)", () => {
+ let s = initialMetricsState();
+ // Two steps: usage sums to 300 in / 130 out = 430, but contextSize is the
+ // backend-stamped final-step occupancy, independent of the sum.
+ s = foldMetricsEvent(s, usageEvent("t1", 100, 50, "s1"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s1"));
+ s = foldMetricsEvent(s, usageEvent("t1", 200, 80, "s2"));
+ s = foldMetricsEvent(s, stepCompleteEvent("t1", "s2"));
+ s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 250 }));
+
+ const ordered = selectOrderedTurnMetrics(s);
+ expect(ordered[0]?.total?.usage).toEqual({ inputTokens: 300, outputTokens: 130 });
+ expect(ordered[0]?.total?.contextSize).toBe(250);
+ expect(selectCurrentContextSize(s)).toBe(250);
+ });
+
+ it("persisted (durable) contextSize is preserved and selected", () => {
+ let s = initialMetricsState();
+ s = applyDurableMetrics(s, [
+ { turnId: "t1", usage: { inputTokens: 10, outputTokens: 5 }, steps: [], contextSize: 4096 },
+ ]);
+ expect(s.durable.get("t1")?.contextSize).toBe(4096);
+ expect(selectCurrentContextSize(s)).toBe(4096);
+ });
+
+ it("selectCurrentContextSize returns the LATEST turn's value", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 100 }));
+ s = foldMetricsEvent(s, doneEvent("t2", { contextSize: 900 }));
+ expect(selectCurrentContextSize(s)).toBe(900);
+ });
+
+ it("selectCurrentContextSize skips a later turn that lacks contextSize", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 700 }));
+ // t2 finishes but the provider reported no per-step usage → no contextSize.
+ s = foldMetricsEvent(s, doneEvent("t2"));
+ expect(selectCurrentContextSize(s)).toBe(700);
+ });
+
+ it("selectCurrentContextSize is undefined (not 0) when nothing reported", () => {
+ let s = initialMetricsState();
+ expect(selectCurrentContextSize(s)).toBeUndefined();
+ s = foldMetricsEvent(s, doneEvent("t1"));
+ expect(selectCurrentContextSize(s)).toBeUndefined();
+ });
+
+ it("durable contextSize wins over live for a shared turnId", () => {
+ let s = initialMetricsState();
+ s = foldMetricsEvent(s, doneEvent("t1", { contextSize: 111 }));
+ s = applyDurableMetrics(s, [
+ { turnId: "t1", usage: { inputTokens: 1, outputTokens: 1 }, steps: [], contextSize: 222 },
+ ]);
+ expect(selectCurrentContextSize(s)).toBe(222);
+ });
+});
diff --git a/src/core/metrics/reducer.ts b/src/core/metrics/reducer.ts
index d36dba1..1e66cc8 100644
--- a/src/core/metrics/reducer.ts
+++ b/src/core/metrics/reducer.ts
@@ -62,6 +62,9 @@ function liveTurnToMetrics(lt: LiveTurn): TurnMetrics {
if (lt.durationMs !== undefined) {
(base as { durationMs?: number }).durationMs = lt.durationMs;
}
+ if (lt.doneContextSize !== undefined) {
+ (base as { contextSize?: number }).contextSize = lt.doneContextSize;
+ }
return base;
}
@@ -74,6 +77,7 @@ function ensureLiveTurn(state: MetricsState, turnId: string): [MetricsState, Liv
done: false,
durationMs: undefined,
doneUsage: undefined,
+ doneContextSize: undefined,
stepMap: new Map(),
stepOrder: [],
};
@@ -127,7 +131,7 @@ export function initialMetricsState(): MetricsState {
* - `usage` with `stepId`: upsert that step's usage.
* - `usage` without `stepId`: ignored.
* - `step-complete`: upsert that step's timing; default usage to zeros if absent.
- * - `done`: set turn's `durationMs` and optional aggregate `usage`.
+ * - `done`: set turn's `durationMs`, optional aggregate `usage`, and optional `contextSize`.
* - All other event types: return state unchanged.
*/
export function foldMetricsEvent(state: MetricsState, event: AgentEvent): MetricsState {
@@ -161,6 +165,7 @@ export function foldMetricsEvent(state: MetricsState, event: AgentEvent): Metric
done: true,
durationMs: event.durationMs ?? lt.durationMs,
doneUsage: event.usage ?? lt.doneUsage,
+ doneContextSize: event.contextSize ?? lt.doneContextSize,
};
const newLive = new Map(s1.live);
newLive.set(event.turnId, updated);
@@ -237,3 +242,22 @@ export function selectOrderedTurnMetrics(state: MetricsState): readonly TurnMetr
return result;
}
+
+/**
+ * Resolve the context size the conversation occupies RIGHT NOW.
+ *
+ * Walks the merged, ordered turn metrics from the newest entry back to the
+ * oldest and yields the first defined `total.contextSize`, so a newer turn
+ * without one does not hide an older known value. Yields `undefined`
+ * ("unknown") when no entry carries one; callers render a placeholder for
+ * that case, NEVER `0`.
+ */
+export function selectCurrentContextSize(state: MetricsState): number | undefined {
+  const turns = selectOrderedTurnMetrics(state);
+  let idx = turns.length;
+  while (idx-- > 0) {
+    const size = turns[idx]?.total?.contextSize;
+    if (size !== undefined) return size;
+  }
+  return undefined;
+}
diff --git a/src/core/metrics/types.ts b/src/core/metrics/types.ts
index f5557f7..c22fd9f 100644
--- a/src/core/metrics/types.ts
+++ b/src/core/metrics/types.ts
@@ -19,6 +19,12 @@ export interface LiveTurn {
readonly done: boolean;
readonly durationMs: number | undefined;
readonly doneUsage: Usage | undefined;
+ /**
+ * Context size carried on the turn's `done` event (the turn's FINAL step
+ * `inputTokens + outputTokens` — current context occupancy). `undefined` when
+ * the provider reported no per-step usage; never coerced to `0`.
+ */
+ readonly doneContextSize: number | undefined;
readonly stepMap: ReadonlyMap<string, BuildingStep>;
readonly stepOrder: readonly string[];
}
diff --git a/src/features/chat/index.ts b/src/features/chat/index.ts
index 18ed693..adfb670 100644
--- a/src/features/chat/index.ts
+++ b/src/features/chat/index.ts
@@ -6,6 +6,7 @@ export type { ChatStore, ChatStoreDependencies } from "./store.svelte";
export { createChatStore } from "./store.svelte";
export { default as ChatView } from "./ui/ChatView.svelte";
export { default as Composer } from "./ui/Composer.svelte";
+export { default as ContextSizeBadge } from "./ui/ContextSizeBadge.svelte";
export { default as ModelSelector } from "./ui/ModelSelector.svelte";
/** Public module manifest — aggregated by the shell's "Loaded Modules" view. */
diff --git a/src/features/chat/store.svelte.ts b/src/features/chat/store.svelte.ts
index f4ad07b..6344aec 100644
--- a/src/features/chat/store.svelte.ts
+++ b/src/features/chat/store.svelte.ts
@@ -18,6 +18,7 @@ import {
applyDurableMetrics,
foldMetricsEvent,
initialMetricsState,
+ selectCurrentContextSize,
selectOrderedTurnMetrics,
} from "../../core/metrics";
import type { ConversationCache } from "../conversation-cache";
@@ -36,6 +37,12 @@ export interface ChatStore {
readonly messages: readonly ChatMessage[];
readonly chunks: readonly RenderedChunk[];
readonly turnMetrics: readonly TurnMetricsEntry[];
+ /**
+ * The conversation's current context size (tokens occupied) — the latest
+ * finalized turn's `contextSize`, or `undefined` ("unknown") when none is
+ * known yet. Never `0` for the unknown case.
+ */
+ readonly currentContextSize: number | undefined;
readonly pendingSync: boolean;
readonly error: string | null;
readonly model: string | undefined;
@@ -91,6 +98,9 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
get turnMetrics(): readonly TurnMetricsEntry[] {
return selectOrderedTurnMetrics(metrics);
},
+ get currentContextSize(): number | undefined {
+ return selectCurrentContextSize(metrics);
+ },
get pendingSync(): boolean {
return _pendingSync;
},
diff --git a/src/features/chat/ui/ContextSizeBadge.svelte b/src/features/chat/ui/ContextSizeBadge.svelte
new file mode 100644
index 0000000..475d54f
--- /dev/null
+++ b/src/features/chat/ui/ContextSizeBadge.svelte
@@ -0,0 +1,20 @@
+<script lang="ts">
+ import { formatContextSize } from "../../../core/metrics";
+
+ let {
+ contextSize,
+ }: {
+ // The conversation's current context size (tokens occupied), or `undefined`
+ // ("unknown") when no finalized turn has reported one yet. Never `0` for the
+ // unknown case — `formatContextSize` renders a placeholder instead.
+ contextSize: number | undefined;
+ } = $props();
+
+ const label = $derived(formatContextSize(contextSize));
+</script>
+
+<div class="px-4 pb-1 text-xs opacity-60" aria-live="polite">
+ <span title="The model's max context window is not reported yet — current usage only.">
+ {label}
+ </span>
+</div>