summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--.dispatch/transport-contract.reference.md266
-rw-r--r--backend-handoff.md17
-rw-r--r--src/app/App.svelte19
-rw-r--r--src/app/store.svelte.ts77
-rw-r--r--src/features/chat/index.ts2
-rw-r--r--src/features/chat/ui/CompactionView.svelte101
-rw-r--r--src/features/chat/ui/Composer.svelte10
7 files changed, 156 insertions, 336 deletions
diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md
index 608211d..02b48a0 100644
--- a/.dispatch/transport-contract.reference.md
+++ b/.dispatch/transport-contract.reference.md
@@ -1,228 +1,22 @@
-# `@dispatch/transport-contract` — in-repo reference (read THIS, not node_modules)
-
-> MIRRORS the backend's `@dispatch/transport-contract` package source so headless FE agents can read
-> the HTTP + WebSocket wire shapes WITHOUT following the `file:` dep symlink out of this repo (which
-> hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally —
-> this file is for READING only.
->
-> **Orchestrator:** SNAPSHOT of `transport-contract@0.15.0` (compaction).
-> Depends on `@dispatch/[email protected]` (see `wire.reference.md`) + `@dispatch/[email protected]` (see
-> `ui-contract.reference.md`).
->
-> **2026-06-22 delta (compaction handoff — package bumped `0.14.0` → `0.15.0`, ADDITIVE):**
-> adds conversation compaction — summarize old history + retain recent N messages. Manual:
-> `POST /conversations/:id/compact` (optional `{ keepLastN, modelName }`) → `CompactResponse`.
-> Automatic: after each turn settles, if the last turn's input tokens exceeded the per-conversation
-> `compactThreshold`, compaction runs automatically. `GET`/`PUT /conversations/:id/compact-threshold`
-> (`CompactThresholdResponse`/`SetCompactThresholdRequest`) — `threshold: 0` = disabled; default
-> 350000 when not stored. Re-exports `CompactionResult` from `[email protected]`.
->
-> **2026-06-22 delta (conversation lifecycle handoff — package bumped `0.13.0` → `0.14.0`, ADDITIVE):**
-> adds conversation lifecycle **status** (`active`/`idle`/`closed`) for cross-device tab
-> persistence. `ConversationMeta` (re-exported from `[email protected]`) gains a `status` field. New
-> WS message `ConversationStatusChangedMessage` (`{ type: "conversation.statusChanged";
-> conversationId; status }`) is broadcast to ALL clients on every status change. `GET
-> /conversations` gains an optional `?status=active,idle` filter (comma-separated; default = all).
-> `POST /conversations/:id/close` now also sets status to `closed` (persists across restarts).
-> The FE fetches `?status=active,idle` on connect to restore the tab bar across devices.
->
-> **2026-06-21 delta (conversation.open handoff — package bumped `0.12.0` → `0.13.0`, ADDITIVE):**
-> adds the `conversation.open` WS broadcast — when the CLI's `--open` flag fires
-> (`POST /conversations/:id/open`), the backend broadcasts a `ConversationOpenMessage`
-> (`{ type: "conversation.open"; conversationId }`) to ALL connected WS clients. Additive to
-> `WsServerMessage`. The FE handles it by opening/focusing a tab for the `conversationId`. Also
-> adds conversation metadata endpoints (not yet consumed by the FE): `GET /conversations` (list,
-> `ConversationListResponse`/`ConversationMeta`), `GET /conversations/:id/last` (blocking last
-> message, `LastMessageResponse`), `GET`/`PUT /conversations/:id/title` (`TitleResponse`/
-> `SetTitleRequest`), and `POST /conversations/:id/open` (`OpenConversationResponse`). Re-exports
-> `ConversationMeta` from `[email protected]`.
->
-> **2026-06-21 delta (message-queue + steering handoff — package bumped `0.11.0` → `0.12.0`, ADDITIVE):**
-> adds the enqueue surface for the per-conversation message queue (the wire types `QueuedMessage` /
-> `QueuePayload` + the new `steering` `AgentEvent` live in `[email protected]`, re-exported here). Two
-> additive shapes:
-> 1. **WS `chat.queue` op** — `ChatQueueMessage { type: "chat.queue"; conversationId; text }` (a
-> new `WsClientMessage` union member). Fire-and-forget: on success the server emits NOTHING back
-> — the message-queue SURFACE updates (the new message appears in the snapshot). On failure (empty
-> `text`, unknown conversation) the server replies `chat.error`. **Auto-start when idle
-> (server-owned):** if no turn is active, `chat.queue` does NOT queue — it STARTS A NEW TURN with
-> the message as its opening prompt (equivalent to `chat.send`). So a single op works for both
-> "steer during generation" and "send"; the client doesn't pick. `text` must be non-empty after trim.
-> 2. **HTTP `POST /conversations/:id/queue`** — body `QueueRequest { text }` → `QueueResponse
-> { conversationId; startedTurn: boolean; queue: QueuedMessage[] }`. `startedTurn: true` = was
-> idle, a new turn started (the message is the turn's opening prompt, NOT a queued steering
-> message); `startedTurn: false` = a turn was active, the message was queued (the `queue`
-> snapshot includes it). Empty/whitespace `text` → HTTP 400 `{ error }`. The FE uses the WS op.
->
-> The queue is read via a per-conversation SURFACE (`message-queue`, scope `conversation`; one
-> `custom` field, `rendererId: "message-queue"`, `payload: QueuePayload`) — NOT via the chat stream.
-> See the handoff for the full flow (steering event, carry-to-new-turn, move-vs-duplicate).
->
-> **2026-06-12 delta (reasoning-effort handoff — package bumped `0.10.0` → `0.11.0`, ADDITIVE):**
-> the thinking-depth knob (`ReasoningEffort`, re-exported from `[email protected]`) lands in TWO scopes,
-> resolved server-side per turn (per-turn override → persisted conversation value → default
-> `"high"`; do NOT re-implement the chain client-side):
-> 1. **Per-turn override** — optional `reasoningEffort?: ReasoningEffort` on `ChatRequest` (and
-> therefore on WS `chat.send`, which extends it). Applies to THAT turn only; never persists.
-> OMIT the key for "no override" (never send `null`/`""`).
-> 2. **Persisted per-conversation setting** — `GET /conversations/:id/reasoning-effort` →
-> `ReasoningEffortResponse { conversationId, reasoningEffort: ReasoningEffort | null }`
-> (`null` = never set ⇒ the default `"high"` applies, NOT "off") and
-> `PUT /conversations/:id/reasoning-effort` body `SetReasoningEffortRequest
-> { reasoningEffort }`. Takes effect from the NEXT turn.
-> Validation: an unrecognized level → HTTP 400 `{ error }` listing the valid levels (same for the
-> WS path via the standard `chat.send` error reply). Cache note: CHANGING the level changes the
-> provider request shape and can bust the prompt cache for the next turn (one-time re-prefill);
-> a stable setting stays cache-safe (warming uses the same resolved effort).
->
-> **2026-06-12 delta (CR-5 history windowing — package bumped `0.9.0` → `0.10.0`):** NO type-shape
-> change — `GET /conversations/:id` gains two OPTIONAL query params alongside `sinceSeq`:
-> **`limit=<k>`** (the NEWEST `k` chunks of the selection, still ASCENDING; a selection with ≤ `k`
-> chunks is returned whole; omitted = full selection, byte-identical to the old behavior) and
-> **`beforeSeq=<s>`** (exclusive upper bound `seq < s`; combined: `sinceSeq < seq < beforeSeq`).
-> `limit`/`beforeSeq` must be POSITIVE integers (`sinceSeq` may still be 0); malformed/zero/negative
-> → HTTP 400 `{ error }` naming the param. Seq numbering is now a WRITTEN CONTRACT: 1-based,
-> monotonic, gap-free (see `[email protected]` `StoredChunk`), so `hasOlder = oldestLoaded.seq > 1` — there
-> is deliberately NO `earliestSeq`/`hasOlder` field. CAVEAT: on a windowed read, `latestSeq`
-> describes the returned WINDOW; never regress a tail cursor from a `beforeSeq` backfill page.
-> Intended flows: fresh load `?sinceSeq=0&limit=<k>` · tail sync `?sinceSeq=<cursor>` (no limit) ·
-> page older in `?beforeSeq=<oldestLoadedSeq>&limit=<k>`.
->
-> **2026-06-12 delta (CR-4 cache-warming lifecycle — package bumped `0.8.0` → `0.9.0`):** adds
-> `POST /conversations/:id/close` (`CloseConversationResponse`) — the EXPLICIT "user closed this
-> conversation's tab" affordance, distinct from a socket disconnect / `chat.unsubscribe` (which
-> still NEVER touch the turn or the warming schedule). Closing (1) aborts any in-flight turn — the
-> kernel stops at the next event boundary, partial messages are PERSISTED, and the turn SEALS
-> normally with `finishReason: "aborted"` (watchers receive `done` then `turn-sealed`, so a
-> stream-derived "generating" flag clears with no special-casing) — and (2) stops + DISABLES
-> cache-warming for the conversation (persisted OFF; reopening does not resume warming). Idempotent:
-> closing an idle/unknown conversation is `200` with `abortedTurn: false`. Backend behavior fixes
-> riding EXISTING shapes (no other contract change): warming now defaults OFF for a new conversation
-> (240s interval default kept; re-enable restores the persisted interval); post-warm surface updates
-> now carry the FUTURE `nextWarmAt` (notify-before-reschedule fixed); `nextWarmAt: null` is pushed on
-> `turn-start` (nothing scheduled while generating) and when warming is/became disabled. Caveat: the
-> warming opt-in is NOT yet re-hydrated across a backend restart (reads disabled until toggled again).
->
-> **2026-06-12 delta (CR-3 user-message handoff — package bumped `0.7.0` → `0.8.0`):** NO transport
-> shape change — it re-exports `AgentEvent` (which `chat.delta` / `/chat` NDJSON carry), and that union
-> gained the additive `TurnInputEvent` (`{ type: "user-message"; conversationId; turnId; text }`), the
-> turn's user prompt, emitted as the FIRST event of every turn (before `turn-start`) and replayed to
-> watchers/late-joiners. See the `wire.reference.md` CR-3 delta + `TurnInputEvent` for the definition.
->
-> **2026-06-12 delta (turn-continuity handoff — package bumped `0.6.0` → `0.7.0`, ADDITIVE):** a turn
-> is no longer bound to the WS connection — it runs to completion server-side regardless of any
-> client, and any number of connections can watch the same conversation (incl. a late-joiner that
-> connects mid-turn). Two new client→server WS messages: `ChatSubscribeMessage`
-> (`{ type: "chat.subscribe"; conversationId }`) and `ChatUnsubscribeMessage`
-> (`{ type: "chat.unsubscribe"; conversationId }`); `WsClientMessage` now unions both. Server→client
-> is UNCHANGED (turn events still arrive as `chat.delta`, replayed AND live). Semantics: `chat.subscribe`
-> registers the connection + immediately REPLAYS the in-flight turn's events so far (from its
-> `turn-start`) then streams live (nothing replayed if idle); `chat.send` AUTO-subscribes the sending
-> connection (a 2nd send while generating ⇒ `chat.error` + you stay subscribed to watch the running
-> turn); `chat.unsubscribe`/socket-close drops the subscription but NEVER stops the turn; subscriptions
-> persist across turns. FE consumes via the `chat` feature + app store (re-subscribe every open
-> conversation on (re)connect + page load; derive a "running" state structurally from
-> `turn-start`…no-`done`/`turn-sealed`-yet). OUT of scope: per-step crash-resume, concurrent-send
-> arbitration.
->
-> **2026-06-12 delta (context-size handoff — package bumped `0.5.0` → `0.6.0`, depends on
-> `[email protected]`):** no NEW transport shape — the optional `contextSize?: number` rides the
-> re-exported `TurnMetrics` (so `ConversationMetricsResponse.turns[].contextSize`) and, live, the
-> `TurnDoneEvent.contextSize` on the `done` AgentEvent (`chat.delta` WS / `/chat` NDJSON). On
-> (re)hydrate take the LAST `turns[]` element with a defined `contextSize`; live, update on `done`.
-> See the `wire.reference.md` context-size delta for the definition.
->
-> **2026-06 delta (cache-warming handoff, additive — package still `0.4.0`):** adds
-> `POST /chat/warm` (`WarmRequest` → `WarmResponse`) for an on-demand prompt-cache warm, and the
-> throughput axis `GET /metrics/throughput` (`ThroughputResponse`/`ThroughputModelStat`/
-> `ThroughputPeriod`). The warm is NEVER persisted/streamed and NEVER folded into a conversation's
-> real usage. Pairs with the `cache-warming` conversation-scoped surface + `NumberField` in
-> `ui-contract.reference.md`.
->
-> **2026-06-11 delta (cache-rate fix handoff, additive — package still `0.4.0`):** `WarmResponse`
-> gains `expectedCacheRate` (the warming HEALTH/retention signal,
-> `round(cacheReadTokens / (cacheReadTokens + cacheWriteTokens) * 100)`). Consumed FE-side: headlined
-> on the "Warm now" result. (No `ui-contract` change — the `cache-warming` surface's new
-> `cache-warming-timer` payload + second "cache retention" `stat` ride the EXISTING `custom`/`stat`
-> kinds; the FE cache-warming feature parses them.)
->
-> **2026-06-11 delta (LSP + cwd handoff — package bumped to `0.5.0`):** adds per-conversation working
-> directory `GET /conversations/:id/cwd` + `PUT /conversations/:id/cwd` (`CwdResponse`/`SetCwdRequest`,
-> CORS now allows `PUT`) and per-conversation LSP status `GET /conversations/:id/lsp`
-> (`LspStatusResponse`/`LspServerInfo`/`LspServerState`). The LSP GET LAZILY spawns+initializes the
-> configured servers (can take a moment the first time per cwd; cached after) and returns once each
-> server settles to `connected`/`error`. `servers` is `[]` when `cwd` is null. A `/chat`(`/warm`)
-> request that omits `cwd` now defaults to the conversation's persisted cwd; one that sends `cwd`
-> persists it. Consumed FE-side by the `workspace` feature (cwd field in the Model view + a
-> "Language Servers" view).
->
-> **0.3.0 change (token + timing metrics):** adds the durable metrics READ endpoint
-> `GET /conversations/:id/metrics` → `ConversationMetricsResponse` (`{ turns: TurnMetrics[] }`), and
-> re-exports `StepMetrics` / `TurnMetrics` from `@dispatch/wire`. This is a SEPARATE read axis from
-> the seq-cursor history (`GET /conversations/:id`): metrics are keyed PER TURN (not per chunk), so
-> they get their own route. `turns` is every SEALED turn's `TurnMetrics` in turn order (an in-flight
-> turn is absent until its metrics persist post-seal). The live `usage`/`step-complete`/`done`
-> packets it mirrors are transient (NOT persisted) and ride the `chat.delta`/NDJSON `AgentEvent`
-> stream you already consume — see `wire.reference.md`. The contract's OWN chat/history shapes are
-> otherwise unchanged from 0.2.0.
-
-## Endpoints (backend — CORS wildcard `*`, HTTP port 24203, WS port 24205)
-
-- `POST /chat` — body `ChatRequest` (JSON); response NDJSON stream, one `AgentEvent` per line;
- resolved id also in `X-Conversation-Id` header.
-- `GET /models` — `ModelsResponse`.
-- `GET /conversations/:id?sinceSeq=<n>&beforeSeq=<s>&limit=<k>` — `ConversationHistoryResponse`:
- RAW, append-order, seq-ordered slice with `n < seq < s`, windowed to the NEWEST `k` (all params
- optional; NOT reconciled — dangling tool-calls returned as-is). `latestSeq` = last chunk's `seq`,
- or the requested `sinceSeq` when caught up (empty `chunks`) — a TAIL cursor only; do not regress
- a cursor from a windowed/backfill read. `limit`/`beforeSeq` must be positive ints → else 400.
-- `GET /conversations/:id/metrics` — `ConversationMetricsResponse`: every SEALED turn's `TurnMetrics`
- in turn order (per-turn token + timing; NOT seq-filtered). IMPLEMENTED + LIVE-VERIFIED (probe 17/17).
-- `POST /chat/warm` — body `WarmRequest` (JSON) → `200 WarmResponse` (cache-warm usage incl.
- `cachePct`); `409 { error }` when the conversation is currently generating; `400 { error }` on a
- missing/invalid `conversationId`. The warm is NEVER persisted/streamed/folded into real usage.
-- `POST /conversations/:id/close` — no body → `200 CloseConversationResponse`. The EXPLICIT tab-close
- affordance: aborts any in-flight turn (persists the partial; seals with `finishReason: "aborted"`)
- AND stops + disables cache-warming (persisted OFF). Idempotent (`abortedTurn: false` when idle/unknown).
-- `POST /conversations/:id/queue` — body `QueueRequest { text }` → `200 QueueResponse`. Enqueue a user
- message for mid-turn steering delivery (the WS `chat.queue` op is the FE's path). When a turn is
- active, the message is queued + delivered at the next tool-result boundary (a `steering` `AgentEvent`
- fires; the message-queue SURFACE updates). When idle, the enqueue STARTS a new turn with the message
- as its opening prompt (`startedTurn: true`). Empty/whitespace `text` → `400 { error }`.
-- `GET /metrics/throughput?period=day|week|month&date=<...>` — `ThroughputResponse` (token-weighted
- tokens/sec per model over the window). Not part of cache-warming; listed for completeness.
-- `GET /conversations/:id/cwd` — `CwdResponse` (`cwd` is `null` until set).
-- `PUT /conversations/:id/cwd` — body `SetCwdRequest` → `200 CwdResponse`; `400 { error }` if `cwd`
- missing/empty. CORS allows `PUT`.
-- `GET /conversations/:id/lsp` — `LspStatusResponse`. LAZILY spawns+initializes the configured servers
- on the first call per cwd (can take a moment; cached after); returns once each settles to
- `connected`/`error`. `servers` is `[]` when `cwd` is null.
-- `GET /conversations/:id/reasoning-effort` — `ReasoningEffortResponse` (`reasoningEffort` is `null`
- when never set ⇒ default `"high"` applies). Works for an unseen/draft id.
-- `PUT /conversations/:id/reasoning-effort` — body `SetReasoningEffortRequest` →
- `200 ReasoningEffortResponse`; `400 { error }` on an unrecognized level (the message lists the
- valid levels). Persists the conversation's sticky level; effective from the NEXT turn.
-- WebSocket on :24205 — ONE path-agnostic socket multiplexes surface ops
- (`@dispatch/ui-contract`) + chat ops (below). Open once, send `WsClientMessage`, receive
- `WsServerMessage`. Live `AgentEvent` deltas carry `conversationId`+`turnId` but **no `seq`**
- (seq lives only on `StoredChunk`, obtained via the `sinceSeq` sync after `turn-sealed`).
-- DEFERRED (not built; do not depend on): `GET /conversations` (list). (The former deferred
- `POST /conversations/:id/cancel` is superseded by `POST /conversations/:id/close`.)
-
-```ts
/**
* Transport contract — the typed description of Dispatch's client–server API
* (HTTP + WebSocket).
*
* This package is types-only (zero runtime). It is the single shared surface
- * every client imports to know how to talk to the backend. Each side owns its
- * OWN (de)serialization: the contract is the SHAPES, not the codec. The
- * streaming response payload is the kernel's `AgentEvent` union, re-exported
- * here so a client has one import for the whole wire.
+ * every client imports to know how to talk to the backend — the CLI, the web
+ * frontend (in its own repo), any third-party client — and the transport-http /
+ * transport-ws servers import to know what they must accept and emit.
+ *
+ * Each side owns its OWN (de)serialization: there is deliberately no shared
+ * parse/serialize helper here (isolation-over-DRY). The contract is the SHAPES,
+ * not the codec. The streaming response payload is the kernel's `AgentEvent`
+ * union, re-exported here so a client has one import for the whole wire.
*
* The WebSocket carries BOTH chat ops (defined here) and surface ops (defined in
* `@dispatch/ui-contract`) over one connection; the unified `WsClientMessage` /
- * `WsServerMessage` unions below compose them.
+ * `WsServerMessage` unions below compose them. Chat ops are new, non-colliding
+ * `type` variants — there is no channel wrapper, so the shipped surface protocol
+ * is unchanged.
*/
import type { SurfaceClientMessage, SurfaceServerMessage } from "@dispatch/ui-contract";
@@ -238,6 +32,7 @@ import type {
export type {
AgentEvent,
+ CompactionResult,
ConversationMeta,
ConversationStatus,
QueuedMessage,
@@ -290,11 +85,20 @@ export interface ChatRequest {
/**
* Response body for `GET /models` — the model catalog.
*
- * Each entry is a model name in `<credentialName>/<model>` form: exactly the
- * string a client passes back as `ChatRequest.model`.
+ * Each entry in `models` is a model name in `<credentialName>/<model>` form:
+ * exactly the string a client passes back as `ChatRequest.model`.
+ * `modelInfo` is an optional map from the same `<credentialName>/<model>` key
+ * to model metadata (e.g. `contextWindow`). Additive — clients that only
+ * read `models` are unaffected.
*/
export interface ModelsResponse {
readonly models: readonly string[];
+ readonly modelInfo?: Readonly<Record<string, ModelMetadata>>;
+}
+
+/** Per-model metadata returned alongside the model catalog. */
+export interface ModelMetadata {
+ readonly contextWindow?: number;
}
/**
@@ -352,6 +156,12 @@ export interface ConversationHistoryResponse {
* (and per-step) token + timing metrics for a conversation, for a client
* reopening a past conversation to render historical usage/latency.
*
+ * This is a SEPARATE axis from the two other read concerns and is deliberately
+ * its own endpoint: the live `usage`/`step-complete`/`done` events are transient
+ * (not persisted), and `ConversationHistoryResponse` carries seq-cursor chunk
+ * CONTENT. Metrics are keyed per TURN (not per chunk) and so are not seq-filtered
+ * — hence a sibling route rather than a field on the history response.
+ *
* `turns` is every SEALED turn's `TurnMetrics` in turn order. A turn appears only
* after its metrics were persisted (post-seal); an in-flight or unsealed turn is
* absent until then.
@@ -703,8 +513,8 @@ export interface ConversationOpenMessage {
/**
* Broadcast to all connected WS clients when a conversation's lifecycle status
- * changes (`active`/`idle`/`closed`). The FE uses this for cross-device tab
- * sync: `closed` → remove the tab; `active` → show a generating indicator.
+ * changes (active/idle/closed). The frontend uses this to sync tab state across
+ * devices in real time.
*/
export interface ConversationStatusChangedMessage {
readonly type: "conversation.statusChanged";
@@ -770,8 +580,6 @@ export interface TitleResponse {
readonly title: string;
}
-// ─── Compaction ──────────────────────────────────────────────────────────────
-
/**
* Response for `POST /conversations/:id/compact` — confirms the conversation
* history was compacted (old messages summarized, recent messages retained).
@@ -784,19 +592,17 @@ export interface CompactResponse {
}
/**
- * Response for `GET /conversations/:id/compact-threshold` — the token count
- * at which automatic compaction triggers (0 = manual only; default 350000
- * when not stored).
+ * Response for `GET /conversations/:id/compact-percent` — the percentage of the
+ * model's context window at which automatic compaction triggers (0 = manual only).
*/
-export interface CompactThresholdResponse {
+export interface CompactPercentResponse {
readonly conversationId: string;
readonly threshold: number;
}
/**
- * Request body for `PUT /conversations/:id/compact-threshold`.
+ * Request body for `PUT /conversations/:id/compact-percent`.
*/
-export interface SetCompactThresholdRequest {
+export interface SetCompactPercentRequest {
readonly threshold: number;
}
-```
diff --git a/backend-handoff.md b/backend-handoff.md
index 8e86ce4..2768493 100644
--- a/backend-handoff.md
+++ b/backend-handoff.md
@@ -5,10 +5,10 @@
> **From:** dispatch-web orchestrator · **To:** arch-rewrite orchestrator · **Courier:** the user.
> `lsp` does NOT span the repos (AGENTS.md § Backend seam) — every cross-repo ask flows through here.
-_Last updated: 2026-06-22 (CR-6 resolved by backend — incremental seq at step boundaries).
+_Last updated: 2026-06-22 (context window + percentage-based compact consumed).
**FE is current on `[email protected]` / `[email protected]` / `[email protected]`.** 686 tests green.
-**Open asks: NONE.** All CRs resolved (CR-1 through CR-6). CR-6 not yet consumed by the FE —
-see §2 for the adoption plan._
+**Open asks: NONE.** All CRs resolved (CR-1 through CR-6) + context-window + compact-percent
+handoff consumed._
---
@@ -93,11 +93,12 @@ the turn seals and `syncTail` fetches everything.
## 3. Likely NEXT backend asks (heads-up, not yet requested)
-- **Model max context-window LIMIT** (the denominator for context size) — the FE renders
- `contextSize / limit · pct%` + a fill bar in the composer status bar, but the limit is currently
- HARDCODED to `1,000,000` as a placeholder (`MAX_CONTEXT` in `features/chat/ui/Composer.svelte`).
- When a per-model `contextWindow` (max token capacity) ships, wire the real value through so the
- bar/percent are accurate.
+- **Model max context-window LIMIT** → **CONSUMED ✅** — `GET /models` now returns
+ `modelInfo[model].contextWindow`. The Composer uses the real value (falls back to
+ 1,000,000 when absent). The hardcoded `MAX_CONTEXT` is gone.
+- **Percentage-based auto-compact** → **CONSUMED ✅** — `compact-threshold` endpoint
+ renamed to `compact-percent`; field is now `percent` (0-100, default 85, 0 = manual).
+ CompactionView UI updated from token count to percent input (0-100).
- **`GET /conversations`** — conversation list / sidebar (history explorer / switcher); could also
expose a per-conversation "last model" so a reopened tab seeds its model from the server.
- **LSP status over WS** (push) — today the FE HTTP-polls `GET /conversations/:id/lsp` on panel mount
diff --git a/src/app/App.svelte b/src/app/App.svelte
index 57fe16f..ae09bd5 100644
--- a/src/app/App.svelte
+++ b/src/app/App.svelte
@@ -17,7 +17,7 @@
ReasoningEffortSelector,
type CompactNowResult,
type ReasoningEffortSaveResult,
- type SaveCompactThresholdResult,
+ type SaveCompactPercentResult,
} from "../features/chat";
import { manifest as conversationCacheManifest } from "../features/conversation-cache";
import { manifest as markdownManifest } from "../features/markdown";
@@ -249,13 +249,13 @@
: { ok: false, error: result.error };
}
- async function saveCompactThreshold(
- threshold: number,
- ): Promise<SaveCompactThresholdResult | null> {
- const result = await store.setCompactThreshold(threshold);
+ async function saveCompactPercent(
+ percent: number,
+ ): Promise<SaveCompactPercentResult | null> {
+ const result = await store.setCompactPercent(percent);
if (result === null) return null;
return result.ok
- ? { ok: true, threshold: result.threshold }
+ ? { ok: true, percent: result.percent }
: { ok: false, error: result.error };
}
@@ -393,6 +393,7 @@
onQueue={handleQueue}
onStop={handleStop}
contextSize={store.activeChat.currentContextSize}
+ contextWindow={store.modelInfo[store.activeModel]?.contextWindow}
status={store.activeChat.error
? "error"
: store.activeChat.generating
@@ -482,13 +483,13 @@
{/if}
{/key}
{:else if kind === "compaction"}
- <!-- Re-mount per conversation so the threshold + feedback can't bleed across tabs. -->
+ <!-- Re-mount per conversation so the percent + feedback can't bleed across tabs. -->
{#key store.currentConversationId}
<CompactionView
- threshold={store.compactThreshold}
+ percent={store.compactPercent}
canCompact={store.activeConversationId !== null}
{compactNow}
- saveThreshold={saveCompactThreshold}
+ savePercent={saveCompactPercent}
/>
{/key}
{:else if kind === "settings"}
diff --git a/src/app/store.svelte.ts b/src/app/store.svelte.ts
index bb08585..3f78a97 100644
--- a/src/app/store.svelte.ts
+++ b/src/app/store.svelte.ts
@@ -1,8 +1,8 @@
import type {
ChatDeltaMessage,
ChatErrorMessage,
+ CompactPercentResponse,
CompactResponse,
- CompactThresholdResponse,
ConversationCompactedMessage,
ConversationHistoryResponse,
ConversationListResponse,
@@ -11,10 +11,11 @@ import type {
ConversationStatusChangedMessage,
CwdResponse,
LspStatusResponse,
+ ModelMetadata,
ModelsResponse,
ReasoningEffort,
ReasoningEffortResponse,
- SetCompactThresholdRequest,
+ SetCompactPercentRequest,
SetCwdRequest,
SetReasoningEffortRequest,
WarmRequest,
@@ -73,9 +74,9 @@ export type CompactResult =
| { readonly ok: true; readonly response: CompactResponse }
| { readonly ok: false; readonly error: string };
-/** Outcome of `PUT /conversations/:id/compact-threshold`. */
-export type CompactThresholdResult =
- | { readonly ok: true; readonly threshold: number }
+/** Outcome of `PUT /conversations/:id/compact-percent`. */
+export type CompactPercentResult =
+ | { readonly ok: true; readonly percent: number }
| { readonly ok: false; readonly error: string };
/** Outcome of persisting a chat-limit setting (localStorage; FE-local). */
@@ -88,6 +89,8 @@ export interface AppStore {
readonly activeConversationId: string | null;
readonly activeChat: ChatStore;
readonly models: readonly string[];
+ /** Per-model metadata (contextWindow, etc.) from `GET /models`. */
+ readonly modelInfo: Readonly<Record<string, ModelMetadata>>;
readonly activeModel: string;
readonly catalog: ProtocolState["catalog"];
/** Every received surface spec, in catalog order — all auto-subscribed + expanded. */
@@ -152,17 +155,18 @@ export interface AppStore {
*/
stopGeneration(): void;
/**
- * The workspace conversation's auto-compact threshold (tokens). `0` = disabled
+ * The workspace conversation's auto-compact percent (0-100). `0` = disabled
* (manual only); 1-100 = auto-compact triggers once the last turn's input tokens
* exceed that share of the model's context window. Seeded from the backend on focus change.
*/
- readonly compactThreshold: number | null;
+ readonly compactPercent: number | null;
/**
- * Persist the workspace conversation's auto-compact threshold
- * (`PUT /conversations/:id/compact-threshold`). `0` disables; any positive
+ * Persist the workspace conversation's auto-compact percent
+ * (`PUT /conversations/:id/compact-percent`). `0` disables; 1-100 sets the
+ * trigger percentage of the model's context window. Default (null) is 85.
* Works for a draft too (its id survives promotion).
*/
- setCompactThreshold(threshold: number): Promise<CompactThresholdResult | null>;
+ setCompactPercent(percent: number): Promise<CompactPercentResult | null>;
/**
* Fetch the workspace conversation's language-server status (`GET /conversations/:id/lsp`).
* The backend lazily spawns servers, so this may take a moment on the first call for a cwd.
@@ -233,6 +237,7 @@ function createMetricsSync(httpBase: string, fetchImpl: typeof fetch): MetricsSy
export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
let protocol = $state<ProtocolState>(protocolInitialState());
let models = $state<readonly string[]>([]);
+ let modelInfo = $state<Readonly<Record<string, ModelMetadata>>>({});
let activeModel = $state(DEFAULT_MODEL);
const wsLocation = typeof location !== "undefined" ? location : undefined;
@@ -358,23 +363,23 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
}
}
- // The workspace conversation's auto-compact threshold. Seeded from the
+ // The workspace conversation's auto-compact percent. Seeded from the
// backend on focus change; null = not yet fetched. 0 = disabled.
- let compactThreshold = $state<number | null>(null);
+ let compactPercent = $state<number | null>(null);
- /** Refetch the workspace conversation's compact threshold (works for a draft too). */
- async function refreshCompactThreshold(): Promise<void> {
+ /** Refetch the workspace conversation's compact percent (works for a draft too). */
+ async function refreshCompactPercent(): Promise<void> {
const id = workspaceConversationId();
- compactThreshold = null;
+ compactPercent = null;
try {
const res = await fetchImpl(
- `${httpBase}/conversations/${encodeURIComponent(id)}/compact-threshold`,
+ `${httpBase}/conversations/${encodeURIComponent(id)}/compact-percent`,
);
if (!res.ok) return;
- const data = (await res.json()) as CompactThresholdResponse;
- if (workspaceConversationId() === id) compactThreshold = data.threshold;
+ const data = (await res.json()) as CompactPercentResponse;
+ if (workspaceConversationId() === id) compactPercent = data.threshold;
} catch {
- // Non-fatal: a threshold fetch failure just leaves null.
+ // Non-fatal: a percent fetch failure just leaves null.
}
}
@@ -542,7 +547,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
syncSubscriptions();
void refreshCwd();
void refreshReasoningEffort();
- void refreshCompactThreshold();
+ void refreshCompactPercent();
}
// Conversation lifecycle status (backend-owned, pushed via WS +
@@ -676,6 +681,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
.then((data) => {
if (data === undefined) return;
models = data.models;
+ modelInfo = data.modelInfo ?? {};
if (data.models.length > 0 && !data.models.includes(activeModel)) {
const first = data.models[0];
if (first !== undefined) {
@@ -713,7 +719,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
refreshActiveChat();
void refreshCwd();
void refreshReasoningEffort();
- void refreshCompactThreshold();
+ void refreshCompactPercent();
// Fetch the authoritative open-conversation list from the backend (cross-
// device tab sync). Merges with the localStorage-restored tabs: opens new
@@ -733,6 +739,9 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
get models(): readonly string[] {
return models;
},
+ get modelInfo(): Readonly<Record<string, ModelMetadata>> {
+ return modelInfo;
+ },
get activeModel(): string {
return activeModel;
},
@@ -759,8 +768,8 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
get reasoningEffort(): ReasoningEffort | null {
return reasoningEffort;
},
- get compactThreshold(): number | null {
- return compactThreshold;
+ get compactPercent(): number | null {
+ return compactPercent;
},
get chatLimit(): number {
return chatLimit;
@@ -800,7 +809,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
syncSubscriptions();
void refreshCwd();
void refreshReasoningEffort();
- void refreshCompactThreshold();
+ void refreshCompactPercent();
// Now send on the promoted store
chatStores.get(conversationId)?.send(text);
} else {
@@ -837,7 +846,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
syncSubscriptions();
void refreshCwd();
void refreshReasoningEffort();
- void refreshCompactThreshold();
+ void refreshCompactPercent();
},
selectTab(conversationId: string): void {
@@ -850,7 +859,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
syncSubscriptions();
void refreshCwd();
void refreshReasoningEffort();
- void refreshCompactThreshold();
+ void refreshCompactPercent();
},
closeTab(conversationId: string): void {
@@ -988,12 +997,12 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
}
},
- async setCompactThreshold(threshold: number): Promise<CompactThresholdResult | null> {
+ async setCompactPercent(percent: number): Promise<CompactPercentResult | null> {
const id = workspaceConversationId();
- const body: SetCompactThresholdRequest = { threshold };
+ const body: SetCompactPercentRequest = { threshold: percent };
try {
const res = await fetchImpl(
- `${httpBase}/conversations/${encodeURIComponent(id)}/compact-threshold`,
+ `${httpBase}/conversations/${encodeURIComponent(id)}/compact-percent`,
{
method: "PUT",
headers: { "content-type": "application/json" },
@@ -1004,16 +1013,16 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
const errBody = (await res.json().catch(() => null)) as { error?: string } | null;
return {
ok: false,
- error: errBody?.error ?? `Set compact threshold failed (HTTP ${res.status})`,
+ error: errBody?.error ?? `Set compact percent failed (HTTP ${res.status})`,
};
}
- const data = (await res.json()) as CompactThresholdResponse;
- if (workspaceConversationId() === id) compactThreshold = data.threshold;
- return { ok: true, threshold: data.threshold };
+ const data = (await res.json()) as CompactPercentResponse;
+ if (workspaceConversationId() === id) compactPercent = data.threshold;
+ return { ok: true, percent: data.threshold };
} catch (err) {
return {
ok: false,
- error: err instanceof Error ? err.message : "Set compact threshold request failed",
+ error: err instanceof Error ? err.message : "Set compact percent request failed",
};
}
},
diff --git a/src/features/chat/index.ts b/src/features/chat/index.ts
index 9c65cd4..1596c53 100644
--- a/src/features/chat/index.ts
+++ b/src/features/chat/index.ts
@@ -17,7 +17,7 @@ export {
export type { ChatStore, ChatStoreDependencies } from "./store.svelte";
export { createChatStore } from "./store.svelte";
export { default as ChatView } from "./ui/ChatView.svelte";
-export type { CompactNowResult, SaveCompactThresholdResult } from "./ui/CompactionView.svelte";
+export type { CompactNowResult, SaveCompactPercentResult } from "./ui/CompactionView.svelte";
export { default as CompactionView } from "./ui/CompactionView.svelte";
export { default as Composer } from "./ui/Composer.svelte";
export { default as ModelSelector } from "./ui/ModelSelector.svelte";
diff --git a/src/features/chat/ui/CompactionView.svelte b/src/features/chat/ui/CompactionView.svelte
index ce2a0a0..7bec984 100644
--- a/src/features/chat/ui/CompactionView.svelte
+++ b/src/features/chat/ui/CompactionView.svelte
@@ -3,54 +3,54 @@
| { readonly ok: true; readonly messagesSummarized: number; readonly messagesKept: number }
| { readonly ok: false; readonly error: string };
- export type SaveCompactThresholdResult =
- | { readonly ok: true; readonly threshold: number }
+ export type SaveCompactPercentResult =
+ | { readonly ok: true; readonly percent: number }
| { readonly ok: false; readonly error: string };
let {
- threshold,
+ percent,
canCompact,
compactNow,
- saveThreshold,
+ savePercent,
}: {
- /** The conversation's auto-compact threshold, or null when not yet fetched. 0 = disabled. */
- threshold: number | null;
+ /** The conversation's auto-compact percent (0-100), or null when not yet fetched. 0 = disabled. */
+ percent: number | null;
/** Whether a real conversation is focused (a draft has nothing to compact). */
canCompact: boolean;
compactNow: () => Promise<CompactNowResult | null>;
- saveThreshold: (threshold: number) => Promise<SaveCompactThresholdResult | null>;
+ savePercent: (percent: number) => Promise<SaveCompactPercentResult | null>;
} = $props();
- const DEFAULT_THRESHOLD = 350000;
+ const DEFAULT_PERCENT = 85;
let compacting = $state(false);
let compactError = $state<string | null>(null);
let compactResult = $state<{ summarized: number; kept: number } | null>(null);
- let thresholdInput = $state("");
- let savingThreshold = $state(false);
- let thresholdError = $state<string | null>(null);
- let thresholdSaved = $state(false);
+ let percentInput = $state("");
+ let savingPercent = $state(false);
+ let percentError = $state<string | null>(null);
+ let percentSaved = $state(false);
// Sync the input from the prop when it changes (focus switch / initial load).
- let lastThreshold = $state<number | null>(null);
+ let lastPercent = $state<number | null>(null);
$effect(() => {
- if (threshold !== lastThreshold) {
- lastThreshold = threshold;
- thresholdInput = threshold !== null ? String(threshold) : "";
- thresholdError = null;
- thresholdSaved = false;
+ if (percent !== lastPercent) {
+ lastPercent = percent;
+ percentInput = percent !== null ? String(percent) : "";
+ percentError = null;
+ percentSaved = false;
}
});
- const thresholdLabel = $derived(
- threshold == null
+ const percentLabel = $derived(
+ percent == null
? "Loading…"
- : threshold === 0
+ : percent === 0
? "Disabled (manual only)"
- : threshold === DEFAULT_THRESHOLD
- ? `${threshold.toLocaleString("en-US")} (default)`
- : threshold.toLocaleString("en-US"),
+ : percent === DEFAULT_PERCENT
+ ? `${percent}% (default)`
+ : `${percent}%`,
);
async function handleCompact() {
@@ -68,22 +68,22 @@
}
}
- async function handleSaveThreshold() {
- const value = Number.parseInt(thresholdInput, 10);
- if (Number.isNaN(value) || value < 0) {
- thresholdError = "Must be a non-negative number";
+ async function handleSavePercent() {
+ const value = Number.parseInt(percentInput, 10);
+ if (Number.isNaN(value) || value < 0 || value > 100) {
+ percentError = "Must be 0-100";
return;
}
- savingThreshold = true;
- thresholdError = null;
- thresholdSaved = false;
- const result = await saveThreshold(value);
- savingThreshold = false;
+ savingPercent = true;
+ percentError = null;
+ percentSaved = false;
+ const result = await savePercent(value);
+ savingPercent = false;
if (result === null) return;
if (result.ok) {
- thresholdSaved = true;
+ percentSaved = true;
} else {
- thresholdError = result.error;
+ percentError = result.error;
}
}
</script>
@@ -120,33 +120,34 @@
{/if}
</section>
- <!-- Auto-compact threshold -->
+ <!-- Auto-compact percent -->
<section class="flex flex-col gap-1">
- <span class="text-xs font-semibold uppercase opacity-60">Auto-compact threshold</span>
+ <span class="text-xs font-semibold uppercase opacity-60">Auto-compact percent</span>
<div class="flex items-center gap-2">
<input
type="number"
- class="input input-bordered input-sm w-32"
+ class="input input-bordered input-sm w-24"
min="0"
- placeholder={DEFAULT_THRESHOLD.toLocaleString("en-US")}
- value={thresholdInput}
- disabled={savingThreshold}
- onchange={handleSaveThreshold}
- aria-label="Compact threshold (tokens)"
+ max="100"
+ placeholder={String(DEFAULT_PERCENT)}
+ value={percentInput}
+ disabled={savingPercent}
+ onchange={handleSavePercent}
+ aria-label="Compact percent (0-100)"
/>
- <span class="text-xs opacity-60">tokens</span>
- {#if savingThreshold}
+ <span class="text-xs opacity-60">%</span>
+ {#if savingPercent}
<span class="loading loading-spinner loading-xs"></span>
{/if}
</div>
<p class="text-xs opacity-50">
- Current: {thresholdLabel}
+ Current: {percentLabel}
<br />
- 0 disables auto-compact. Default is {DEFAULT_THRESHOLD.toLocaleString("en-US")}.
+ 0 disables auto-compact. Default is {DEFAULT_PERCENT}%.
</p>
- {#if thresholdError}
- <p class="text-xs text-error">{thresholdError}</p>
- {:else if thresholdSaved}
+ {#if percentError}
+ <p class="text-xs text-error">{percentError}</p>
+ {:else if percentSaved}
<p class="text-xs text-success">Saved.</p>
{/if}
</section>
diff --git a/src/features/chat/ui/Composer.svelte b/src/features/chat/ui/Composer.svelte
index 7030153..fe9ea94 100644
--- a/src/features/chat/ui/Composer.svelte
+++ b/src/features/chat/ui/Composer.svelte
@@ -1,9 +1,7 @@
<script lang="ts">
import { computeContextUsage, formatCompactTokens } from "../../../core/metrics";
- // Placeholder context-window limit until the backend reports a real
- // per-model max (see backend-handoff §3). Hardcoded to 1,000,000 tokens.
- const MAX_CONTEXT = 1_000_000;
+ const FALLBACK_CONTEXT_WINDOW = 1_000_000;
const MAX_LINES = 7;
let {
@@ -11,6 +9,7 @@
onQueue,
onStop,
contextSize = undefined,
+ contextWindow = undefined,
status = "idle",
}: {
onSend: (text: string) => void;
@@ -26,6 +25,8 @@
// Current context occupancy (latest turn's contextSize), or `undefined`
// when unknown — the status bar then shows "— tokens", never 0%.
contextSize?: number | undefined;
+ /** Per-model context window (max tokens) from `GET /models` modelInfo. */
+ contextWindow?: number | undefined;
// Coarse agent status for the status-bar icon.
status?: "idle" | "running" | "error";
} = $props();
@@ -34,7 +35,8 @@
let inputEl: HTMLTextAreaElement | undefined;
const hasText = $derived(text.trim().length > 0);
- const usage = $derived(computeContextUsage(contextSize, MAX_CONTEXT));
+ const effectiveMax = $derived(contextWindow ?? FALLBACK_CONTEXT_WINDOW);
+ const usage = $derived(computeContextUsage(contextSize, effectiveMax));
const hasUsage = $derived(contextSize !== undefined);
// One button, three modes: