From 0c7e7ceae36930e87fc30993f18e30cf54888295 Mon Sep 17 00:00:00 2001 From: Adam Malczewski Date: Mon, 22 Jun 2026 15:14:31 +0900 Subject: feat: consume context window + percentage-based compact handoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Real context window: GET /models now returns modelInfo[model].contextWindow. The Composer uses this instead of the hardcoded MAX_CONTEXT = 1,000,000. Falls back to 1M when modelInfo is absent or the model has no contextWindow. 2. Percentage-based auto-compact: the compact-threshold endpoint is renamed to compact-percent. The CompactionView now shows a percent input (0-100, default 85, 0 = manual) instead of a token count input. Types renamed: CompactThresholdResponse → CompactPercentResponse, SetCompactThresholdRequest → SetCompactPercentRequest. Note: the field name in the backend types is still 'threshold' (not 'percent') — the FE maps between them. Re-mirrored .dispatch/transport-contract.reference.md. 686 tests green. 0 svelte-check errors + warnings. --- .dispatch/transport-contract.reference.md | 266 ++++------------------------- backend-handoff.md | 17 +- src/app/App.svelte | 19 ++- src/app/store.svelte.ts | 77 +++++---- src/features/chat/index.ts | 2 +- src/features/chat/ui/CompactionView.svelte | 101 +++++------ src/features/chat/ui/Composer.svelte | 10 +- 7 files changed, 156 insertions(+), 336 deletions(-) diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md index 608211d..02b48a0 100644 --- a/.dispatch/transport-contract.reference.md +++ b/.dispatch/transport-contract.reference.md @@ -1,228 +1,22 @@ -# `@dispatch/transport-contract` — in-repo reference (read THIS, not node_modules) - -> MIRRORS the backend's `@dispatch/transport-contract` package source so headless FE agents can read -> the HTTP + WebSocket wire shapes WITHOUT following the `file:` dep symlink out of this repo (which -> hangs on a permission prompt). 
Your CODE still imports `@dispatch/transport-contract` normally — -> this file is for READING only. -> -> **Orchestrator:** SNAPSHOT of `transport-contract@0.15.0` (compaction). -> Depends on `@dispatch/wire@0.11.0` (see `wire.reference.md`) + `@dispatch/ui-contract@0.2.0` (see -> `ui-contract.reference.md`). -> -> **2026-06-22 delta (compaction handoff — package bumped `0.14.0` → `0.15.0`, ADDITIVE):** -> adds conversation compaction — summarize old history + retain recent N messages. Manual: -> `POST /conversations/:id/compact` (optional `{ keepLastN, modelName }`) → `CompactResponse`. -> Automatic: after each turn settles, if the last turn's input tokens exceeded the per-conversation -> `compactThreshold`, compaction runs automatically. `GET`/`PUT /conversations/:id/compact-threshold` -> (`CompactThresholdResponse`/`SetCompactThresholdRequest`) — `threshold: 0` = disabled; default -> 350000 when not stored. Re-exports `CompactionResult` from `wire@0.11.0`. -> -> **2026-06-22 delta (conversation lifecycle handoff — package bumped `0.13.0` → `0.14.0`, ADDITIVE):** -> adds conversation lifecycle **status** (`active`/`idle`/`closed`) for cross-device tab -> persistence. `ConversationMeta` (re-exported from `wire@0.10.0`) gains a `status` field. New -> WS message `ConversationStatusChangedMessage` (`{ type: "conversation.statusChanged"; -> conversationId; status }`) is broadcast to ALL clients on every status change. `GET -> /conversations` gains an optional `?status=active,idle` filter (comma-separated; default = all). -> `POST /conversations/:id/close` now also sets status to `closed` (persists across restarts). -> The FE fetches `?status=active,idle` on connect to restore the tab bar across devices. 
-> -> **2026-06-21 delta (conversation.open handoff — package bumped `0.12.0` → `0.13.0`, ADDITIVE):** -> adds the `conversation.open` WS broadcast — when the CLI's `--open` flag fires -> (`POST /conversations/:id/open`), the backend broadcasts a `ConversationOpenMessage` -> (`{ type: "conversation.open"; conversationId }`) to ALL connected WS clients. Additive to -> `WsServerMessage`. The FE handles it by opening/focusing a tab for the `conversationId`. Also -> adds conversation metadata endpoints (not yet consumed by the FE): `GET /conversations` (list, -> `ConversationListResponse`/`ConversationMeta`), `GET /conversations/:id/last` (blocking last -> message, `LastMessageResponse`), `GET`/`PUT /conversations/:id/title` (`TitleResponse`/ -> `SetTitleRequest`), and `POST /conversations/:id/open` (`OpenConversationResponse`). Re-exports -> `ConversationMeta` from `wire@0.9.0`. -> -> **2026-06-21 delta (message-queue + steering handoff — package bumped `0.11.0` → `0.12.0`, ADDITIVE):** -> adds the enqueue surface for the per-conversation message queue (the wire types `QueuedMessage` / -> `QueuePayload` + the new `steering` `AgentEvent` live in `wire@0.8.0`, re-exported here). Two -> additive shapes: -> 1. **WS `chat.queue` op** — `ChatQueueMessage { type: "chat.queue"; conversationId; text }` (a -> new `WsClientMessage` union member). Fire-and-forget: on success the server emits NOTHING back -> — the message-queue SURFACE updates (the new message appears in the snapshot). On failure (empty -> `text`, unknown conversation) the server replies `chat.error`. **Auto-start when idle -> (server-owned):** if no turn is active, `chat.queue` does NOT queue — it STARTS A NEW TURN with -> the message as its opening prompt (equivalent to `chat.send`). So a single op works for both -> "steer during generation" and "send"; the client doesn't pick. `text` must be non-empty after trim. -> 2. 
**HTTP `POST /conversations/:id/queue`** — body `QueueRequest { text }` → `QueueResponse -> { conversationId; startedTurn: boolean; queue: QueuedMessage[] }`. `startedTurn: true` = was -> idle, a new turn started (the message is the turn's opening prompt, NOT a queued steering -> message); `startedTurn: false` = a turn was active, the message was queued (the `queue` -> snapshot includes it). Empty/whitespace `text` → HTTP 400 `{ error }`. The FE uses the WS op. -> -> The queue is read via a per-conversation SURFACE (`message-queue`, scope `conversation`; one -> `custom` field, `rendererId: "message-queue"`, `payload: QueuePayload`) — NOT via the chat stream. -> See the handoff for the full flow (steering event, carry-to-new-turn, move-vs-duplicate). -> -> **2026-06-12 delta (reasoning-effort handoff — package bumped `0.10.0` → `0.11.0`, ADDITIVE):** -> the thinking-depth knob (`ReasoningEffort`, re-exported from `wire@0.7.0`) lands in TWO scopes, -> resolved server-side per turn (per-turn override → persisted conversation value → default -> `"high"`; do NOT re-implement the chain client-side): -> 1. **Per-turn override** — optional `reasoningEffort?: ReasoningEffort` on `ChatRequest` (and -> therefore on WS `chat.send`, which extends it). Applies to THAT turn only; never persists. -> OMIT the key for "no override" (never send `null`/`""`). -> 2. **Persisted per-conversation setting** — `GET /conversations/:id/reasoning-effort` → -> `ReasoningEffortResponse { conversationId, reasoningEffort: ReasoningEffort | null }` -> (`null` = never set ⇒ the default `"high"` applies, NOT "off") and -> `PUT /conversations/:id/reasoning-effort` body `SetReasoningEffortRequest -> { reasoningEffort }`. Takes effect from the NEXT turn. -> Validation: an unrecognized level → HTTP 400 `{ error }` listing the valid levels (same for the -> WS path via the standard `chat.send` error reply). 
Cache note: CHANGING the level changes the -> provider request shape and can bust the prompt cache for the next turn (one-time re-prefill); -> a stable setting stays cache-safe (warming uses the same resolved effort). -> -> **2026-06-12 delta (CR-5 history windowing — package bumped `0.9.0` → `0.10.0`):** NO type-shape -> change — `GET /conversations/:id` gains two OPTIONAL query params alongside `sinceSeq`: -> **`limit=`** (the NEWEST `k` chunks of the selection, still ASCENDING; a selection with ≤ `k` -> chunks is returned whole; omitted = full selection, byte-identical to the old behavior) and -> **`beforeSeq=`** (exclusive upper bound `seq < s`; combined: `sinceSeq < seq < beforeSeq`). -> `limit`/`beforeSeq` must be POSITIVE integers (`sinceSeq` may still be 0); malformed/zero/negative -> → HTTP 400 `{ error }` naming the param. Seq numbering is now a WRITTEN CONTRACT: 1-based, -> monotonic, gap-free (see `wire@0.6.1` `StoredChunk`), so `hasOlder = oldestLoaded.seq > 1` — there -> is deliberately NO `earliestSeq`/`hasOlder` field. CAVEAT: on a windowed read, `latestSeq` -> describes the returned WINDOW; never regress a tail cursor from a `beforeSeq` backfill page. -> Intended flows: fresh load `?sinceSeq=0&limit=` · tail sync `?sinceSeq=` (no limit) · -> page older in `?beforeSeq=&limit=`. -> -> **2026-06-12 delta (CR-4 cache-warming lifecycle — package bumped `0.8.0` → `0.9.0`):** adds -> `POST /conversations/:id/close` (`CloseConversationResponse`) — the EXPLICIT "user closed this -> conversation's tab" affordance, distinct from a socket disconnect / `chat.unsubscribe` (which -> still NEVER touch the turn or the warming schedule). 
Closing (1) aborts any in-flight turn — the -> kernel stops at the next event boundary, partial messages are PERSISTED, and the turn SEALS -> normally with `finishReason: "aborted"` (watchers receive `done` then `turn-sealed`, so a -> stream-derived "generating" flag clears with no special-casing) — and (2) stops + DISABLES -> cache-warming for the conversation (persisted OFF; reopening does not resume warming). Idempotent: -> closing an idle/unknown conversation is `200` with `abortedTurn: false`. Backend behavior fixes -> riding EXISTING shapes (no other contract change): warming now defaults OFF for a new conversation -> (240s interval default kept; re-enable restores the persisted interval); post-warm surface updates -> now carry the FUTURE `nextWarmAt` (notify-before-reschedule fixed); `nextWarmAt: null` is pushed on -> `turn-start` (nothing scheduled while generating) and when warming is/became disabled. Caveat: the -> warming opt-in is NOT yet re-hydrated across a backend restart (reads disabled until toggled again). -> -> **2026-06-12 delta (CR-3 user-message handoff — package bumped `0.7.0` → `0.8.0`):** NO transport -> shape change — it re-exports `AgentEvent` (which `chat.delta` / `/chat` NDJSON carry), and that union -> gained the additive `TurnInputEvent` (`{ type: "user-message"; conversationId; turnId; text }`), the -> turn's user prompt, emitted as the FIRST event of every turn (before `turn-start`) and replayed to -> watchers/late-joiners. See the `wire.reference.md` CR-3 delta + `TurnInputEvent` for the definition. -> -> **2026-06-12 delta (turn-continuity handoff — package bumped `0.6.0` → `0.7.0`, ADDITIVE):** a turn -> is no longer bound to the WS connection — it runs to completion server-side regardless of any -> client, and any number of connections can watch the same conversation (incl. a late-joiner that -> connects mid-turn). 
Two new client→server WS messages: `ChatSubscribeMessage` -> (`{ type: "chat.subscribe"; conversationId }`) and `ChatUnsubscribeMessage` -> (`{ type: "chat.unsubscribe"; conversationId }`); `WsClientMessage` now unions both. Server→client -> is UNCHANGED (turn events still arrive as `chat.delta`, replayed AND live). Semantics: `chat.subscribe` -> registers the connection + immediately REPLAYS the in-flight turn's events so far (from its -> `turn-start`) then streams live (nothing replayed if idle); `chat.send` AUTO-subscribes the sending -> connection (a 2nd send while generating ⇒ `chat.error` + you stay subscribed to watch the running -> turn); `chat.unsubscribe`/socket-close drops the subscription but NEVER stops the turn; subscriptions -> persist across turns. FE consumes via the `chat` feature + app store (re-subscribe every open -> conversation on (re)connect + page load; derive a "running" state structurally from -> `turn-start`…no-`done`/`turn-sealed`-yet). OUT of scope: per-step crash-resume, concurrent-send -> arbitration. -> -> **2026-06-12 delta (context-size handoff — package bumped `0.5.0` → `0.6.0`, depends on -> `wire@0.5.0`):** no NEW transport shape — the optional `contextSize?: number` rides the -> re-exported `TurnMetrics` (so `ConversationMetricsResponse.turns[].contextSize`) and, live, the -> `TurnDoneEvent.contextSize` on the `done` AgentEvent (`chat.delta` WS / `/chat` NDJSON). On -> (re)hydrate take the LAST `turns[]` element with a defined `contextSize`; live, update on `done`. -> See the `wire.reference.md` context-size delta for the definition. -> -> **2026-06 delta (cache-warming handoff, additive — package still `0.4.0`):** adds -> `POST /chat/warm` (`WarmRequest` → `WarmResponse`) for an on-demand prompt-cache warm, and the -> throughput axis `GET /metrics/throughput` (`ThroughputResponse`/`ThroughputModelStat`/ -> `ThroughputPeriod`). The warm is NEVER persisted/streamed and NEVER folded into a conversation's -> real usage. 
Pairs with the `cache-warming` conversation-scoped surface + `NumberField` in -> `ui-contract.reference.md`. -> -> **2026-06-11 delta (cache-rate fix handoff, additive — package still `0.4.0`):** `WarmResponse` -> gains `expectedCacheRate` (the warming HEALTH/retention signal, -> `round(cacheReadTokens / (cacheReadTokens + cacheWriteTokens) * 100)`). Consumed FE-side: headlined -> on the "Warm now" result. (No `ui-contract` change — the `cache-warming` surface's new -> `cache-warming-timer` payload + second "cache retention" `stat` ride the EXISTING `custom`/`stat` -> kinds; the FE cache-warming feature parses them.) -> -> **2026-06-11 delta (LSP + cwd handoff — package bumped to `0.5.0`):** adds per-conversation working -> directory `GET /conversations/:id/cwd` + `PUT /conversations/:id/cwd` (`CwdResponse`/`SetCwdRequest`, -> CORS now allows `PUT`) and per-conversation LSP status `GET /conversations/:id/lsp` -> (`LspStatusResponse`/`LspServerInfo`/`LspServerState`). The LSP GET LAZILY spawns+initializes the -> configured servers (can take a moment the first time per cwd; cached after) and returns once each -> server settles to `connected`/`error`. `servers` is `[]` when `cwd` is null. A `/chat`(`/warm`) -> request that omits `cwd` now defaults to the conversation's persisted cwd; one that sends `cwd` -> persists it. Consumed FE-side by the `workspace` feature (cwd field in the Model view + a -> "Language Servers" view). -> -> **0.3.0 change (token + timing metrics):** adds the durable metrics READ endpoint -> `GET /conversations/:id/metrics` → `ConversationMetricsResponse` (`{ turns: TurnMetrics[] }`), and -> re-exports `StepMetrics` / `TurnMetrics` from `@dispatch/wire`. This is a SEPARATE read axis from -> the seq-cursor history (`GET /conversations/:id`): metrics are keyed PER TURN (not per chunk), so -> they get their own route. `turns` is every SEALED turn's `TurnMetrics` in turn order (an in-flight -> turn is absent until its metrics persist post-seal). 
The live `usage`/`step-complete`/`done` -> packets it mirrors are transient (NOT persisted) and ride the `chat.delta`/NDJSON `AgentEvent` -> stream you already consume — see `wire.reference.md`. The contract's OWN chat/history shapes are -> otherwise unchanged from 0.2.0. - -## Endpoints (backend — CORS wildcard `*`, HTTP port 24203, WS port 24205) - -- `POST /chat` — body `ChatRequest` (JSON); response NDJSON stream, one `AgentEvent` per line; - resolved id also in `X-Conversation-Id` header. -- `GET /models` — `ModelsResponse`. -- `GET /conversations/:id?sinceSeq=&beforeSeq=&limit=` — `ConversationHistoryResponse`: - RAW, append-order, seq-ordered slice with `n < seq < s`, windowed to the NEWEST `k` (all params - optional; NOT reconciled — dangling tool-calls returned as-is). `latestSeq` = last chunk's `seq`, - or the requested `sinceSeq` when caught up (empty `chunks`) — a TAIL cursor only; do not regress - a cursor from a windowed/backfill read. `limit`/`beforeSeq` must be positive ints → else 400. -- `GET /conversations/:id/metrics` — `ConversationMetricsResponse`: every SEALED turn's `TurnMetrics` - in turn order (per-turn token + timing; NOT seq-filtered). IMPLEMENTED + LIVE-VERIFIED (probe 17/17). -- `POST /chat/warm` — body `WarmRequest` (JSON) → `200 WarmResponse` (cache-warm usage incl. - `cachePct`); `409 { error }` when the conversation is currently generating; `400 { error }` on a - missing/invalid `conversationId`. The warm is NEVER persisted/streamed/folded into real usage. -- `POST /conversations/:id/close` — no body → `200 CloseConversationResponse`. The EXPLICIT tab-close - affordance: aborts any in-flight turn (persists the partial; seals with `finishReason: "aborted"`) - AND stops + disables cache-warming (persisted OFF). Idempotent (`abortedTurn: false` when idle/unknown). -- `POST /conversations/:id/queue` — body `QueueRequest { text }` → `200 QueueResponse`. 
Enqueue a user - message for mid-turn steering delivery (the WS `chat.queue` op is the FE's path). When a turn is - active, the message is queued + delivered at the next tool-result boundary (a `steering` `AgentEvent` - fires; the message-queue SURFACE updates). When idle, the enqueue STARTS a new turn with the message - as its opening prompt (`startedTurn: true`). Empty/whitespace `text` → `400 { error }`. -- `GET /metrics/throughput?period=day|week|month&date=<...>` — `ThroughputResponse` (token-weighted - tokens/sec per model over the window). Not part of cache-warming; listed for completeness. -- `GET /conversations/:id/cwd` — `CwdResponse` (`cwd` is `null` until set). -- `PUT /conversations/:id/cwd` — body `SetCwdRequest` → `200 CwdResponse`; `400 { error }` if `cwd` - missing/empty. CORS allows `PUT`. -- `GET /conversations/:id/lsp` — `LspStatusResponse`. LAZILY spawns+initializes the configured servers - on the first call per cwd (can take a moment; cached after); returns once each settles to - `connected`/`error`. `servers` is `[]` when `cwd` is null. -- `GET /conversations/:id/reasoning-effort` — `ReasoningEffortResponse` (`reasoningEffort` is `null` - when never set ⇒ default `"high"` applies). Works for an unseen/draft id. -- `PUT /conversations/:id/reasoning-effort` — body `SetReasoningEffortRequest` → - `200 ReasoningEffortResponse`; `400 { error }` on an unrecognized level (the message lists the - valid levels). Persists the conversation's sticky level; effective from the NEXT turn. -- WebSocket on :24205 — ONE path-agnostic socket multiplexes surface ops - (`@dispatch/ui-contract`) + chat ops (below). Open once, send `WsClientMessage`, receive - `WsServerMessage`. Live `AgentEvent` deltas carry `conversationId`+`turnId` but **no `seq`** - (seq lives only on `StoredChunk`, obtained via the `sinceSeq` sync after `turn-sealed`). -- DEFERRED (not built; do not depend on): `GET /conversations` (list). 
(The former deferred - `POST /conversations/:id/cancel` is superseded by `POST /conversations/:id/close`.) - -```ts /** * Transport contract — the typed description of Dispatch's client–server API * (HTTP + WebSocket). * * This package is types-only (zero runtime). It is the single shared surface - * every client imports to know how to talk to the backend. Each side owns its - * OWN (de)serialization: the contract is the SHAPES, not the codec. The - * streaming response payload is the kernel's `AgentEvent` union, re-exported - * here so a client has one import for the whole wire. + * every client imports to know how to talk to the backend — the CLI, the web + * frontend (in its own repo), any third-party client — and the transport-http / + * transport-ws servers import to know what they must accept and emit. + * + * Each side owns its OWN (de)serialization: there is deliberately no shared + * parse/serialize helper here (isolation-over-DRY). The contract is the SHAPES, + * not the codec. The streaming response payload is the kernel's `AgentEvent` + * union, re-exported here so a client has one import for the whole wire. * * The WebSocket carries BOTH chat ops (defined here) and surface ops (defined in * `@dispatch/ui-contract`) over one connection; the unified `WsClientMessage` / - * `WsServerMessage` unions below compose them. + * `WsServerMessage` unions below compose them. Chat ops are new, non-colliding + * `type` variants — there is no channel wrapper, so the shipped surface protocol + * is unchanged. */ import type { SurfaceClientMessage, SurfaceServerMessage } from "@dispatch/ui-contract"; @@ -238,6 +32,7 @@ import type { export type { AgentEvent, + CompactionResult, ConversationMeta, ConversationStatus, QueuedMessage, @@ -290,11 +85,20 @@ export interface ChatRequest { /** * Response body for `GET /models` — the model catalog. * - * Each entry is a model name in `/` form: exactly the - * string a client passes back as `ChatRequest.model`. 
+ * Each entry in `models` is a model name in `/` form: + * exactly the string a client passes back as `ChatRequest.model`. + * `modelInfo` is an optional map from the same `/` key + * to model metadata (e.g. `contextWindow`). Additive — clients that only + * read `models` are unaffected. */ export interface ModelsResponse { readonly models: readonly string[]; + readonly modelInfo?: Readonly>; +} + +/** Per-model metadata returned alongside the model catalog. */ +export interface ModelMetadata { + readonly contextWindow?: number; } /** @@ -352,6 +156,12 @@ export interface ConversationHistoryResponse { * (and per-step) token + timing metrics for a conversation, for a client * reopening a past conversation to render historical usage/latency. * + * This is a SEPARATE axis from the two other read concerns and is deliberately + * its own endpoint: the live `usage`/`step-complete`/`done` events are transient + * (not persisted), and `ConversationHistoryResponse` carries seq-cursor chunk + * CONTENT. Metrics are keyed per TURN (not per chunk) and so are not seq-filtered + * — hence a sibling route rather than a field on the history response. + * * `turns` is every SEALED turn's `TurnMetrics` in turn order. A turn appears only * after its metrics were persisted (post-seal); an in-flight or unsealed turn is * absent until then. @@ -703,8 +513,8 @@ export interface ConversationOpenMessage { /** * Broadcast to all connected WS clients when a conversation's lifecycle status - * changes (`active`/`idle`/`closed`). The FE uses this for cross-device tab - * sync: `closed` → remove the tab; `active` → show a generating indicator. + * changes (active/idle/closed). The frontend uses this to sync tab state across + * devices in real time. 
*/ export interface ConversationStatusChangedMessage { readonly type: "conversation.statusChanged"; @@ -770,8 +580,6 @@ export interface TitleResponse { readonly title: string; } -// ─── Compaction ────────────────────────────────────────────────────────────── - /** * Response for `POST /conversations/:id/compact` — confirms the conversation * history was compacted (old messages summarized, recent messages retained). @@ -784,19 +592,17 @@ export interface CompactResponse { } /** - * Response for `GET /conversations/:id/compact-threshold` — the token count - * at which automatic compaction triggers (0 = manual only; default 350000 - * when not stored). + * Response for `GET /conversations/:id/compact-percent` — the token count + * at which automatic compaction triggers (0 = manual only). */ -export interface CompactThresholdResponse { +export interface CompactPercentResponse { readonly conversationId: string; readonly threshold: number; } /** - * Request body for `PUT /conversations/:id/compact-threshold`. + * Request body for `PUT /conversations/:id/compact-percent`. */ -export interface SetCompactThresholdRequest { +export interface SetCompactPercentRequest { readonly threshold: number; } -``` diff --git a/backend-handoff.md b/backend-handoff.md index 8e86ce4..2768493 100644 --- a/backend-handoff.md +++ b/backend-handoff.md @@ -5,10 +5,10 @@ > **From:** dispatch-web orchestrator · **To:** arch-rewrite orchestrator · **Courier:** the user. > `lsp` does NOT span the repos (AGENTS.md § Backend seam) — every cross-repo ask flows through here. -_Last updated: 2026-06-22 (CR-6 resolved by backend — incremental seq at step boundaries). +_Last updated: 2026-06-22 (context window + percentage-based compact consumed). **FE is current on `ui-contract@0.2.0` / `transport-contract@0.15.0` / `wire@0.11.0`.** 686 tests green. -**Open asks: NONE.** All CRs resolved (CR-1 through CR-6). 
CR-6 not yet consumed by the FE — -see §2 for the adoption plan._ +**Open asks: NONE.** All CRs resolved (CR-1 through CR-6) + context-window + compact-percent +handoff consumed._ --- @@ -93,11 +93,12 @@ the turn seals and `syncTail` fetches everything. ## 3. Likely NEXT backend asks (heads-up, not yet requested) -- **Model max context-window LIMIT** (the denominator for context size) — the FE renders - `contextSize / limit · pct%` + a fill bar in the composer status bar, but the limit is currently - HARDCODED to `1,000,000` as a placeholder (`MAX_CONTEXT` in `features/chat/ui/Composer.svelte`). - When a per-model `contextWindow` (max token capacity) ships, wire the real value through so the - bar/percent are accurate. +- **Model max context-window LIMIT** → **CONSUMED ✅** — `GET /models` now returns + `modelInfo[model].contextWindow`. The Composer uses the real value (falls back to + 1,000,000 when absent). The hardcoded `MAX_CONTEXT` is gone. +- **Percentage-based auto-compact** → **CONSUMED ✅** — `compact-threshold` endpoint + renamed to `compact-percent`; wire field is still `threshold`, now a percent (0-100, default 85, 0 = manual). + CompactionView UI updated from token count to percent input (0-100). - **`GET /conversations`** — conversation list / sidebar (history explorer / switcher); could also expose a per-conversation "last model" so a reopened tab seeds its model from the server. 
- **LSP status over WS** (push) — today the FE HTTP-polls `GET /conversations/:id/lsp` on panel mount diff --git a/src/app/App.svelte b/src/app/App.svelte index 57fe16f..ae09bd5 100644 --- a/src/app/App.svelte +++ b/src/app/App.svelte @@ -17,7 +17,7 @@ ReasoningEffortSelector, type CompactNowResult, type ReasoningEffortSaveResult, - type SaveCompactThresholdResult, + type SaveCompactPercentResult, } from "../features/chat"; import { manifest as conversationCacheManifest } from "../features/conversation-cache"; import { manifest as markdownManifest } from "../features/markdown"; @@ -249,13 +249,13 @@ : { ok: false, error: result.error }; } - async function saveCompactThreshold( - threshold: number, - ): Promise { - const result = await store.setCompactThreshold(threshold); + async function saveCompactPercent( + percent: number, + ): Promise { + const result = await store.setCompactPercent(percent); if (result === null) return null; return result.ok - ? { ok: true, threshold: result.threshold } + ? { ok: true, percent: result.percent } : { ok: false, error: result.error }; } @@ -393,6 +393,7 @@ onQueue={handleQueue} onStop={handleStop} contextSize={store.activeChat.currentContextSize} + contextWindow={store.modelInfo[store.activeModel]?.contextWindow} status={store.activeChat.error ? 
"error" : store.activeChat.generating @@ -482,13 +483,13 @@ {/if} {/key} {:else if kind === "compaction"} - + {#key store.currentConversationId} {/key} {:else if kind === "settings"} diff --git a/src/app/store.svelte.ts b/src/app/store.svelte.ts index bb08585..3f78a97 100644 --- a/src/app/store.svelte.ts +++ b/src/app/store.svelte.ts @@ -1,8 +1,8 @@ import type { ChatDeltaMessage, ChatErrorMessage, + CompactPercentResponse, CompactResponse, - CompactThresholdResponse, ConversationCompactedMessage, ConversationHistoryResponse, ConversationListResponse, @@ -11,10 +11,11 @@ import type { ConversationStatusChangedMessage, CwdResponse, LspStatusResponse, + ModelMetadata, ModelsResponse, ReasoningEffort, ReasoningEffortResponse, - SetCompactThresholdRequest, + SetCompactPercentRequest, SetCwdRequest, SetReasoningEffortRequest, WarmRequest, @@ -73,9 +74,9 @@ export type CompactResult = | { readonly ok: true; readonly response: CompactResponse } | { readonly ok: false; readonly error: string }; -/** Outcome of `PUT /conversations/:id/compact-threshold`. */ -export type CompactThresholdResult = - | { readonly ok: true; readonly threshold: number } +/** Outcome of `PUT /conversations/:id/compact-percent`. */ +export type CompactPercentResult = + | { readonly ok: true; readonly percent: number } | { readonly ok: false; readonly error: string }; /** Outcome of persisting a chat-limit setting (localStorage; FE-local). */ @@ -88,6 +89,8 @@ export interface AppStore { readonly activeConversationId: string | null; readonly activeChat: ChatStore; readonly models: readonly string[]; + /** Per-model metadata (contextWindow, etc.) from `GET /models`. */ + readonly modelInfo: Readonly>; readonly activeModel: string; readonly catalog: ProtocolState["catalog"]; /** Every received surface spec, in catalog order — all auto-subscribed + expanded. 
*/ @@ -152,17 +155,18 @@ export interface AppStore { */ stopGeneration(): void; /** - * The workspace conversation's auto-compact threshold (tokens). `0` = disabled + * The workspace conversation's auto-compact percent (0-100, of the context window). `0` = disabled * (manual only); a positive number = auto-compact triggers when the last * turn's input tokens exceed it. Seeded from the backend on focus change. */ - readonly compactThreshold: number | null; + readonly compactPercent: number | null; /** - * Persist the workspace conversation's auto-compact threshold - * (`PUT /conversations/:id/compact-threshold`). `0` disables; any positive + * Persist the workspace conversation's auto-compact percent + * (`PUT /conversations/:id/compact-percent`). `0` disables; 1-100 sets the + * trigger percent of the model's context window (default 85 when unset); any positive * number enables. Works for a draft too (its id survives promotion). */ - setCompactThreshold(threshold: number): Promise; + setCompactPercent(percent: number): Promise; /** * Fetch the workspace conversation's language-server status (`GET /conversations/:id/lsp`). * The backend lazily spawns servers, so this may take a moment on the first call for a cwd. @@ -233,6 +237,7 @@ function createMetricsSync(httpBase: string, fetchImpl: typeof fetch): MetricsSy export function createAppStore(opts?: CreateAppStoreOptions): AppStore { let protocol = $state(protocolInitialState()); let models = $state([]); + let modelInfo = $state>>({}); let activeModel = $state(DEFAULT_MODEL); const wsLocation = typeof location !== "undefined" ? location : undefined; @@ -358,23 +363,23 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { } } - // The workspace conversation's auto-compact threshold. Seeded from the + // The workspace conversation's auto-compact percent. Seeded from the // backend on focus change; null = not yet fetched. 0 = disabled. 
- let compactThreshold = $state(null); + let compactPercent = $state(null); - /** Refetch the workspace conversation's compact threshold (works for a draft too). */ - async function refreshCompactThreshold(): Promise { + /** Refetch the workspace conversation's compact percent (works for a draft too). */ + async function refreshCompactPercent(): Promise { const id = workspaceConversationId(); - compactThreshold = null; + compactPercent = null; try { const res = await fetchImpl( - `${httpBase}/conversations/${encodeURIComponent(id)}/compact-threshold`, + `${httpBase}/conversations/${encodeURIComponent(id)}/compact-percent`, ); if (!res.ok) return; - const data = (await res.json()) as CompactThresholdResponse; - if (workspaceConversationId() === id) compactThreshold = data.threshold; + const data = (await res.json()) as CompactPercentResponse; + if (workspaceConversationId() === id) compactPercent = data.threshold; } catch { - // Non-fatal: a threshold fetch failure just leaves null. + // Non-fatal: a percent fetch failure just leaves null. } } @@ -542,7 +547,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { syncSubscriptions(); void refreshCwd(); void refreshReasoningEffort(); - void refreshCompactThreshold(); + void refreshCompactPercent(); } // Conversation lifecycle status (backend-owned, pushed via WS + @@ -676,6 +681,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { .then((data) => { if (data === undefined) return; models = data.models; + modelInfo = data.modelInfo ?? 
{}; if (data.models.length > 0 && !data.models.includes(activeModel)) { const first = data.models[0]; if (first !== undefined) { @@ -713,7 +719,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { refreshActiveChat(); void refreshCwd(); void refreshReasoningEffort(); - void refreshCompactThreshold(); + void refreshCompactPercent(); // Fetch the authoritative open-conversation list from the backend (cross- // device tab sync). Merges with the localStorage-restored tabs: opens new @@ -733,6 +739,9 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { get models(): readonly string[] { return models; }, + get modelInfo(): Readonly> { + return modelInfo; + }, get activeModel(): string { return activeModel; }, @@ -759,8 +768,8 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { get reasoningEffort(): ReasoningEffort | null { return reasoningEffort; }, - get compactThreshold(): number | null { - return compactThreshold; + get compactPercent(): number | null { + return compactPercent; }, get chatLimit(): number { return chatLimit; @@ -800,7 +809,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { syncSubscriptions(); void refreshCwd(); void refreshReasoningEffort(); - void refreshCompactThreshold(); + void refreshCompactPercent(); // Now send on the promoted store chatStores.get(conversationId)?.send(text); } else { @@ -837,7 +846,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { syncSubscriptions(); void refreshCwd(); void refreshReasoningEffort(); - void refreshCompactThreshold(); + void refreshCompactPercent(); }, selectTab(conversationId: string): void { @@ -850,7 +859,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { syncSubscriptions(); void refreshCwd(); void refreshReasoningEffort(); - void refreshCompactThreshold(); + void refreshCompactPercent(); }, closeTab(conversationId: string): void { @@ -988,12 +997,12 @@ 
export function createAppStore(opts?: CreateAppStoreOptions): AppStore { } }, - async setCompactThreshold(threshold: number): Promise { + async setCompactPercent(percent: number): Promise { const id = workspaceConversationId(); - const body: SetCompactThresholdRequest = { threshold }; + const body: SetCompactPercentRequest = { threshold: percent }; try { const res = await fetchImpl( - `${httpBase}/conversations/${encodeURIComponent(id)}/compact-threshold`, + `${httpBase}/conversations/${encodeURIComponent(id)}/compact-percent`, { method: "PUT", headers: { "content-type": "application/json" }, @@ -1004,16 +1013,16 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore { const errBody = (await res.json().catch(() => null)) as { error?: string } | null; return { ok: false, - error: errBody?.error ?? `Set compact threshold failed (HTTP ${res.status})`, + error: errBody?.error ?? `Set compact percent failed (HTTP ${res.status})`, }; } - const data = (await res.json()) as CompactThresholdResponse; - if (workspaceConversationId() === id) compactThreshold = data.threshold; - return { ok: true, threshold: data.threshold }; + const data = (await res.json()) as CompactPercentResponse; + if (workspaceConversationId() === id) compactPercent = data.threshold; + return { ok: true, percent: data.threshold }; } catch (err) { return { ok: false, - error: err instanceof Error ? err.message : "Set compact threshold request failed", + error: err instanceof Error ? 
err.message : "Set compact percent request failed", }; } }, diff --git a/src/features/chat/index.ts b/src/features/chat/index.ts index 9c65cd4..1596c53 100644 --- a/src/features/chat/index.ts +++ b/src/features/chat/index.ts @@ -17,7 +17,7 @@ export { export type { ChatStore, ChatStoreDependencies } from "./store.svelte"; export { createChatStore } from "./store.svelte"; export { default as ChatView } from "./ui/ChatView.svelte"; -export type { CompactNowResult, SaveCompactThresholdResult } from "./ui/CompactionView.svelte"; +export type { CompactNowResult, SaveCompactPercentResult } from "./ui/CompactionView.svelte"; export { default as CompactionView } from "./ui/CompactionView.svelte"; export { default as Composer } from "./ui/Composer.svelte"; export { default as ModelSelector } from "./ui/ModelSelector.svelte"; diff --git a/src/features/chat/ui/CompactionView.svelte b/src/features/chat/ui/CompactionView.svelte index ce2a0a0..7bec984 100644 --- a/src/features/chat/ui/CompactionView.svelte +++ b/src/features/chat/ui/CompactionView.svelte @@ -3,54 +3,54 @@ | { readonly ok: true; readonly messagesSummarized: number; readonly messagesKept: number } | { readonly ok: false; readonly error: string }; - export type SaveCompactThresholdResult = - | { readonly ok: true; readonly threshold: number } + export type SaveCompactPercentResult = + | { readonly ok: true; readonly percent: number } | { readonly ok: false; readonly error: string }; let { - threshold, + percent, canCompact, compactNow, - saveThreshold, + savePercent, }: { - /** The conversation's auto-compact threshold, or null when not yet fetched. 0 = disabled. */ - threshold: number | null; + /** The conversation's auto-compact percent (0-100), or null when not yet fetched. 0 = disabled. */ + percent: number | null; /** Whether a real conversation is focused (a draft has nothing to compact). 
*/ canCompact: boolean; compactNow: () => Promise; - saveThreshold: (threshold: number) => Promise; + savePercent: (percent: number) => Promise; } = $props(); - const DEFAULT_THRESHOLD = 350000; + const DEFAULT_PERCENT = 85; let compacting = $state(false); let compactError = $state(null); let compactResult = $state<{ summarized: number; kept: number } | null>(null); - let thresholdInput = $state(""); - let savingThreshold = $state(false); - let thresholdError = $state(null); - let thresholdSaved = $state(false); + let percentInput = $state(""); + let savingPercent = $state(false); + let percentError = $state(null); + let percentSaved = $state(false); // Sync the input from the prop when it changes (focus switch / initial load). - let lastThreshold = $state(null); + let lastPercent = $state(null); $effect(() => { - if (threshold !== lastThreshold) { - lastThreshold = threshold; - thresholdInput = threshold !== null ? String(threshold) : ""; - thresholdError = null; - thresholdSaved = false; + if (percent !== lastPercent) { + lastPercent = percent; + percentInput = percent !== null ? String(percent) : ""; + percentError = null; + percentSaved = false; } }); - const thresholdLabel = $derived( - threshold == null + const percentLabel = $derived( + percent == null ? "Loading…" - : threshold === 0 + : percent === 0 ? "Disabled (manual only)" - : threshold === DEFAULT_THRESHOLD - ? `${threshold.toLocaleString("en-US")} (default)` - : threshold.toLocaleString("en-US"), + : percent === DEFAULT_PERCENT + ? 
`${percent}% (default)` + : `${percent}%`, ); async function handleCompact() { @@ -68,22 +68,22 @@ } } - async function handleSaveThreshold() { - const value = Number.parseInt(thresholdInput, 10); - if (Number.isNaN(value) || value < 0) { - thresholdError = "Must be a non-negative number"; + async function handleSavePercent() { + const value = Number.parseInt(percentInput, 10); + if (Number.isNaN(value) || value < 0 || value > 100) { + percentError = "Must be 0-100"; return; } - savingThreshold = true; - thresholdError = null; - thresholdSaved = false; - const result = await saveThreshold(value); - savingThreshold = false; + savingPercent = true; + percentError = null; + percentSaved = false; + const result = await savePercent(value); + savingPercent = false; if (result === null) return; if (result.ok) { - thresholdSaved = true; + percentSaved = true; } else { - thresholdError = result.error; + percentError = result.error; } } @@ -120,33 +120,34 @@ {/if} - +
- Auto-compact threshold + Auto-compact percent
- tokens - {#if savingThreshold} + % + {#if savingPercent} {/if}

- Current: {thresholdLabel} + Current: {percentLabel}
- 0 disables auto-compact. Default is {DEFAULT_THRESHOLD.toLocaleString("en-US")}. + 0 disables auto-compact. Default is {DEFAULT_PERCENT}%.

- {#if thresholdError} -

{thresholdError}

- {:else if thresholdSaved} + {#if percentError} +

{percentError}

+ {:else if percentSaved}

Saved.

{/if}
diff --git a/src/features/chat/ui/Composer.svelte b/src/features/chat/ui/Composer.svelte index 7030153..fe9ea94 100644 --- a/src/features/chat/ui/Composer.svelte +++ b/src/features/chat/ui/Composer.svelte @@ -1,9 +1,7 @@