7 files changed, 156 insertions, 336 deletions
diff --git a/.dispatch/transport-contract.reference.md b/.dispatch/transport-contract.reference.md
index 608211d..02b48a0 100644
--- a/.dispatch/transport-contract.reference.md
+++ b/.dispatch/transport-contract.reference.md
@@ -1,228 +1,22 @@
-# `@dispatch/transport-contract` — in-repo reference (read THIS, not node_modules)
-
-> MIRRORS the backend's `@dispatch/transport-contract` package source so headless FE agents can read
-> the HTTP + WebSocket wire shapes WITHOUT following the `file:` dep symlink out of this repo (which
-> hangs on a permission prompt). Your CODE still imports `@dispatch/transport-contract` normally —
-> this file is for READING only.
->
-> **Orchestrator:** SNAPSHOT of `[email protected]` (compaction).
-> Depends on `@dispatch/[email protected]` (see `wire.reference.md`) + `@dispatch/[email protected]` (see
-> `ui-contract.reference.md`).
->
-> **2026-06-22 delta (compaction handoff — package bumped `0.14.0` → `0.15.0`, ADDITIVE):**
-> adds conversation compaction — summarize old history + retain recent N messages. Manual:
-> `POST /conversations/:id/compact` (optional `{ keepLastN, modelName }`) → `CompactResponse`.
-> Automatic: after each turn settles, if the last turn's input tokens exceeded the per-conversation
-> `compactThreshold`, compaction runs automatically. `GET`/`PUT /conversations/:id/compact-threshold`
-> (`CompactThresholdResponse`/`SetCompactThresholdRequest`) — `threshold: 0` = disabled; default
-> 350000 when not stored. Re-exports `CompactionResult` from `[email protected]`.
->
-> **2026-06-22 delta (conversation lifecycle handoff — package bumped `0.13.0` → `0.14.0`, ADDITIVE):**
-> adds conversation lifecycle **status** (`active`/`idle`/`closed`) for cross-device tab
-> persistence. `ConversationMeta` (re-exported from `[email protected]`) gains a `status` field. New
-> WS message `ConversationStatusChangedMessage` (`{ type: "conversation.statusChanged";
-> conversationId; status }`) is broadcast to ALL clients on every status change. `GET
-> /conversations` gains an optional `?status=active,idle` filter (comma-separated; default = all).
-> `POST /conversations/:id/close` now also sets status to `closed` (persists across restarts).
-> The FE fetches `?status=active,idle` on connect to restore the tab bar across devices.
->
-> **2026-06-21 delta (conversation.open handoff — package bumped `0.12.0` → `0.13.0`, ADDITIVE):**
-> adds the `conversation.open` WS broadcast — when the CLI's `--open` flag fires
-> (`POST /conversations/:id/open`), the backend broadcasts a `ConversationOpenMessage`
-> (`{ type: "conversation.open"; conversationId }`) to ALL connected WS clients. Additive to
-> `WsServerMessage`. The FE handles it by opening/focusing a tab for the `conversationId`. Also
-> adds conversation metadata endpoints (not yet consumed by the FE): `GET /conversations` (list,
-> `ConversationListResponse`/`ConversationMeta`), `GET /conversations/:id/last` (blocking last
-> message, `LastMessageResponse`), `GET`/`PUT /conversations/:id/title` (`TitleResponse`/
-> `SetTitleRequest`), and `POST /conversations/:id/open` (`OpenConversationResponse`). Re-exports
-> `ConversationMeta` from `[email protected]`.
->
-> **2026-06-21 delta (message-queue + steering handoff — package bumped `0.11.0` → `0.12.0`, ADDITIVE):**
-> adds the enqueue surface for the per-conversation message queue (the wire types `QueuedMessage` /
-> `QueuePayload` + the new `steering` `AgentEvent` live in `[email protected]`, re-exported here). Two
-> additive shapes:
-> 1. **WS `chat.queue` op** — `ChatQueueMessage { type: "chat.queue"; conversationId; text }` (a
->    new `WsClientMessage` union member). Fire-and-forget: on success the server emits NOTHING back
->    — the message-queue SURFACE updates (the new message appears in the snapshot). On failure (empty
->    `text`, unknown conversation) the server replies `chat.error`. **Auto-start when idle
->    (server-owned):** if no turn is active, `chat.queue` does NOT queue — it STARTS A NEW TURN with
->    the message as its opening prompt (equivalent to `chat.send`). So a single op works for both
->    "steer during generation" and "send"; the client doesn't pick. `text` must be non-empty after trim.
-> 2. **HTTP `POST /conversations/:id/queue`** — body `QueueRequest { text }` → `QueueResponse
->    { conversationId; startedTurn: boolean; queue: QueuedMessage[] }`. `startedTurn: true` = was
->    idle, a new turn started (the message is the turn's opening prompt, NOT a queued steering
->    message); `startedTurn: false` = a turn was active, the message was queued (the `queue`
->    snapshot includes it). Empty/whitespace `text` → HTTP 400 `{ error }`. The FE uses the WS op.
->
-> The queue is read via a per-conversation SURFACE (`message-queue`, scope `conversation`; one
-> `custom` field, `rendererId: "message-queue"`, `payload: QueuePayload`) — NOT via the chat stream.
-> See the handoff for the full flow (steering event, carry-to-new-turn, move-vs-duplicate).
->
-> **2026-06-12 delta (reasoning-effort handoff — package bumped `0.10.0` → `0.11.0`, ADDITIVE):**
-> the thinking-depth knob (`ReasoningEffort`, re-exported from `[email protected]`) lands in TWO scopes,
-> resolved server-side per turn (per-turn override → persisted conversation value → default
-> `"high"`; do NOT re-implement the chain client-side):
-> 1. **Per-turn override** — optional `reasoningEffort?: ReasoningEffort` on `ChatRequest` (and
->    therefore on WS `chat.send`, which extends it). Applies to THAT turn only; never persists.
->    OMIT the key for "no override" (never send `null`/`""`).
-> 2. **Persisted per-conversation setting** — `GET /conversations/:id/reasoning-effort` →
->    `ReasoningEffortResponse { conversationId, reasoningEffort: ReasoningEffort | null }`
->    (`null` = never set ⇒ the default `"high"` applies, NOT "off") and
->    `PUT /conversations/:id/reasoning-effort` body `SetReasoningEffortRequest
->    { reasoningEffort }`. Takes effect from the NEXT turn.
-> Validation: an unrecognized level → HTTP 400 `{ error }` listing the valid levels (same for the
-> WS path via the standard `chat.send` error reply). Cache note: CHANGING the level changes the
-> provider request shape and can bust the prompt cache for the next turn (one-time re-prefill);
-> a stable setting stays cache-safe (warming uses the same resolved effort).
->
-> **2026-06-12 delta (CR-5 history windowing — package bumped `0.9.0` → `0.10.0`):** NO type-shape
-> change — `GET /conversations/:id` gains two OPTIONAL query params alongside `sinceSeq`:
-> **`limit=<k>`** (the NEWEST `k` chunks of the selection, still ASCENDING; a selection with ≤ `k`
-> chunks is returned whole; omitted = full selection, byte-identical to the old behavior) and
-> **`beforeSeq=<s>`** (exclusive upper bound `seq < s`; combined: `sinceSeq < seq < beforeSeq`).
-> `limit`/`beforeSeq` must be POSITIVE integers (`sinceSeq` may still be 0); malformed/zero/negative
-> → HTTP 400 `{ error }` naming the param. Seq numbering is now a WRITTEN CONTRACT: 1-based,
-> monotonic, gap-free (see `[email protected]` `StoredChunk`), so `hasOlder = oldestLoaded.seq > 1` — there
-> is deliberately NO `earliestSeq`/`hasOlder` field. CAVEAT: on a windowed read, `latestSeq`
-> describes the returned WINDOW; never regress a tail cursor from a `beforeSeq` backfill page.
-> Intended flows: fresh load `?sinceSeq=0&limit=<k>` · tail sync `?sinceSeq=<cursor>` (no limit) ·
-> page older in `?beforeSeq=<oldestLoadedSeq>&limit=<k>`.
->
-> **2026-06-12 delta (CR-4 cache-warming lifecycle — package bumped `0.8.0` → `0.9.0`):** adds
-> `POST /conversations/:id/close` (`CloseConversationResponse`) — the EXPLICIT "user closed this
-> conversation's tab" affordance, distinct from a socket disconnect / `chat.unsubscribe` (which
-> still NEVER touch the turn or the warming schedule). Closing (1) aborts any in-flight turn — the
-> kernel stops at the next event boundary, partial messages are PERSISTED, and the turn SEALS
-> normally with `finishReason: "aborted"` (watchers receive `done` then `turn-sealed`, so a
-> stream-derived "generating" flag clears with no special-casing) — and (2) stops + DISABLES
-> cache-warming for the conversation (persisted OFF; reopening does not resume warming). Idempotent:
-> closing an idle/unknown conversation is `200` with `abortedTurn: false`. Backend behavior fixes
-> riding EXISTING shapes (no other contract change): warming now defaults OFF for a new conversation
-> (240s interval default kept; re-enable restores the persisted interval); post-warm surface updates
-> now carry the FUTURE `nextWarmAt` (notify-before-reschedule fixed); `nextWarmAt: null` is pushed on
-> `turn-start` (nothing scheduled while generating) and when warming is/became disabled. Caveat: the
-> warming opt-in is NOT yet re-hydrated across a backend restart (reads disabled until toggled again).
->
-> **2026-06-12 delta (CR-3 user-message handoff — package bumped `0.7.0` → `0.8.0`):** NO transport
-> shape change — it re-exports `AgentEvent` (which `chat.delta` / `/chat` NDJSON carry), and that union
-> gained the additive `TurnInputEvent` (`{ type: "user-message"; conversationId; turnId; text }`), the
-> turn's user prompt, emitted as the FIRST event of every turn (before `turn-start`) and replayed to
-> watchers/late-joiners. See the `wire.reference.md` CR-3 delta + `TurnInputEvent` for the definition.
->
-> **2026-06-12 delta (turn-continuity handoff — package bumped `0.6.0` → `0.7.0`, ADDITIVE):** a turn
-> is no longer bound to the WS connection — it runs to completion server-side regardless of any
-> client, and any number of connections can watch the same conversation (incl. a late-joiner that
-> connects mid-turn). Two new client→server WS messages: `ChatSubscribeMessage`
-> (`{ type: "chat.subscribe"; conversationId }`) and `ChatUnsubscribeMessage`
-> (`{ type: "chat.unsubscribe"; conversationId }`); `WsClientMessage` now unions both. Server→client
-> is UNCHANGED (turn events still arrive as `chat.delta`, replayed AND live). Semantics: `chat.subscribe`
-> registers the connection + immediately REPLAYS the in-flight turn's events so far (from its
-> `turn-start`) then streams live (nothing replayed if idle); `chat.send` AUTO-subscribes the sending
-> connection (a 2nd send while generating ⇒ `chat.error` + you stay subscribed to watch the running
-> turn); `chat.unsubscribe`/socket-close drops the subscription but NEVER stops the turn; subscriptions
-> persist across turns. FE consumes via the `chat` feature + app store (re-subscribe every open
-> conversation on (re)connect + page load; derive a "running" state structurally from
-> `turn-start`…no-`done`/`turn-sealed`-yet). OUT of scope: per-step crash-resume, concurrent-send
-> arbitration.
->
-> **2026-06-12 delta (context-size handoff — package bumped `0.5.0` → `0.6.0`, depends on
-> `[email protected]`):** no NEW transport shape — the optional `contextSize?: number` rides the
-> re-exported `TurnMetrics` (so `ConversationMetricsResponse.turns[].contextSize`) and, live, the
-> `TurnDoneEvent.contextSize` on the `done` AgentEvent (`chat.delta` WS / `/chat` NDJSON). On
-> (re)hydrate take the LAST `turns[]` element with a defined `contextSize`; live, update on `done`.
-> See the `wire.reference.md` context-size delta for the definition.
->
-> **2026-06 delta (cache-warming handoff, additive — package still `0.4.0`):** adds
-> `POST /chat/warm` (`WarmRequest` → `WarmResponse`) for an on-demand prompt-cache warm, and the
-> throughput axis `GET /metrics/throughput` (`ThroughputResponse`/`ThroughputModelStat`/
-> `ThroughputPeriod`). The warm is NEVER persisted/streamed and NEVER folded into a conversation's
-> real usage. Pairs with the `cache-warming` conversation-scoped surface + `NumberField` in
-> `ui-contract.reference.md`.
->
-> **2026-06-11 delta (cache-rate fix handoff, additive — package still `0.4.0`):** `WarmResponse`
-> gains `expectedCacheRate` (the warming HEALTH/retention signal,
-> `round(cacheReadTokens / (cacheReadTokens + cacheWriteTokens) * 100)`). Consumed FE-side: headlined
-> on the "Warm now" result. (No `ui-contract` change — the `cache-warming` surface's new
-> `cache-warming-timer` payload + second "cache retention" `stat` ride the EXISTING `custom`/`stat`
-> kinds; the FE cache-warming feature parses them.)
->
-> **2026-06-11 delta (LSP + cwd handoff — package bumped to `0.5.0`):** adds per-conversation working
-> directory `GET /conversations/:id/cwd` + `PUT /conversations/:id/cwd` (`CwdResponse`/`SetCwdRequest`,
-> CORS now allows `PUT`) and per-conversation LSP status `GET /conversations/:id/lsp`
-> (`LspStatusResponse`/`LspServerInfo`/`LspServerState`). The LSP GET LAZILY spawns+initializes the
-> configured servers (can take a moment the first time per cwd; cached after) and returns once each
-> server settles to `connected`/`error`. `servers` is `[]` when `cwd` is null. A `/chat`(`/warm`)
-> request that omits `cwd` now defaults to the conversation's persisted cwd; one that sends `cwd`
-> persists it. Consumed FE-side by the `workspace` feature (cwd field in the Model view + a
-> "Language Servers" view).
->
-> **0.3.0 change (token + timing metrics):** adds the durable metrics READ endpoint
-> `GET /conversations/:id/metrics` → `ConversationMetricsResponse` (`{ turns: TurnMetrics[] }`), and
-> re-exports `StepMetrics` / `TurnMetrics` from `@dispatch/wire`. This is a SEPARATE read axis from
-> the seq-cursor history (`GET /conversations/:id`): metrics are keyed PER TURN (not per chunk), so
-> they get their own route. `turns` is every SEALED turn's `TurnMetrics` in turn order (an in-flight
-> turn is absent until its metrics persist post-seal). The live `usage`/`step-complete`/`done`
-> packets it mirrors are transient (NOT persisted) and ride the `chat.delta`/NDJSON `AgentEvent`
-> stream you already consume — see `wire.reference.md`. The contract's OWN chat/history shapes are
-> otherwise unchanged from 0.2.0.
-
-## Endpoints (backend — CORS wildcard `*`, HTTP port 24203, WS port 24205)
-
-- `POST /chat` — body `ChatRequest` (JSON); response NDJSON stream, one `AgentEvent` per line;
-  resolved id also in `X-Conversation-Id` header.
-- `GET /models` — `ModelsResponse`.
-- `GET /conversations/:id?sinceSeq=<n>&beforeSeq=<s>&limit=<k>` — `ConversationHistoryResponse`:
-  RAW, append-order, seq-ordered slice with `n < seq < s`, windowed to the NEWEST `k` (all params
-  optional; NOT reconciled — dangling tool-calls returned as-is). `latestSeq` = last chunk's `seq`,
-  or the requested `sinceSeq` when caught up (empty `chunks`) — a TAIL cursor only; do not regress
-  a cursor from a windowed/backfill read. `limit`/`beforeSeq` must be positive ints → else 400.
-- `GET /conversations/:id/metrics` — `ConversationMetricsResponse`: every SEALED turn's `TurnMetrics`
-  in turn order (per-turn token + timing; NOT seq-filtered). IMPLEMENTED + LIVE-VERIFIED (probe 17/17).
-- `POST /chat/warm` — body `WarmRequest` (JSON) → `200 WarmResponse` (cache-warm usage incl.
-  `cachePct`); `409 { error }` when the conversation is currently generating; `400 { error }` on a
-  missing/invalid `conversationId`. The warm is NEVER persisted/streamed/folded into real usage.
-- `POST /conversations/:id/close` — no body → `200 CloseConversationResponse`. The EXPLICIT tab-close
-  affordance: aborts any in-flight turn (persists the partial; seals with `finishReason: "aborted"`)
-  AND stops + disables cache-warming (persisted OFF). Idempotent (`abortedTurn: false` when idle/unknown).
-- `POST /conversations/:id/queue` — body `QueueRequest { text }` → `200 QueueResponse`. Enqueue a user
-  message for mid-turn steering delivery (the WS `chat.queue` op is the FE's path). When a turn is
-  active, the message is queued + delivered at the next tool-result boundary (a `steering` `AgentEvent`
-  fires; the message-queue SURFACE updates). When idle, the enqueue STARTS a new turn with the message
-  as its opening prompt (`startedTurn: true`). Empty/whitespace `text` → `400 { error }`.
-- `GET /metrics/throughput?period=day|week|month&date=<...>` — `ThroughputResponse` (token-weighted
-  tokens/sec per model over the window). Not part of cache-warming; listed for completeness.
-- `GET /conversations/:id/cwd` — `CwdResponse` (`cwd` is `null` until set).
-- `PUT /conversations/:id/cwd` — body `SetCwdRequest` → `200 CwdResponse`; `400 { error }` if `cwd`
-  missing/empty. CORS allows `PUT`.
-- `GET /conversations/:id/lsp` — `LspStatusResponse`. LAZILY spawns+initializes the configured servers
-  on the first call per cwd (can take a moment; cached after); returns once each settles to
-  `connected`/`error`. `servers` is `[]` when `cwd` is null.
-- `GET /conversations/:id/reasoning-effort` — `ReasoningEffortResponse` (`reasoningEffort` is `null`
-  when never set ⇒ default `"high"` applies). Works for an unseen/draft id.
-- `PUT /conversations/:id/reasoning-effort` — body `SetReasoningEffortRequest` →
-  `200 ReasoningEffortResponse`; `400 { error }` on an unrecognized level (the message lists the
-  valid levels). Persists the conversation's sticky level; effective from the NEXT turn.
-- WebSocket on :24205 — ONE path-agnostic socket multiplexes surface ops
-  (`@dispatch/ui-contract`) + chat ops (below). Open once, send `WsClientMessage`, receive
-  `WsServerMessage`. Live `AgentEvent` deltas carry `conversationId`+`turnId` but **no `seq`**
-  (seq lives only on `StoredChunk`, obtained via the `sinceSeq` sync after `turn-sealed`).
-- DEFERRED (not built; do not depend on): `GET /conversations` (list). (The former deferred
-  `POST /conversations/:id/cancel` is superseded by `POST /conversations/:id/close`.)
-
-```ts
 /**
  * Transport contract — the typed description of Dispatch's client–server API
  * (HTTP + WebSocket).
  *
  * This package is types-only (zero runtime). It is the single shared surface
- * every client imports to know how to talk to the backend. Each side owns its
- * OWN (de)serialization: the contract is the SHAPES, not the codec. The
- * streaming response payload is the kernel's `AgentEvent` union, re-exported
- * here so a client has one import for the whole wire.
+ * every client imports to know how to talk to the backend — the CLI, the web
+ * frontend (in its own repo), any third-party client — and the transport-http /
+ * transport-ws servers import to know what they must accept and emit.
+ *
+ * Each side owns its OWN (de)serialization: there is deliberately no shared
+ * parse/serialize helper here (isolation-over-DRY). The contract is the SHAPES,
+ * not the codec. The streaming response payload is the kernel's `AgentEvent`
+ * union, re-exported here so a client has one import for the whole wire.
  *
  * The WebSocket carries BOTH chat ops (defined here) and surface ops (defined in
  * `@dispatch/ui-contract`) over one connection; the unified `WsClientMessage` /
- * `WsServerMessage` unions below compose them.
+ * `WsServerMessage` unions below compose them. Chat ops are new, non-colliding
+ * `type` variants — there is no channel wrapper, so the shipped surface protocol
+ * is unchanged.
  */
 
 import type { SurfaceClientMessage, SurfaceServerMessage } from "@dispatch/ui-contract";
@@ -238,6 +32,7 @@ import type {
 
 export type {
 	AgentEvent,
+	CompactionResult,
 	ConversationMeta,
 	ConversationStatus,
 	QueuedMessage,
@@ -290,11 +85,20 @@ export interface ChatRequest {
 /**
  * Response body for `GET /models` — the model catalog.
  *
- * Each entry is a model name in `<credentialName>/<model>` form: exactly the
- * string a client passes back as `ChatRequest.model`.
+ * Each entry in `models` is a model name in `<credentialName>/<model>` form:
+ * exactly the string a client passes back as `ChatRequest.model`.
+ * `modelInfo` is an optional map from the same `<credentialName>/<model>` key
+ * to model metadata (e.g. `contextWindow`). Additive — clients that only
+ * read `models` are unaffected.
  */
 export interface ModelsResponse {
 	readonly models: readonly string[];
+	readonly modelInfo?: Readonly<Record<string, ModelMetadata>>;
+}
+
+/** Per-model metadata returned alongside the model catalog. */
+export interface ModelMetadata {
+	readonly contextWindow?: number;
 }
 
 /**
@@ -352,6 +156,12 @@ export interface ConversationHistoryResponse {
  * (and per-step) token + timing metrics for a conversation, for a client
  * reopening a past conversation to render historical usage/latency.
  *
+ * This is a SEPARATE axis from the two other read concerns and is deliberately
+ * its own endpoint: the live `usage`/`step-complete`/`done` events are transient
+ * (not persisted), and `ConversationHistoryResponse` carries seq-cursor chunk
+ * CONTENT. Metrics are keyed per TURN (not per chunk) and so are not seq-filtered
+ * — hence a sibling route rather than a field on the history response.
+ *
  * `turns` is every SEALED turn's `TurnMetrics` in turn order. A turn appears only
  * after its metrics were persisted (post-seal); an in-flight or unsealed turn is
  * absent until then.
@@ -703,8 +513,8 @@ export interface ConversationOpenMessage {
 
 /**
  * Broadcast to all connected WS clients when a conversation's lifecycle status
- * changes (`active`/`idle`/`closed`). The FE uses this for cross-device tab
- * sync: `closed` → remove the tab; `active` → show a generating indicator.
+ * changes (active/idle/closed). The frontend uses this to sync tab state across
+ * devices in real time.
  */
 export interface ConversationStatusChangedMessage {
 	readonly type: "conversation.statusChanged";
@@ -770,8 +580,6 @@ export interface TitleResponse {
 	readonly title: string;
 }
 
-// ─── Compaction ──────────────────────────────────────────────────────────────
-
 /**
  * Response for `POST /conversations/:id/compact` — confirms the conversation
  * history was compacted (old messages summarized, recent messages retained).
@@ -784,19 +592,17 @@ export interface CompactResponse {
 }
 
 /**
- * Response for `GET /conversations/:id/compact-threshold` — the token count
- * at which automatic compaction triggers (0 = manual only; default 350000
- * when not stored).
+ * Response for `GET /conversations/:id/compact-percent` — the percent (0-100)
+ * of the model's context window at which automatic compaction triggers (0 = manual only).
  */
-export interface CompactThresholdResponse {
+export interface CompactPercentResponse {
 	readonly conversationId: string;
 	readonly threshold: number;
 }
 
 /**
- * Request body for `PUT /conversations/:id/compact-threshold`.
+ * Request body for `PUT /conversations/:id/compact-percent`.
  */
-export interface SetCompactThresholdRequest {
+export interface SetCompactPercentRequest {
 	readonly threshold: number;
 }
-```
diff --git a/backend-handoff.md b/backend-handoff.md
index 8e86ce4..2768493 100644
--- a/backend-handoff.md
+++ b/backend-handoff.md
@@ -5,10 +5,10 @@
 > **From:** dispatch-web orchestrator · **To:** arch-rewrite orchestrator · **Courier:** the user.
 > `lsp` does NOT span the repos (AGENTS.md § Backend seam) — every cross-repo ask flows through here.
 
-_Last updated: 2026-06-22 (CR-6 resolved by backend — incremental seq at step boundaries).
+_Last updated: 2026-06-22 (context window + percentage-based compact consumed).
 **FE is current on `[email protected]` / `[email protected]` / `[email protected]`.** 686 tests green.
-**Open asks: NONE.** All CRs resolved (CR-1 through CR-6). CR-6 not yet consumed by the FE —
-see §2 for the adoption plan._
+**Open asks: NONE.** All CRs resolved (CR-1 through CR-6) + context-window + compact-percent
+handoff consumed._
 
 ---
 
@@ -93,11 +93,12 @@ the turn seals and `syncTail` fetches everything.
 
 ## 3. Likely NEXT backend asks (heads-up, not yet requested)
 
-- **Model max context-window LIMIT** (the denominator for context size) — the FE renders
-  `contextSize / limit · pct%` + a fill bar in the composer status bar, but the limit is currently
-  HARDCODED to `1,000,000` as a placeholder (`MAX_CONTEXT` in `features/chat/ui/Composer.svelte`).
-  When a per-model `contextWindow` (max token capacity) ships, wire the real value through so the
-  bar/percent are accurate.
+- **Model max context-window LIMIT** → **CONSUMED ✅** — `GET /models` now returns
+  `modelInfo[model].contextWindow`. The Composer uses the real value (falls back to
+  1,000,000 when absent). The hardcoded `MAX_CONTEXT` is gone.
+- **Percentage-based auto-compact** → **CONSUMED ✅** — `compact-threshold` endpoint
+  renamed to `compact-percent`; field is now `percent` (0-100, default 85, 0 = manual).
+  CompactionView UI updated from token count to percent input (0-100).
 - **`GET /conversations`** — conversation list / sidebar (history explorer / switcher); could also
   expose a per-conversation "last model" so a reopened tab seeds its model from the server.
 - **LSP status over WS** (push) — today the FE HTTP-polls `GET /conversations/:id/lsp` on panel mount
diff --git a/src/app/App.svelte b/src/app/App.svelte
index 57fe16f..ae09bd5 100644
--- a/src/app/App.svelte
+++ b/src/app/App.svelte
@@ -17,7 +17,7 @@
 		ReasoningEffortSelector,
 		type CompactNowResult,
 		type ReasoningEffortSaveResult,
-		type SaveCompactThresholdResult,
+		type SaveCompactPercentResult,
 	} from "../features/chat";
 	import { manifest as conversationCacheManifest } from "../features/conversation-cache";
 	import { manifest as markdownManifest } from "../features/markdown";
@@ -249,13 +249,13 @@
 			: { ok: false, error: result.error };
 	}
 
-	async function saveCompactThreshold(
-		threshold: number,
-	): Promise<SaveCompactThresholdResult | null> {
-		const result = await store.setCompactThreshold(threshold);
+	async function saveCompactPercent(
+		percent: number,
+	): Promise<SaveCompactPercentResult | null> {
+		const result = await store.setCompactPercent(percent);
 		if (result === null) return null;
 		return result.ok
-			? { ok: true, threshold: result.threshold }
+			? { ok: true, percent: result.percent }
 			: { ok: false, error: result.error };
 	}
 
@@ -393,6 +393,7 @@
 			onQueue={handleQueue}
 			onStop={handleStop}
 			contextSize={store.activeChat.currentContextSize}
+			contextWindow={store.modelInfo[store.activeModel]?.contextWindow}
 			status={store.activeChat.error
 				? "error"
 				: store.activeChat.generating
@@ -482,13 +483,13 @@
 			{/if}
 		{/key}
 	{:else if kind === "compaction"}
-		<!-- Re-mount per conversation so the threshold + feedback can't bleed across tabs. -->
+		<!-- Re-mount per conversation so the percent + feedback can't bleed across tabs. -->
 		{#key store.currentConversationId}
 			<CompactionView
-				threshold={store.compactThreshold}
+				percent={store.compactPercent}
 				canCompact={store.activeConversationId !== null}
 				{compactNow}
-				saveThreshold={saveCompactThreshold}
+				savePercent={saveCompactPercent}
 			/>
 		{/key}
 	{:else if kind === "settings"}
diff --git a/src/app/store.svelte.ts b/src/app/store.svelte.ts
index bb08585..3f78a97 100644
--- a/src/app/store.svelte.ts
+++ b/src/app/store.svelte.ts
@@ -1,8 +1,8 @@
 import type {
 	ChatDeltaMessage,
 	ChatErrorMessage,
+	CompactPercentResponse,
 	CompactResponse,
-	CompactThresholdResponse,
 	ConversationCompactedMessage,
 	ConversationHistoryResponse,
 	ConversationListResponse,
@@ -11,10 +11,11 @@ import type {
 	ConversationStatusChangedMessage,
 	CwdResponse,
 	LspStatusResponse,
+	ModelMetadata,
 	ModelsResponse,
 	ReasoningEffort,
 	ReasoningEffortResponse,
-	SetCompactThresholdRequest,
+	SetCompactPercentRequest,
 	SetCwdRequest,
 	SetReasoningEffortRequest,
 	WarmRequest,
@@ -73,9 +74,9 @@ export type CompactResult =
 	| { readonly ok: true; readonly response: CompactResponse }
 	| { readonly ok: false; readonly error: string };
 
-/** Outcome of `PUT /conversations/:id/compact-threshold`. */
-export type CompactThresholdResult =
-	| { readonly ok: true; readonly threshold: number }
+/** Outcome of `PUT /conversations/:id/compact-percent`. */
+export type CompactPercentResult =
+	| { readonly ok: true; readonly percent: number }
 	| { readonly ok: false; readonly error: string };
 
 /** Outcome of persisting a chat-limit setting (localStorage; FE-local). */
@@ -88,6 +89,8 @@ export interface AppStore {
 	readonly activeConversationId: string | null;
 	readonly activeChat: ChatStore;
 	readonly models: readonly string[];
+	/** Per-model metadata (contextWindow, etc.) from `GET /models`. */
+	readonly modelInfo: Readonly<Record<string, ModelMetadata>>;
 	readonly activeModel: string;
 	readonly catalog: ProtocolState["catalog"];
 	/** Every received surface spec, in catalog order — all auto-subscribed + expanded. */
@@ -152,17 +155,18 @@ export interface AppStore {
 	 */
 	stopGeneration(): void;
 	/**
-	 * The workspace conversation's auto-compact threshold (tokens). `0` = disabled
+	 * The workspace conversation's auto-compact trigger, a percent (0-100) of the context window. `0` = disabled
 	 * (manual only); a positive number = auto-compact triggers when the last
 	 * turn's input tokens exceed it. Seeded from the backend on focus change.
 	 */
-	readonly compactThreshold: number | null;
+	readonly compactPercent: number | null;
 	/**
-	 * Persist the workspace conversation's auto-compact threshold
-	 * (`PUT /conversations/:id/compact-threshold`). `0` disables; any positive
+	 * Persist the workspace conversation's auto-compact percent
+	 * (`PUT /conversations/:id/compact-percent`). The trigger is a percentage of
+	 * the model's context window (default 85 when null). `0` disables; any 1-100
 	 * number enables. Works for a draft too (its id survives promotion).
 	 */
-	setCompactThreshold(threshold: number): Promise<CompactThresholdResult | null>;
+	setCompactPercent(percent: number): Promise<CompactPercentResult | null>;
 	/**
 	 * Fetch the workspace conversation's language-server status (`GET /conversations/:id/lsp`).
 	 * The backend lazily spawns servers, so this may take a moment on the first call for a cwd.
@@ -233,6 +237,7 @@ function createMetricsSync(httpBase: string, fetchImpl: typeof fetch): MetricsSy
 export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 	let protocol = $state<ProtocolState>(protocolInitialState());
 	let models = $state<readonly string[]>([]);
+	let modelInfo = $state<Readonly<Record<string, ModelMetadata>>>({});
 	let activeModel = $state(DEFAULT_MODEL);
 
 	const wsLocation = typeof location !== "undefined" ? location : undefined;
@@ -358,23 +363,23 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 		}
 	}
 
-	// The workspace conversation's auto-compact threshold. Seeded from the
+	// The workspace conversation's auto-compact percent. Seeded from the
 	// backend on focus change; null = not yet fetched. 0 = disabled.
-	let compactThreshold = $state<number | null>(null);
+	let compactPercent = $state<number | null>(null);
 
-	/** Refetch the workspace conversation's compact threshold (works for a draft too). */
-	async function refreshCompactThreshold(): Promise<void> {
+	/** Refetch the workspace conversation's compact percent (works for a draft too). */
+	async function refreshCompactPercent(): Promise<void> {
 		const id = workspaceConversationId();
-		compactThreshold = null;
+		compactPercent = null;
 		try {
 			const res = await fetchImpl(
-				`${httpBase}/conversations/${encodeURIComponent(id)}/compact-threshold`,
+				`${httpBase}/conversations/${encodeURIComponent(id)}/compact-percent`,
 			);
 			if (!res.ok) return;
-			const data = (await res.json()) as CompactThresholdResponse;
-			if (workspaceConversationId() === id) compactThreshold = data.threshold;
+			const data = (await res.json()) as CompactPercentResponse;
+			if (workspaceConversationId() === id) compactPercent = data.threshold;
 		} catch {
-			// Non-fatal: a threshold fetch failure just leaves null.
+			// Non-fatal: a percent fetch failure just leaves null.
 		}
 	}
 
@@ -542,7 +547,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 		syncSubscriptions();
 		void refreshCwd();
 		void refreshReasoningEffort();
-		void refreshCompactThreshold();
+		void refreshCompactPercent();
 	}
 
 	// Conversation lifecycle status (backend-owned, pushed via WS +
@@ -676,6 +681,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 		.then((data) => {
 			if (data === undefined) return;
 			models = data.models;
+			modelInfo = data.modelInfo ?? {};
 			if (data.models.length > 0 && !data.models.includes(activeModel)) {
 				const first = data.models[0];
 				if (first !== undefined) {
@@ -713,7 +719,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 	refreshActiveChat();
 	void refreshCwd();
 	void refreshReasoningEffort();
-	void refreshCompactThreshold();
+	void refreshCompactPercent();
 
 	// Fetch the authoritative open-conversation list from the backend (cross-
 	// device tab sync). Merges with the localStorage-restored tabs: opens new
@@ -733,6 +739,9 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 		get models(): readonly string[] {
 			return models;
 		},
+		get modelInfo(): Readonly<Record<string, ModelMetadata>> {
+			return modelInfo;
+		},
 		get activeModel(): string {
 			return activeModel;
 		},
@@ -759,8 +768,8 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 		get reasoningEffort(): ReasoningEffort | null {
 			return reasoningEffort;
 		},
-		get compactThreshold(): number | null {
-			return compactThreshold;
+		get compactPercent(): number | null {
+			return compactPercent;
 		},
 		get chatLimit(): number {
 			return chatLimit;
@@ -800,7 +809,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 				syncSubscriptions();
 				void refreshCwd();
 				void refreshReasoningEffort();
-				void refreshCompactThreshold();
+				void refreshCompactPercent();
 				// Now send on the promoted store
 				chatStores.get(conversationId)?.send(text);
 			} else {
@@ -837,7 +846,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 			syncSubscriptions();
 			void refreshCwd();
 			void refreshReasoningEffort();
-			void refreshCompactThreshold();
+			void refreshCompactPercent();
 		},
 
 		selectTab(conversationId: string): void {
@@ -850,7 +859,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 			syncSubscriptions();
 			void refreshCwd();
 			void refreshReasoningEffort();
-			void refreshCompactThreshold();
+			void refreshCompactPercent();
 		},
 
 		closeTab(conversationId: string): void {
@@ -988,12 +997,12 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 			}
 		},
 
-		async setCompactThreshold(threshold: number): Promise<CompactThresholdResult | null> {
+		async setCompactPercent(percent: number): Promise<CompactPercentResult | null> {
 			const id = workspaceConversationId();
-			const body: SetCompactThresholdRequest = { threshold };
+			const body: SetCompactPercentRequest = { threshold: percent }; // the percent value is sent under the `threshold` key
 			try {
 				const res = await fetchImpl(
-					`${httpBase}/conversations/${encodeURIComponent(id)}/compact-threshold`,
+					`${httpBase}/conversations/${encodeURIComponent(id)}/compact-percent`,
 					{
 						method: "PUT",
 						headers: { "content-type": "application/json" },
@@ -1004,16 +1013,16 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 					const errBody = (await res.json().catch(() => null)) as { error?: string } | null;
 					return {
 						ok: false,
-						error: errBody?.error ?? `Set compact threshold failed (HTTP ${res.status})`,
+						error: errBody?.error ?? `Set compact percent failed (HTTP ${res.status})`,
 					};
 				}
-				const data = (await res.json()) as CompactThresholdResponse;
-				if (workspaceConversationId() === id) compactThreshold = data.threshold;
-				return { ok: true, threshold: data.threshold };
+				const data = (await res.json()) as CompactPercentResponse;
+				if (workspaceConversationId() === id) compactPercent = data.threshold;
+				return { ok: true, percent: data.threshold };
 			} catch (err) {
 				return {
 					ok: false,
-					error: err instanceof Error ? err.message : "Set compact threshold request failed",
+					error: err instanceof Error ? err.message : "Set compact percent request failed",
 				};
 			}
 		},
diff --git a/src/features/chat/index.ts b/src/features/chat/index.ts
index 9c65cd4..1596c53 100644
--- a/src/features/chat/index.ts
+++ b/src/features/chat/index.ts
@@ -17,7 +17,7 @@ export {
 export type { ChatStore, ChatStoreDependencies } from "./store.svelte";
 export { createChatStore } from "./store.svelte";
 export { default as ChatView } from "./ui/ChatView.svelte";
-export type { CompactNowResult, SaveCompactThresholdResult } from "./ui/CompactionView.svelte";
+export type { CompactNowResult, SaveCompactPercentResult } from "./ui/CompactionView.svelte";
 export { default as CompactionView } from "./ui/CompactionView.svelte";
 export { default as Composer } from "./ui/Composer.svelte";
 export { default as ModelSelector } from "./ui/ModelSelector.svelte";
diff --git a/src/features/chat/ui/CompactionView.svelte b/src/features/chat/ui/CompactionView.svelte
index ce2a0a0..7bec984 100644
--- a/src/features/chat/ui/CompactionView.svelte
+++ b/src/features/chat/ui/CompactionView.svelte
@@ -3,54 +3,54 @@
 		| { readonly ok: true; readonly messagesSummarized: number; readonly messagesKept: number }
 		| { readonly ok: false; readonly error: string };
 
-	export type SaveCompactThresholdResult =
-		| { readonly ok: true; readonly threshold: number }
+	export type SaveCompactPercentResult =
+		| { readonly ok: true; readonly percent: number }
 		| { readonly ok: false; readonly error: string };
 
 	let {
-		threshold,
+		percent,
 		canCompact,
 		compactNow,
-		saveThreshold,
+		savePercent,
 	}: {
-		/** The conversation's auto-compact threshold, or null when not yet fetched. 0 = disabled. */
-		threshold: number | null;
+		/** The conversation's auto-compact percent (0-100), or null when not yet fetched. 0 = disabled. */
+		percent: number | null;
 		/** Whether a real conversation is focused (a draft has nothing to compact). */
 		canCompact: boolean;
 		compactNow: () => Promise<CompactNowResult | null>;
-		saveThreshold: (threshold: number) => Promise<SaveCompactThresholdResult | null>;
+		savePercent: (percent: number) => Promise<SaveCompactPercentResult | null>;
 	} = $props();
 
-	const DEFAULT_THRESHOLD = 350000;
+	const DEFAULT_PERCENT = 85; // UI-side default used for the placeholder and the "(default)" label — NOTE(review): confirm it matches the backend default
 
 	let compacting = $state(false);
 	let compactError = $state<string | null>(null);
 	let compactResult = $state<{ summarized: number; kept: number } | null>(null);
 
-	let thresholdInput = $state("");
-	let savingThreshold = $state(false);
-	let thresholdError = $state<string | null>(null);
-	let thresholdSaved = $state(false);
+	let percentInput = $state("");
+	let savingPercent = $state(false);
+	let percentError = $state<string | null>(null);
+	let percentSaved = $state(false);
 
 	// Sync the input from the prop when it changes (focus switch / initial load).
-	let lastThreshold = $state<number | null>(null);
+	let lastPercent = $state<number | null>(null);
 	$effect(() => {
-		if (threshold !== lastThreshold) {
-			lastThreshold = threshold;
-			thresholdInput = threshold !== null ? String(threshold) : "";
-			thresholdError = null;
-			thresholdSaved = false;
+		if (percent !== lastPercent) {
+			lastPercent = percent;
+			percentInput = percent !== null ? String(percent) : "";
+			percentError = null;
+			percentSaved = false;
 		}
 	});
 
-	const thresholdLabel = $derived(
-		threshold == null
+	const percentLabel = $derived(
+		percent == null
 			? "Loading…"
-			: threshold === 0
+			: percent === 0
 				? "Disabled (manual only)"
-				: threshold === DEFAULT_THRESHOLD
-					? `${threshold.toLocaleString("en-US")} (default)`
-					: threshold.toLocaleString("en-US"),
+				: percent === DEFAULT_PERCENT
+					? `${percent}% (default)`
+					: `${percent}%`,
 	);
 
 	async function handleCompact() {
@@ -68,22 +68,22 @@
 		}
 	}
 
-	async function handleSaveThreshold() {
-		const value = Number.parseInt(thresholdInput, 10);
-		if (Number.isNaN(value) || value < 0) {
-			thresholdError = "Must be a non-negative number";
+	async function handleSavePercent(event?: Event & { currentTarget: HTMLInputElement }) {
+		const value = Number.parseInt(event?.currentTarget.value ?? percentInput, 10); // read the live field: `value={percentInput}` is one-way, so typing never updates percentInput
+		if (Number.isNaN(value) || value < 0 || value > 100) {
+			percentError = "Must be 0-100";
 			return;
 		}
-		savingThreshold = true;
-		thresholdError = null;
-		thresholdSaved = false;
-		const result = await saveThreshold(value);
-		savingThreshold = false;
+		savingPercent = true;
+		percentError = null;
+		percentSaved = false;
+		const result = await savePercent(value);
+		savingPercent = false;
 		if (result === null) return;
 		if (result.ok) {
-			thresholdSaved = true;
+			percentSaved = true;
 		} else {
-			thresholdError = result.error;
+			percentError = result.error;
 		}
 	}
 </script>
@@ -120,33 +120,34 @@
 		{/if}
 	</section>
 
-	<!-- Auto-compact threshold -->
+	<!-- Auto-compact percent -->
 	<section class="flex flex-col gap-1">
-		<span class="text-xs font-semibold uppercase opacity-60">Auto-compact threshold</span>
+		<span class="text-xs font-semibold uppercase opacity-60">Auto-compact percent</span>
 		<div class="flex items-center gap-2">
 			<input
 				type="number"
-				class="input input-bordered input-sm w-32"
+				class="input input-bordered input-sm w-24"
 				min="0"
-				placeholder={DEFAULT_THRESHOLD.toLocaleString("en-US")}
-				value={thresholdInput}
-				disabled={savingThreshold}
-				onchange={handleSaveThreshold}
-				aria-label="Compact threshold (tokens)"
+				max="100"
+				placeholder={String(DEFAULT_PERCENT)}
+				value={percentInput}
+				disabled={savingPercent}
+				onchange={handleSavePercent}
+				aria-label="Compact percent (0-100)"
 			/>
-			<span class="text-xs opacity-60">tokens</span>
-			{#if savingThreshold}
+			<span class="text-xs opacity-60">%</span>
+			{#if savingPercent}
 				<span class="loading loading-spinner loading-xs"></span>
 			{/if}
 		</div>
 		<p class="text-xs opacity-50">
-			Current: {thresholdLabel}
+			Current: {percentLabel}
 			<br />
-			0 disables auto-compact. Default is {DEFAULT_THRESHOLD.toLocaleString("en-US")}.
+			0 disables auto-compact. Default is {DEFAULT_PERCENT}%.
 		</p>
-		{#if thresholdError}
-			<p class="text-xs text-error">{thresholdError}</p>
-		{:else if thresholdSaved}
+		{#if percentError}
+			<p class="text-xs text-error">{percentError}</p>
+		{:else if percentSaved}
 			<p class="text-xs text-success">Saved.</p>
 		{/if}
 	</section>
diff --git a/src/features/chat/ui/Composer.svelte b/src/features/chat/ui/Composer.svelte
index 7030153..fe9ea94 100644
--- a/src/features/chat/ui/Composer.svelte
+++ b/src/features/chat/ui/Composer.svelte
@@ -1,9 +1,7 @@
 <script lang="ts">
 	import { computeContextUsage, formatCompactTokens } from "../../../core/metrics";
 
-	// Placeholder context-window limit until the backend reports a real
-	// per-model max (see backend-handoff §3). Hardcoded to 1,000,000 tokens.
-	const MAX_CONTEXT = 1_000_000;
+	const FALLBACK_CONTEXT_WINDOW = 1_000_000; // max-token limit used when no `contextWindow` prop is supplied
 	const MAX_LINES = 7;
 
 	let {
@@ -11,6 +9,7 @@
 		onQueue,
 		onStop,
 		contextSize = undefined,
+		contextWindow = undefined,
 		status = "idle",
 	}: {
 		onSend: (text: string) => void;
@@ -26,6 +25,8 @@
 		// Current context occupancy (latest turn's contextSize), or `undefined`
 		// when unknown — the status bar then shows "— tokens", never 0%.
 		contextSize?: number | undefined;
+		/** Per-model context window (max tokens) from `GET /models` modelInfo. */
+		contextWindow?: number | undefined;
 		// Coarse agent status for the status-bar icon.
 		status?: "idle" | "running" | "error";
 	} = $props();
@@ -34,7 +35,8 @@
 	let inputEl: HTMLTextAreaElement | undefined;
 
 	const hasText = $derived(text.trim().length > 0);
-	const usage = $derived(computeContextUsage(contextSize, MAX_CONTEXT));
+	const effectiveMax = $derived(contextWindow ?? FALLBACK_CONTEXT_WINDOW);
+	const usage = $derived(computeContextUsage(contextSize, effectiveMax));
 	const hasUsage = $derived(contextSize !== undefined);
 
 	// One button, three modes: