From 1764e3e5dff836255d121a933dd92542368346f9 Mon Sep 17 00:00:00 2001
From: Adam Malczewski <github@tradam.dev>
Date: Fri, 12 Jun 2026 18:26:00 +0900
Subject: feat(chat): chat limit — bulk quarter-unload, 75% fresh-load window,
 show-earlier page-in
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Long transcripts no longer grow unbounded: past the chat limit (default 256
chunks, localStorage dispatch.chatLimit) the oldest ceil(limit/4) committed
chunks are unloaded in ONE bulk pass — never one-per-delta (old Dispatch's
scroll-jump-per-step bug) — and only while the reader is stuck to the bottom
(scrolled-up readers defer the trim; it catches up in whole quarters). A fresh
page load windows to the newest floor(0.75*limit). Unloading is purely local
(IndexedDB cache + server keep everything); a hiddenBeforeSeq watermark keeps
history merges from resurrecting unloaded chunks, and a 'Show earlier messages'
affordance pages a quarter back in from the cache with scroll-anchor
preservation. Thinking-collapse render keys stay stable across trims via a
hiddenThinkingCount ordinal base.

- core/chunks/trim.ts: pure policy (trim/window/restore/normalize) + tests
- chat store: chatLimit + canUnload deps, windowed load, showEarlier()
- composition root: dispatch.chatLimit localStorage knob + unload gate wired
  to smart-scroll isAtBottom()
- backend CR-5 OPENED (not a blocker): ?limit=/?beforeSeq= on
  GET /conversations/:id (courier backend-handoff-chat-limit.md)
- scripts/live-probe.ts: fix pre-existing stale TurnMetricsEntry reads
  (m1.usage -> total.usage) that crashed the probe; 17/17 live checks pass
---
 AGENTS.md                                         |   6 +-
 GLOSSARY.md                                       |   3 +
 backend-handoff-chat-limit.md                     |  66 +++++
 backend-handoff.md                                |  34 ++-
 scripts/live-probe.ts                             |  20 +-
 src/app/App.svelte                                |  34 ++-
 src/app/store.svelte.ts                           |  32 +++
 src/core/chunks/index.ts                          |  12 +
 src/core/chunks/reducer.ts                        |  11 +-
 src/core/chunks/trim.test.ts                      | 218 ++++++++++++++++
 src/core/chunks/trim.ts                           | 149 +++++++++++
 src/core/chunks/types.ts                          |  17 ++
 src/features/chat/store.svelte.ts                 |  75 +++++-
 src/features/chat/store.test.ts                   | 289 ++++++++++++++++++++++
 src/features/chat/ui.test.ts                      |  39 +++
 src/features/chat/ui/ChatView.svelte              |  46 +++-
 src/features/smart-scroll/ui/controller.svelte.ts |  10 +
 17 files changed, 1038 insertions(+), 23 deletions(-)
 create mode 100644 backend-handoff-chat-limit.md
 create mode 100644 src/core/chunks/trim.test.ts
 create mode 100644 src/core/chunks/trim.ts

diff --git a/AGENTS.md b/AGENTS.md
index 3f7d428..a4c2f36 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -136,8 +136,10 @@ browser effects, run a LIVE probe:
 ## Status
 Slices 1–3 DONE + committed (surface system + WS; conversation transcript cache + delta
 streaming; tabs + model selector + DaisyUI/dracula), plus per-conversation cwd + LSP view,
-context size, cache-warming (+ retention/timer), markdown, smart auto-scroll, and
-multi-client live view (subscribe/reconnect + the user prompt on the event stream). Plan in
+context size, cache-warming (+ retention/timer), markdown, smart auto-scroll, multi-client
+live view (subscribe/reconnect + the user prompt on the event stream), and the chat limit
+(bulk quarter-unload past `dispatch.chatLimit`, 75% fresh-load window, show-earlier page-in;
+`core/chunks/trim.ts`; backend CR-5 open for `?limit=`/`?beforeSeq=`). Plan in
 `../arch-rewrite/notes/frontend-design.md` §10.
 
 ## Reports
diff --git a/GLOSSARY.md b/GLOSSARY.md
index 2a6904a..2f4f199 100644
--- a/GLOSSARY.md
+++ b/GLOSSARY.md
@@ -37,3 +37,6 @@
 | **surface interpreter** | The generic renderer: field kind → component. Knows kinds, never surface ids. | — |
 | **metrics bubble** | The FE chat element that renders a turn's **turn metrics** (one per-turn total) and **step metrics** (one per step) as muted system-style bubbles at a turn's tail. UI presentation of `TurnMetrics`/`StepMetrics`; never a surface. | telemetry bubble, usage bubble, stats bubble |
 | **TPS** (tokens per second) | A FE-DERIVED decode rate: `outputTokens / (decodeMs / 1000)` (per step; per turn over Σ `decodeMs`), falling back to `genTotalMs` when `decodeMs` is absent. The backend-recommended basis (excludes first-token latency). Not carried on the wire; omitted when timing is absent. | throughput |
+| **chat limit** | The max LOADED chunks per conversation (default 256; localStorage `dispatch.chatLimit`, no UI yet) before the oldest quarter is unloaded. Counts **chunks** (committed + provisional + accumulating). Policy in `core/chunks/trim.ts`. | chunk limit, message limit, history limit |
+| **unload** | Drop the oldest COMMITTED chunks from the in-memory transcript (and DOM) past the **chat limit** — in BULK (`ceil(limit/4)` per pass, deferred while the reader is scrolled up), never one-per-delta (old Dispatch's scroll-jump bug). Purely local: the IndexedDB cache and the server keep everything; `TranscriptState.hiddenBeforeSeq` is the watermark. Distinct from the conversation-cache's cross-conversation **eviction**. | evict (reserved for the cross-conversation cache), prune, drop |
+| **show earlier** | The affordance at the top of a transcript with unloaded history ("Show earlier messages"): pages one unload-unit back in from the local cache (later: the server via CR-5 `?beforeSeq=`), preserving the reader's scroll position. | load more, pagination |
diff --git a/backend-handoff-chat-limit.md b/backend-handoff-chat-limit.md
new file mode 100644
index 0000000..da20583
--- /dev/null
+++ b/backend-handoff-chat-limit.md
@@ -0,0 +1,66 @@
+# Backend handoff — CR-5: history windowing for the FE chat limit (courier doc)
+
+> **From:** dispatch-web · **To:** arch-rewrite · **Courier:** the user.
+> Companion to the living `backend-handoff.md` (§2 CR-5). 2026-06-12.
+
+## Context — what the FE is building (no backend blocker)
+
+The FE is adding a **chat limit**: in very long conversations the transcript unloads old
+chunks from memory/DOM so the browser stays fast. Policy (already decided with the user):
+
+- Limit `L` counts **chunks** (default 256, localStorage-configurable).
+- When the loaded count exceeds `L`, the FE unloads the oldest `ceil(L/4)` chunks in ONE
+  bulk pass (e.g. `L=100`: at 101 chunks it unloads 25 → 76 remain). Bulk-on-threshold —
+  NOT one-per-delta like old Dispatch — to kill the scroll-jump-per-step failure mode.
+- A fresh page load shows only the newest `floor(0.75 × L)` chunks (192 for the default).
+- A "Show earlier messages" affordance pages older history back in (today: from the FE's
+  IndexedDB cache, which still holds it).
+
+**This works TODAY with no backend change** — the FE fetches everything and windows in
+memory. The ask below makes the *fresh-browser* case cheap: with an empty IndexedDB cache,
+`GET /conversations/:id?sinceSeq=0` currently returns the ENTIRE conversation, so a
+10k-chunk chat downloads + parses megabytes only for the FE to display 192 chunks.
+
+## The ask (additive, `transport-contract` bump)
+
+Extend `GET /conversations/:id` with two OPTIONAL query params:
+
+1. **`limit=<n>`** — return only the **newest** `n` chunks of the selection (still
+   ascending seq order in the response). Selection semantics otherwise unchanged
+   (`seq > sinceSeq`).
+   - **If the selection has ≤ `n` chunks, return everything** — the FE will routinely send
+     a largish number (e.g. `limit=192`) against short conversations and expects the
+     normal full response (that flow must stay cheap and exact).
+   - `limit` absent → exactly today's behavior (full selection). Existing FE versions keep
+     working unchanged.
+2. **`beforeSeq=<s>`** — restrict the selection to `seq < s` (combined with `limit`: the
+   newest `n` chunks below `s`, ascending). This is the "Show earlier messages" page-in
+   path for history the FE's local cache doesn't have (e.g. a fresh browser that
+   initial-loaded with `limit`). `beforeSeq` + `sinceSeq` together = `sinceSeq < seq < s`
+   (we only ever send one of them, but defined semantics beat undefined).
+
+And one additive response field on `ConversationHistoryResponse`:
+
+3. **`earliestSeq?: number`** (or `hasOlder: boolean` — your pick, flag your choice in the
+   reply) — the conversation's overall lowest seq (or whether chunks exist below the
+   returned window). The FE needs to know whether to OFFER "Show earlier messages" when
+   its local cache is exhausted. Without it the FE can only guess (seq 1 = start works if
+   seqs are guaranteed to start at 1 and be gap-free — if you'd rather just CONFIRM that
+   invariant in writing, the FE can derive `hasOlder` from `chunks[0].seq > 1` and we skip
+   the new field entirely; cheapest option, totally fine).
+
+## How the FE will consume it
+
+- Fresh load (empty cache): `GET /conversations/:id?sinceSeq=0&limit=<floor(0.75×L)>`.
+- Incremental tail sync (cache warm): unchanged `?sinceSeq=<maxCachedSeq>` (no limit — the
+  tail since last sync is small by construction).
+- Show-earlier beyond local cache: `GET /conversations/:id?beforeSeq=<oldestLoadedSeq>&limit=<ceil(L/4)>`.
+- The FE's IndexedDB cache is seq-keyed + dedup-by-seq and already tolerates a
+  non-contiguous prefix (a windowed suffix), so no cache-format change is needed FE-side.
+
+## Priority / sequencing
+
+Not a blocker — the FE ships the limit feature against the current contract (full fetch +
+in-memory windowing) and lights up the `limit`/`beforeSeq` params when you ship. Ship
+whenever convenient; please bump `transport-contract` and note the params in the reply
+handoff so the FE re-pins + re-mirrors.
diff --git a/backend-handoff.md b/backend-handoff.md
index 30a1d64..4410b44 100644
--- a/backend-handoff.md
+++ b/backend-handoff.md
@@ -5,12 +5,15 @@
 > **From:** dispatch-web orchestrator · **To:** arch-rewrite orchestrator · **Courier:** the user.
 > `lsp` does NOT span the repos (AGENTS.md § Backend seam) — every cross-repo ask flows through here.
 
-_Last updated: 2026-06-12 (CR-4 consumed). **FE is current on `ui-contract@0.2.0` /
-`transport-contract@0.9.0` / `wire@0.6.0`.** All handoffs to date are consumed: surfaces + WS,
-conversation transcript/metrics, tabs + model selector, cache-warming (incl. authoritative timer +
-retention + cache-rate fix + the CR-4 lifecycle below), **per-conversation cwd + LSP status**,
-**context size**, and **turn continuity + multi-client live view**.
-**Open asks: NONE.** CR-1/CR-2/CR-4 all RESOLVED ✅ (see §2); §3 lists likely next asks.
+_Last updated: 2026-06-12 (CR-5 opened: chat-limit history windowing). **FE is current on
+`ui-contract@0.2.0` / `transport-contract@0.9.0` / `wire@0.6.0`.** All handoffs to date are
+consumed: surfaces + WS, conversation transcript/metrics, tabs + model selector, cache-warming
+(incl. authoritative timer + retention + cache-rate fix + the CR-4 lifecycle below),
+**per-conversation cwd + LSP status**, **context size**, and **turn continuity + multi-client
+live view**.
+**Open asks: ONE — CR-5** (`?limit=`/`?beforeSeq=` on `GET /conversations/:id`; NOT a blocker,
+courier doc `backend-handoff-chat-limit.md`). CR-1/CR-2/CR-4 all RESOLVED ✅ (see §2); §3 lists
+likely next asks.
 **CR-3 (watcher couldn't see the USER prompt until seal) → RESOLVED ✅** — backend shipped the
 `user-message` turn event; FE re-pinned + consumption live.
 The cwd/LSP draft-path verification (`backend-handoff-cwd-lsp.md`) came back **all ✅ confirmed**._
@@ -81,7 +84,24 @@ Mirrored in-repo for headless agents: `.dispatch/{ui-contract,wire,transport-con
 
 ## 2. Open asks FOR THE BACKEND
 
-**None open.** Resolved history below.
+**One open: CR-5.** Resolved history below it.
+
+### CR-5 — history windowing for the FE chat limit → **OPEN (not a blocker)** (courier `backend-handoff-chat-limit.md`)
+
+The FE is shipping a **chat limit** (default 256 chunks, localStorage-configurable): past the
+limit it bulk-unloads the oldest `ceil(L/4)` chunks (scroll-jump-free, unlike old Dispatch's
+one-per-delta eviction), and a fresh page load shows only the newest `floor(0.75×L)`. Works
+today by fetching the full history and windowing in memory — the ask makes the FRESH-BROWSER
+load cheap (today `?sinceSeq=0` returns the whole conversation; a 10k-chunk chat downloads
+megabytes to show 192 chunks). Additive `transport-contract` asks:
+- **`?limit=<n>`** on `GET /conversations/:id` — newest `n` of the selection, still ascending;
+  **≤ `n` chunks exist ⇒ return everything** (fresh loads always send it; short chats must stay
+  exact). Absent ⇒ today's behavior.
+- **`?beforeSeq=<s>`** — selection `seq < s` (with `limit`: newest `n` below `s`) — the
+  "Show earlier messages" page-in path once the FE's local cache is exhausted.
+- **`earliestSeq?`/`hasOlder?` response field** — OR simply confirm in writing that seqs start
+  at 1 gap-free, and the FE derives `hasOlder` from `chunks[0].seq > 1` (cheapest, preferred).
+Full consumption plan + sequencing in the courier doc.
 
 ### CR-1 — Loaded Extensions as a true table → **RESOLVED ✅** (shipped + consumed)
 
diff --git a/scripts/live-probe.ts b/scripts/live-probe.ts
index 2b2880b..7099b44 100644
--- a/scripts/live-probe.ts
+++ b/scripts/live-probe.ts
@@ -204,25 +204,28 @@ async function main() {
 	record("turn 1 committed transcript has assistant text", committedText.length > 0);
 
 	// ─── Metrics: LIVE token + timing (wire@0.3.0 usage/step-complete/done) ──────
+	// (TurnMetricsEntry is `{ turnId, steps, total }` — the turn aggregate lives on
+	// `total`, present once the live `done` folded.)
 	const liveTurns = selectOrderedTurnMetrics(t1.metrics);
 	const m1 = liveTurns[0];
+	const m1Total = m1?.total ?? null;
 	record(
 		"turn 1 LIVE metrics: a turn with output tokens",
-		m1 !== undefined && m1.usage.outputTokens > 0,
-		m1
-			? `in=${m1.usage.inputTokens} out=${m1.usage.outputTokens} steps=${m1.steps.length}`
-			: "no turn",
+		m1Total !== null && m1Total.usage.outputTokens > 0,
+		m1Total
+			? `in=${m1Total.usage.inputTokens} out=${m1Total.usage.outputTokens} steps=${m1?.steps.length}`
+			: "no finalized turn total",
 	);
 	if (m1 !== undefined) {
 		const anyGen = m1.steps.some((s) => s.genTotalMs !== undefined);
 		const anyTtft = m1.steps.some((s) => s.ttftMs !== undefined);
 		note(
-			`live timing: durationMs=${m1.durationMs ?? "—"}, ` +
+			`live timing: durationMs=${m1Total?.durationMs ?? "—"}, ` +
 				`genTotalMs present=${anyGen}, ttftMs present=${anyTtft}`,
 		);
 		record(
 			"turn 1 LIVE metrics carries timing (durationMs or step genTotalMs)",
-			m1.durationMs !== undefined || anyGen,
+			m1Total?.durationMs !== undefined || anyGen,
 			"requires the backend runtime to have a clock",
 		);
 	}
@@ -248,10 +251,11 @@ async function main() {
 			applyDurableMetrics(initialMetricsState(), dm.turns),
 		);
 		const d1 = durableMerged[0];
+		const d1Total = d1?.total ?? null;
 		record(
 			"durable /metrics turn has token usage",
-			d1 !== undefined && d1.usage.outputTokens > 0,
-			d1 ? `out=${d1.usage.outputTokens} steps=${d1.steps.length}` : "no turn",
+			d1Total !== null && d1Total.usage.outputTokens > 0,
+			d1Total ? `out=${d1Total.usage.outputTokens} steps=${d1?.steps.length}` : "no turn total",
 		);
 	}
 
diff --git a/src/app/App.svelte b/src/app/App.svelte
index 50f24e7..4c5a82b 100644
--- a/src/app/App.svelte
+++ b/src/app/App.svelte
@@ -1,5 +1,6 @@
 <script lang="ts">
 	import type { InvokeMessage } from "@dispatch/ui-contract";
+	import { tick } from "svelte";
 	import Table from "../components/Table.svelte";
 	import {
 		CacheWarmingView,
@@ -76,6 +77,31 @@
 	let transcriptEl = $state<HTMLElement | undefined>();
 	let transcriptContentEl = $state<HTMLElement | undefined>();
 
+	// Chat-limit unload gate: old chunks may be unloaded only while the reader is
+	// stuck to the bottom. While stuck, a trim removes content far ABOVE the
+	// viewport and the controller re-pins to the bottom — no visible jump; while
+	// reading history, trimming is deferred instead of yanking the page (the old
+	// Dispatch bug). In an $effect so a swapped store prop would be re-wired.
+	$effect(() => {
+		store.attachUnloadGate(() => smartScroll.isAtBottom());
+	});
+
+	// "Show earlier messages": page older history back in, preserving the reader's
+	// viewport position — prepended content grows scrollHeight, so shift scrollTop
+	// by the growth (the manual analogue of CSS scroll anchoring, which not every
+	// engine applies here).
+	async function handleShowEarlier(): Promise<void> {
+		const el = transcriptEl; // pin it: the binding may change across the awaits
+		const prevHeight = el?.scrollHeight ?? 0;
+		const prevTop = el?.scrollTop ?? 0;
+		await store.activeChat.showEarlier();
+		await tick(); // let Svelte flush the restored chunks into the DOM first
+		if (el) {
+			const delta = el.scrollHeight - prevHeight;
+			if (delta > 0) el.scrollTop = prevTop + delta; // NOTE(review): assumes no scroll during the awaits
+		}
+	}
+
 	// Attach/detach the controller to the live scroll element + content (disposed on
 	// unmount). The content element is observed (ResizeObserver) so the view follows
 	// height changes that aren't a transcript append.
@@ -201,7 +227,13 @@
 			<div bind:this={transcriptEl} class="h-full overflow-y-auto">
 				<div bind:this={transcriptContentEl}>
 					{#key store.activeConversationId}
-						<ChatView chunks={store.activeChat.chunks} turnMetrics={store.activeChat.turnMetrics} />
+						<ChatView
+							chunks={store.activeChat.chunks}
+							turnMetrics={store.activeChat.turnMetrics}
+							hasEarlier={store.activeChat.hasEarlier}
+							onShowEarlier={handleShowEarlier}
+							thinkingKeyBase={store.activeChat.thinkingKeyBase}
+						/>
 					{/key}
 				</div>
 			</div>
diff --git a/src/app/store.svelte.ts b/src/app/store.svelte.ts
index 2837bb5..379805f 100644
--- a/src/app/store.svelte.ts
+++ b/src/app/store.svelte.ts
@@ -15,6 +15,7 @@ import { createIdbChunkStore } from "../adapters/idb";
 import { createLocalStore } from "../adapters/local-storage";
 import type { WebSocketLike } from "../adapters/ws";
 import { createSurfaceSocket, type SurfaceSocketOptions } from "../adapters/ws";
+import { normalizeChatLimit } from "../core/chunks";
 import {
 	applyServerMessage,
 	getSurfaceSpec,
@@ -88,6 +89,15 @@ export interface AppStore {
 	 * The backend lazily spawns servers, so this may take a moment on the first call for a cwd.
 	 */
 	lspStatus(): Promise<LspResult | null>;
+	/**
+	 * Wire the chat-limit unload gate (composition-root injection, called by the
+	 * shell once it owns the scroll region; a later call replaces the gate):
+	 * unloading old chunks is allowed only while the gate returns true — i.e. the
+	 * reader is stuck to the bottom — so a trim never yanks content out from under
+	 * someone reading history. Before attachment, unloading is allowed (the
+	 * initial view starts at the bottom).
+	 */
+	attachUnloadGate(gate: () => boolean): void;
 	dispose(): void;
 }
 
@@ -157,6 +167,22 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 	});
 	const tabsStore: TabsStore = createTabsStore(storageAdapter);
 
+	// The chat limit (max loaded chunks per conversation) — a persisted local
+	// setting with no UI yet: edit `localStorage["dispatch.chatLimit"]`. The
+	// default is written back on first run so the knob is discoverable.
+	const chatLimitStore = createLocalStore<number>("dispatch.chatLimit", {
+		storage: localStorageOpt,
+	});
+	const storedChatLimit = chatLimitStore.load();
+	const chatLimit = normalizeChatLimit(storedChatLimit);
+	if (storedChatLimit === null) {
+		chatLimitStore.save(chatLimit);
+	}
+
+	// Unload gate — attached by the shell once it owns the scroll region (see
+	// `AppStore.attachUnloadGate`). Until then, unloading is allowed.
+	let unloadGate: (() => boolean) | null = null;
+
 	const cache: ConversationCache = createConversationCache(
 		createIdbChunkStore({ indexedDB: indexedDBFactory }),
 	);
@@ -178,6 +204,8 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 			historySync,
 			metricsSync,
 			cache,
+			chatLimit,
+			canUnload: () => (unloadGate === null ? true : unloadGate()),
 		});
 	}
 
@@ -607,6 +635,10 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 				};
 			}
 		},
+		attachUnloadGate(gate: () => boolean): void {
+			unloadGate = gate;
+		},
+
 		dispose(): void {
 			for (const store of chatStores.values()) {
 				store.dispose();
diff --git a/src/core/chunks/index.ts b/src/core/chunks/index.ts
index ecfee74..6ab0f35 100644
--- a/src/core/chunks/index.ts
+++ b/src/core/chunks/index.ts
@@ -8,6 +8,18 @@ export {
 	initialState,
 } from "./reducer";
 export { selectChunks, selectGenerating, selectMessages } from "./selectors";
+export {
+	DEFAULT_CHAT_LIMIT,
+	initialWindowSize,
+	MAX_CHAT_LIMIT,
+	MIN_CHAT_LIMIT,
+	normalizeChatLimit,
+	restoreEarlier,
+	selectHasEarlier,
+	trimTranscript,
+	unloadCount,
+	windowTranscript,
+} from "./trim";
 export type {
 	AccumulatingChunk,
 	ProvisionalChunk,
diff --git a/src/core/chunks/reducer.ts b/src/core/chunks/reducer.ts
index 7ce55ce..0a57839 100644
--- a/src/core/chunks/reducer.ts
+++ b/src/core/chunks/reducer.ts
@@ -10,6 +10,8 @@ export function initialState(): TranscriptState {
 		currentTurnId: null,
 		latestUsage: null,
 		sealedTurnId: null,
+		hiddenBeforeSeq: 0,
+		hiddenThinkingCount: 0,
 		generating: false,
 	};
 }
@@ -41,6 +43,10 @@ function flushAccumulating(
  * Dedupes by seq (new wins), keeps seq-monotonic order, idempotent.
  * When sealedTurnId is set, drops all provisional chunks (now superseded)
  * and clears sealedTurnId.
+ *
+ * Chunks below the chat-limit unload watermark (`hiddenBeforeSeq`) are
+ * REJECTED: a full-cache or tail merge must not resurrect what the trim
+ * unloaded. Restoring earlier history goes through `restoreEarlier` instead.
  */
 export function applyHistory(
 	state: TranscriptState,
@@ -48,7 +54,10 @@ export function applyHistory(
 ): TranscriptState {
 	const seqMap = new Map<number, StoredChunk>();
 	for (const c of state.committed) seqMap.set(c.seq, c);
-	for (const c of chunks) seqMap.set(c.seq, c);
+	for (const c of chunks) {
+		if (c.seq < state.hiddenBeforeSeq) continue;
+		seqMap.set(c.seq, c);
+	}
 	const committed = Array.from(seqMap.values()).sort((a, b) => a.seq - b.seq);
 
 	if (state.sealedTurnId !== null) {
diff --git a/src/core/chunks/trim.test.ts b/src/core/chunks/trim.test.ts
new file mode 100644
index 0000000..091b646
--- /dev/null
+++ b/src/core/chunks/trim.test.ts
@@ -0,0 +1,218 @@
+import type { StoredChunk } from "@dispatch/wire";
+import { describe, expect, it } from "vitest";
+import { applyHistory, initialState } from "./reducer";
+import {
+	DEFAULT_CHAT_LIMIT,
+	initialWindowSize,
+	MAX_CHAT_LIMIT,
+	MIN_CHAT_LIMIT,
+	normalizeChatLimit,
+	restoreEarlier,
+	selectHasEarlier,
+	trimTranscript,
+	unloadCount,
+	windowTranscript,
+} from "./trim";
+import type { TranscriptState } from "./types";
+
+function chunk(seq: number, type: "text" | "thinking" = "text"): StoredChunk {
+	return { seq, role: "assistant", chunk: { type, text: `c${seq}` } };
+}
+
+function chunks(from: number, to: number): StoredChunk[] {
+	const out: StoredChunk[] = [];
+	for (let seq = from; seq <= to; seq++) out.push(chunk(seq));
+	return out;
+}
+
+function stateWith(committed: readonly StoredChunk[]): TranscriptState {
+	return { ...initialState(), committed };
+}
+
+describe("normalizeChatLimit", () => {
+	it("defaults non-numeric / NaN / missing values", () => {
+		expect(normalizeChatLimit(undefined)).toBe(DEFAULT_CHAT_LIMIT);
+		expect(normalizeChatLimit(null)).toBe(DEFAULT_CHAT_LIMIT);
+		expect(normalizeChatLimit("100")).toBe(DEFAULT_CHAT_LIMIT);
+		expect(normalizeChatLimit(Number.NaN)).toBe(DEFAULT_CHAT_LIMIT);
+		expect(normalizeChatLimit(Number.POSITIVE_INFINITY)).toBe(DEFAULT_CHAT_LIMIT);
+	});
+
+	it("floors and clamps numeric values", () => {
+		expect(normalizeChatLimit(100.9)).toBe(100);
+		expect(normalizeChatLimit(0)).toBe(MIN_CHAT_LIMIT);
+		expect(normalizeChatLimit(-5)).toBe(MIN_CHAT_LIMIT);
+		expect(normalizeChatLimit(10_000_000)).toBe(MAX_CHAT_LIMIT);
+		expect(normalizeChatLimit(256)).toBe(256);
+	});
+});
+
+describe("unloadCount / initialWindowSize", () => {
+	it("unload is a quarter of the limit, rounded up", () => {
+		expect(unloadCount(100)).toBe(25);
+		expect(unloadCount(256)).toBe(64);
+		expect(unloadCount(10)).toBe(3);
+	});
+
+	it("initial window is 75% of the limit, rounded down", () => {
+		expect(initialWindowSize(100)).toBe(75);
+		expect(initialWindowSize(256)).toBe(192);
+		expect(initialWindowSize(1)).toBe(1); // never below 1
+	});
+});
+
+describe("trimTranscript", () => {
+	it("is the identity at or under the limit", () => {
+		const at = stateWith(chunks(1, 100));
+		expect(trimTranscript(at, 100)).toBe(at);
+		const under = stateWith(chunks(1, 99));
+		expect(trimTranscript(under, 100)).toBe(under);
+	});
+
+	it("unloads exactly a quarter when the limit is first exceeded (100 → 101 drops 25)", () => {
+		const state = stateWith(chunks(1, 101));
+		const next = trimTranscript(state, 100);
+		expect(next.committed).toHaveLength(76);
+		expect(next.committed[0]?.seq).toBe(26);
+		expect(next.hiddenBeforeSeq).toBe(26);
+	});
+
+	it("unloads multiple quarters when trimming was deferred far past the limit", () => {
+		const state = stateWith(chunks(1, 130));
+		const next = trimTranscript(state, 100);
+		// 130 → needs 2 quarters (25 each) to get to ≤ 100 → 80 remain.
+		expect(next.committed).toHaveLength(80);
+		expect(next.committed[0]?.seq).toBe(51);
+		expect(next.hiddenBeforeSeq).toBe(51);
+	});
+
+	it("counts provisional + accumulating toward the limit but never drops them", () => {
+		const base = stateWith(chunks(1, 98));
+		const state: TranscriptState = {
+			...base,
+			provisional: [
+				{ role: "user", chunk: { type: "text", text: "q" } },
+				{ role: "assistant", chunk: { type: "text", text: "a" } },
+			],
+			accumulating: { kind: "text", text: "stream" },
+		};
+		// 98 + 2 + 1 = 101 > 100 → drop 25 committed.
+		const next = trimTranscript(state, 100);
+		expect(next.committed).toHaveLength(73);
+		expect(next.provisional).toHaveLength(2);
+		expect(next.accumulating).not.toBeNull();
+	});
+
+	it("caps the drop at the committed length", () => {
+		const base = stateWith(chunks(1, 2));
+		const provisional = Array.from({ length: 20 }, (_, i) => ({
+			role: "assistant" as const,
+			chunk: { type: "text" as const, text: `p${i}` },
+		}));
+		const state: TranscriptState = { ...base, provisional };
+		const next = trimTranscript(state, 10);
+		expect(next.committed).toHaveLength(0);
+		expect(next.provisional).toHaveLength(20);
+		// Watermark advances past the last dropped committed chunk.
+		expect(next.hiddenBeforeSeq).toBe(3);
+	});
+
+	it("accumulates the hidden thinking count for stable render keys", () => {
+		const committed = [chunk(1, "thinking"), ...chunks(2, 9), chunk(10, "thinking"), chunk(11)];
+		const state = stateWith(committed);
+		const next = trimTranscript(state, 10); // 11 > 10 → drop ceil(10/4)=3 oldest
+		expect(next.committed[0]?.seq).toBe(4);
+		expect(next.hiddenThinkingCount).toBe(1);
+	});
+
+	it("ignores a nonsensical limit", () => {
+		const state = stateWith(chunks(1, 50));
+		expect(trimTranscript(state, 0)).toBe(state);
+		expect(trimTranscript(state, Number.NaN)).toBe(state);
+	});
+});
+
+describe("windowTranscript", () => {
+	it("keeps only the newest maxCommitted chunks and sets the watermark", () => {
+		const state = stateWith(chunks(1, 1000));
+		const next = windowTranscript(state, 75);
+		expect(next.committed).toHaveLength(75);
+		expect(next.committed[0]?.seq).toBe(926);
+		expect(next.hiddenBeforeSeq).toBe(926);
+		expect(selectHasEarlier(next)).toBe(true);
+	});
+
+	it("is the identity within the window", () => {
+		const state = stateWith(chunks(1, 50));
+		expect(windowTranscript(state, 75)).toBe(state);
+		expect(selectHasEarlier(state)).toBe(false);
+	});
+});
+
+describe("applyHistory respects the watermark", () => {
+	it("does not resurrect chunks below hiddenBeforeSeq on a full-cache merge", () => {
+		const trimmed = trimTranscript(stateWith(chunks(1, 101)), 100);
+		expect(trimmed.hiddenBeforeSeq).toBe(26);
+		// A later sync merges the FULL cache (seqs 1..101) — the unloaded prefix must stay out.
+		const merged = applyHistory(trimmed, chunks(1, 101));
+		expect(merged.committed[0]?.seq).toBe(26);
+		expect(merged.committed).toHaveLength(76);
+	});
+
+	it("still merges the tail above the watermark", () => {
+		const trimmed = trimTranscript(stateWith(chunks(1, 101)), 100);
+		const merged = applyHistory(trimmed, chunks(100, 110));
+		expect(merged.committed[merged.committed.length - 1]?.seq).toBe(110);
+		expect(merged.committed[0]?.seq).toBe(26);
+	});
+});
+
+describe("restoreEarlier", () => {
+	it("pages the newest `count` earlier chunks back in and lowers the watermark", () => {
+		const windowed = windowTranscript(stateWith(chunks(1, 1000)), 75); // loaded 926..1000
+		const restored = restoreEarlier(windowed, chunks(1, 1000), 64);
+		expect(restored.committed[0]?.seq).toBe(862);
+		expect(restored.committed).toHaveLength(75 + 64);
+		expect(restored.hiddenBeforeSeq).toBe(862);
+		expect(selectHasEarlier(restored)).toBe(true);
+	});
+
+	it("clears the watermark when the restore exhausts known earlier history", () => {
+		const windowed = windowTranscript(stateWith(chunks(1, 100)), 75); // hidden: 1..25
+		const restored = restoreEarlier(windowed, chunks(1, 100), 64);
+		expect(restored.committed).toHaveLength(100);
+		expect(restored.committed[0]?.seq).toBe(1);
+		expect(restored.hiddenBeforeSeq).toBe(0);
+		expect(restored.hiddenThinkingCount).toBe(0);
+		expect(selectHasEarlier(restored)).toBe(false);
+	});
+
+	it("clears the watermark when nothing is actually below it", () => {
+		const windowed = windowTranscript(stateWith(chunks(50, 200)), 75);
+		const restored = restoreEarlier(windowed, [], 64);
+		expect(restored.hiddenBeforeSeq).toBe(0);
+		expect(restored.committed).toEqual(windowed.committed);
+	});
+
+	it("is the identity when nothing is hidden", () => {
+		const state = stateWith(chunks(1, 10));
+		expect(restoreEarlier(state, chunks(1, 10), 5)).toBe(state);
+	});
+
+	it("decrements the hidden thinking count by the restored thinking chunks", () => {
+		const committed = [chunk(1, "thinking"), chunk(2), chunk(3, "thinking"), ...chunks(4, 12)];
+		const trimmed = trimTranscript(stateWith(committed), 10); // drops 3: seqs 1..3 (2 thinking)
+		expect(trimmed.hiddenThinkingCount).toBe(2);
+		const restored = restoreEarlier(trimmed, committed, 2); // restores seqs 2..3 (1 thinking)
+		expect(restored.hiddenBeforeSeq).toBe(2);
+		expect(restored.hiddenThinkingCount).toBe(1);
+	});
+
+	it("round-trips with trim: trim → restore-all yields the original committed list", () => {
+		const original = chunks(1, 101);
+		const trimmed = trimTranscript(stateWith(original), 100);
+		const restored = restoreEarlier(trimmed, original, 1000);
+		expect(restored.committed).toEqual(original);
+		expect(restored.hiddenBeforeSeq).toBe(0);
+	});
+});
diff --git a/src/core/chunks/trim.ts b/src/core/chunks/trim.ts
new file mode 100644
index 0000000..1733027
--- /dev/null
+++ b/src/core/chunks/trim.ts
@@ -0,0 +1,149 @@
+// Chat-limit windowing for the transcript — PURE policy, zero DOM/Svelte.
+//
+// In very long conversations an unbounded transcript makes the browser crawl, so
+// the FE keeps at most `chat limit` chunks loaded and UNLOADS the oldest ones in
+// BULK: a quarter of the limit at a time (limit 100 → at 101 chunks it unloads 25,
+// leaving 76). Bulk-on-threshold — NOT one-per-delta like old Dispatch — so a trim
+// happens once per ~quarter-limit of new content instead of on every step, which
+// was the old scroll-jump-per-step failure mode. A fresh page load shows only the
+// newest `floor(0.75 × limit)` chunks, leaving headroom before the first trim.
+//
+// Unloading drops COMMITTED chunks only (provisional chunks are the in-flight
+// turn; they become committed at seal and trimmable then) and records the
+// `hiddenBeforeSeq` watermark so history merges can't resurrect them and the
+// "Show earlier messages" affordance knows where to page back in from.
+
+import type { StoredChunk } from "@dispatch/wire";
+import type { TranscriptState } from "./types";
+
+/** Chat limit used when none is configured (max loaded chunks per conversation). */
+export const DEFAULT_CHAT_LIMIT = 256;
+/** Smallest accepted chat limit (a tiny window would thrash). */
+export const MIN_CHAT_LIMIT = 10;
+/** Largest accepted chat limit. */
+export const MAX_CHAT_LIMIT = 100_000;
+
+/**
+ * Sanitize a chat limit from an untrusted source (e.g. parsed from localStorage).
+ * @param value - Candidate limit of any type.
+ * @returns `DEFAULT_CHAT_LIMIT` when `value` is not a finite number; otherwise
+ *   `value` floored and clamped into [MIN_CHAT_LIMIT, MAX_CHAT_LIMIT].
+ */
+export function normalizeChatLimit(value: unknown): number {
+	// NaN and ±Infinity have type "number" but are unusable as a limit.
+	if (typeof value !== "number" || !Number.isFinite(value)) return DEFAULT_CHAT_LIMIT;
+	const floored = Math.floor(value);
+	return Math.min(MAX_CHAT_LIMIT, Math.max(MIN_CHAT_LIMIT, floored));
+}
+
+/** Size of one bulk unload: a quarter of `limit`, rounded up to a whole chunk. */
+export function unloadCount(limit: number): number {
+	return Math.ceil(limit * 0.25);
+}
+
+/** Chunk count shown on a fresh load: 75% of `limit`, floored, never below 1. */
+export function initialWindowSize(limit: number): number {
+	return Math.max(Math.floor(0.75 * limit), 1);
+}
+
+/** Number of loaded (rendered) chunks: committed, provisional, plus one if accumulating. */
+function totalCount(state: TranscriptState): number {
+	return state.committed.length + state.provisional.length + Number(state.accumulating !== null);
+}
+
+/** Count the chunks in `chunks` whose payload type is "thinking". */
+function countThinking(chunks: readonly StoredChunk[]): number {
+	return chunks.reduce(
+		(total, entry) => (entry.chunk.type === "thinking" ? total + 1 : total),
+		0,
+	);
+}
+
+/**
+ * Unload the `drop` oldest committed chunks. The watermark moves to the oldest
+ * chunk still loaded (or just past the newest unloaded one when nothing stays
+ * loaded), and the hidden-thinking base grows by the thinking chunks removed.
+ */
+function dropOldest(state: TranscriptState, drop: number): TranscriptState {
+	const removed = state.committed.slice(0, drop);
+	const remaining = state.committed.slice(drop);
+	const oldestLoaded = remaining[0];
+	const newestRemoved = removed[removed.length - 1];
+	// Neither exists only when `committed` was empty: leave the watermark alone.
+	const fallback = newestRemoved === undefined ? state.hiddenBeforeSeq : newestRemoved.seq + 1;
+	return {
+		...state,
+		committed: remaining,
+		hiddenBeforeSeq: oldestLoaded === undefined ? fallback : oldestLoaded.seq,
+		hiddenThinkingCount: state.hiddenThinkingCount + countThinking(removed),
+	};
+}
+
+/**
+ * Enforce the chat limit. At/under `limit` (or when `limit` is not a positive
+ * finite number) the state is returned unchanged. Above it, enough whole
+ * quarters (`unloadCount(limit)` chunks each) of the OLDEST committed chunks are
+ * unloaded to get back at/under the limit — normally one quarter (limit 100:
+ * 101 → 76), more when earlier trims were deferred. Provisional chunks are
+ * never dropped, so the unload is capped at the committed count.
+ */
+export function trimTranscript(state: TranscriptState, limit: number): TranscriptState {
+	if (!(Number.isFinite(limit) && limit > 0)) return state;
+	const overshoot = totalCount(state) - limit;
+	if (overshoot <= 0) return state;
+	const step = unloadCount(limit);
+	const wanted = Math.ceil(overshoot / step) * step;
+	const droppable = Math.min(wanted, state.committed.length);
+	return droppable > 0 ? dropOldest(state, droppable) : state;
+}
+
+/**
+ * Window the committed history down to the newest `maxCommitted` chunks (fresh
+ * load: `initialWindowSize(limit)`); a fractional `maxCommitted` is floored.
+ * Identity when already within the window or `maxCommitted` is negative/non-finite.
+ */
+export function windowTranscript(state: TranscriptState, maxCommitted: number): TranscriptState {
+	if (!Number.isFinite(maxCommitted) || maxCommitted < 0) return state;
+	// Floor first: a fractional drop would move the watermark yet unload nothing.
+	const drop = state.committed.length - Math.floor(maxCommitted);
+	return drop > 0 ? dropOldest(state, drop) : state;
+}
+
+/**
+ * The "Show earlier messages" action: page unloaded history back in.
+ * @param state - Current transcript; returned unchanged when nothing is hidden.
+ * @param earlier - ALL locally-known chunks (typically the full cached
+ *   conversation); only those below the watermark are considered.
+ * @param count - How many of the newest hidden chunks to restore (minimum 1).
+ * @returns State with those chunks prepended to `committed` and the watermark
+ *   lowered to the oldest restored seq — or cleared to 0 once nothing is left.
+ */
+export function restoreEarlier(
+	state: TranscriptState,
+	earlier: readonly StoredChunk[],
+	count: number,
+): TranscriptState {
+	const watermark = state.hiddenBeforeSeq;
+	if (watermark <= 0) return state;
+	const hidden = earlier.filter((entry) => entry.seq < watermark);
+	hidden.sort((left, right) => left.seq - right.seq);
+	// Nothing is actually hidden below the watermark: clear it so the
+	// "Show earlier" affordance disappears.
+	if (hidden.length === 0) return { ...state, hiddenBeforeSeq: 0, hiddenThinkingCount: 0 };
+	// Take the newest `count` hidden chunks (always at least one).
+	const paged = hidden.slice(-Math.max(1, count));
+	// Paging in everything known below the watermark clears it (and the thinking base).
+	const exhausted = paged.length === hidden.length;
+	const remainingThinking = Math.max(0, state.hiddenThinkingCount - countThinking(paged));
+	return {
+		...state,
+		committed: [...paged, ...state.committed],
+		hiddenBeforeSeq: exhausted ? 0 : (paged[0]?.seq ?? 0),
+		hiddenThinkingCount: exhausted ? 0 : remainingThinking,
+	};
+}
+
+/** True while a watermark is set — earlier history can be offered ("Show earlier messages"). */
+export function selectHasEarlier(state: TranscriptState): boolean {
+	return 0 < state.hiddenBeforeSeq;
+}
diff --git a/src/core/chunks/types.ts b/src/core/chunks/types.ts
index faa0d3f..14619bd 100644
--- a/src/core/chunks/types.ts
+++ b/src/core/chunks/types.ts
@@ -20,6 +20,23 @@ export interface TranscriptState {
 	readonly currentTurnId: string | null;
 	readonly latestUsage: Usage | null;
 	readonly sealedTurnId: string | null;
+	/**
+	 * The chat-limit UNLOAD watermark: committed chunks with `seq <` this are
+	 * unloaded (not in `committed`, not rendered) to keep long transcripts cheap.
+	 * `0` = nothing unloaded. `applyHistory` refuses chunks below it (a cache/tail
+	 * merge must not resurrect what the trim dropped); "Show earlier messages"
+	 * lowers it via `restoreEarlier`. See `trim.ts`.
+	 */
+	readonly hiddenBeforeSeq: number;
+	/**
+	 * How many thinking-type chunks are currently unloaded below the watermark.
+	 * Pure render-key bookkeeping: the UI keys thinking collapses by ORDINAL (so
+	 * the key survives the provisional→committed seal transition), and this base
+	 * keeps those ordinals stable when a trim removes older thinking chunks —
+	 * otherwise every remaining collapse would shift keys and swap/lose its
+	 * open state mid-stream.
+	 */
+	readonly hiddenThinkingCount: number;
 	/**
 	 * True while a turn is generating on the server — derived STRUCTURALLY from the
 	 * event stream: a `turn-start` (or any turn delta) with no matching `done` /
diff --git a/src/features/chat/store.svelte.ts b/src/features/chat/store.svelte.ts
index 37049bf..5ca28af 100644
--- a/src/features/chat/store.svelte.ts
+++ b/src/features/chat/store.svelte.ts
@@ -11,9 +11,16 @@ import {
 	clearGenerating,
 	foldEvent,
 	initialState,
+	initialWindowSize,
+	normalizeChatLimit,
+	restoreEarlier,
 	selectChunks,
 	selectGenerating,
+	selectHasEarlier,
 	selectMessages,
+	trimTranscript,
+	unloadCount,
+	windowTranscript,
 } from "../../core/chunks";
 import type { MetricsState, TurnMetricsEntry } from "../../core/metrics";
 import {
@@ -33,6 +40,19 @@ export interface ChatStoreDependencies {
 	readonly historySync: HistorySync;
 	readonly metricsSync: MetricsSync;
 	readonly cache: ConversationCache;
+	/**
+	 * The chat limit: max loaded chunks before the oldest quarter is unloaded
+	 * (see `core/chunks/trim.ts`). Normalized via `normalizeChatLimit`; absent →
+	 * `DEFAULT_CHAT_LIMIT`.
+	 */
+	readonly chatLimit?: number;
+	/**
+	 * Whether unloading may run RIGHT NOW. The composition root wires this to the
+	 * smart-scroll "stuck to bottom" state: while the reader is scrolled up, a
+	 * trim would yank the content under them, so it is DEFERRED until they return
+	 * to the bottom (the next fold retries). Absent → always allowed.
+	 */
+	readonly canUnload?: () => boolean;
 }
 
 export interface ChatStore {
@@ -55,10 +75,29 @@ export interface ChatStore {
 	readonly pendingSync: boolean;
 	readonly error: string | null;
 	readonly model: string | undefined;
+	/**
+	 * Whether earlier history was unloaded by the chat limit (or never loaded by
+	 * the fresh-load window) and can be paged back in — drives the
+	 * "Show earlier messages" affordance.
+	 */
+	readonly hasEarlier: boolean;
+	/**
+	 * Render-key base for thinking collapses: how many thinking chunks are
+	 * unloaded below the watermark, so the UI's ordinal keys stay stable across
+	 * a trim (see `TranscriptState.hiddenThinkingCount`).
+	 */
+	readonly thinkingKeyBase: number;
 	handleDelta(msg: ChatDeltaMessage | ChatErrorMessage): void;
 	send(text: string): void;
 	setModel(model: string): void;
 	load(): Promise<void>;
+	/**
+	 * Page one unload-unit (`ceil(limit/4)`) of earlier history back in from the
+	 * local cache — the "Show earlier messages" action. (When the backend ships
+	 * CR-5 `?beforeSeq=`, this can fall through to the server once the cache is
+	 * exhausted.)
+	 */
+	showEarlier(): Promise<void>;
 	/**
 	 * Re-sync after a WS (re)connect. Clears any stale `generating` (a turn may
 	 * have sealed while disconnected — the live `turn-sealed` was missed), then
@@ -78,6 +117,18 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
 	let _model = $state<string | undefined>(deps.model);
 	let disposed = false;
 
+	const chatLimit = normalizeChatLimit(deps.chatLimit);
+
+	/**
+	 * Apply the chat limit to the transcript (called after transcript mutations).
+	 * Skipped while the injected `canUnload` gate reports false — e.g. the reader
+	 * scrolled up; a later call retries and `trimTranscript` catches up in quarters.
+	 */
+	function maybeTrim(): void {
+		const gateOpen = deps.canUnload === undefined || deps.canUnload();
+		if (gateOpen) transcript = trimTranscript(transcript, chatLimit);
+	}
+
 	async function syncTail(): Promise<void> {
 		if (disposed || _pendingSync) return;
 		_pendingSync = true;
@@ -86,6 +137,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
 			const res = await deps.historySync(deps.conversationId, since);
 			const merged = await deps.cache.commit(deps.conversationId, res.chunks);
 			transcript = applyHistory(transcript, merged);
+			maybeTrim();
 			_error = null;
 		} catch (err) {
 			_error = err instanceof Error ? err.message : String(err);
@@ -130,6 +182,12 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
 		get model(): string | undefined {
 			return _model;
 		},
+		get hasEarlier(): boolean {
+			return selectHasEarlier(transcript);
+		},
+		get thinkingKeyBase(): number {
+			return transcript.hiddenThinkingCount;
+		},
 
 		handleDelta(msg: ChatDeltaMessage | ChatErrorMessage): void {
 			if (msg.type === "chat.error") {
@@ -144,6 +202,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
 			}
 			transcript = foldEvent(transcript, msg.event);
 			metrics = foldMetricsEvent(metrics, msg.event);
+			maybeTrim();
 			if (transcript.sealedTurnId !== null) {
 				void syncTail();
 				void syncMetrics();
@@ -152,6 +211,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
 
 		send(text: string): void {
 			transcript = appendUserMessage(transcript, text);
+			maybeTrim();
 			const msg: ChatSendMessage = {
 				type: "chat.send",
 				conversationId: deps.conversationId,
@@ -166,14 +226,27 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
 		},
 
 		async load(): Promise<void> {
+			// Fresh load shows only the newest 75% of the limit — headroom before the
+			// first trim. Window the cached slice SYNCHRONOUSLY with its apply (no
+			// render in between), and again after the tail sync (a cold cache means
+			// syncTail pulled the whole history in one response).
+			const windowSize = initialWindowSize(chatLimit);
 			const cached = await deps.cache.load(deps.conversationId);
 			if (cached.length > 0) {
-				transcript = applyHistory(transcript, cached);
+				transcript = windowTranscript(applyHistory(transcript, cached), windowSize);
 			}
 			await syncTail();
+			transcript = windowTranscript(transcript, windowSize);
 			await syncMetrics();
 		},
 
+		async showEarlier(): Promise<void> {
+			if (disposed) return;
+			if (!selectHasEarlier(transcript)) return;
+			const cached = await deps.cache.load(deps.conversationId);
+			transcript = restoreEarlier(transcript, cached, unloadCount(chatLimit));
+		},
+
 		resync(): void {
 			if (disposed) return;
 			// A turn may have sealed while we were disconnected (missed `turn-sealed`):
diff --git a/src/features/chat/store.test.ts b/src/features/chat/store.test.ts
index 6507d69..5c798d6 100644
--- a/src/features/chat/store.test.ts
+++ b/src/features/chat/store.test.ts
@@ -892,6 +892,295 @@ describe("createChatStore", () => {
 		store.dispose();
 	});
 
+	it("chat limit: crossing the limit unloads the oldest quarter in one bulk pass", async () => {
+		const transport = createFakeTransport();
+		const historySync = createFakeHistorySync();
+		const metricsSync = createFakeMetricsSync();
+		const cache = createFakeCache();
+		const store = createChatStore({
+			conversationId: CONV_ID,
+			transport: transport.impl,
+			historySync: historySync.impl,
+			metricsSync: metricsSync.impl,
+			cache: cache.impl,
+			chatLimit: 100,
+		});
+
+		// Commit exactly 100 chunks via a sealed turn (at the limit — no trim).
+		const hundred = Array.from({ length: 100 }, (_, i) => makeStoredChunk(i + 1));
+		historySync.returnChunks = hundred;
+		store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" }));
+		store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t1" }));
+		await vi.waitFor(() => {
+			expect(store.chunks).toHaveLength(100);
+		});
+		expect(store.hasEarlier).toBe(false);
+
+		// The 101st chunk (a live tool-call) crosses the limit → 25 unload → 76 remain.
+		store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t2" }));
+		store.handleDelta(
+			deltaEvent({
+				type: "tool-call",
+				conversationId: CONV_ID,
+				turnId: "t2",
+				toolCallId: "tc1",
+				toolName: "probe",
+				input: {},
+				stepId: "t2#0" as StepId,
+			}),
+		);
+
+		expect(store.chunks).toHaveLength(76);
+		expect(store.chunks[0]?.seq).toBe(26);
+		expect(store.hasEarlier).toBe(true);
+
+		store.dispose();
+	});
+
+	it("chat limit: unloading is deferred while the gate is closed, then catches up", () => {
+		const transport = createFakeTransport();
+		const historySync = createFakeHistorySync();
+		const metricsSync = createFakeMetricsSync();
+		const cache = createFakeCache();
+		let atBottom = false; // reader scrolled up
+		const store = createChatStore({
+			conversationId: CONV_ID,
+			transport: transport.impl,
+			historySync: historySync.impl,
+			metricsSync: metricsSync.impl,
+			cache: cache.impl,
+			chatLimit: 10,
+			canUnload: () => atBottom,
+		});
+
+		// 15 live tool-calls: over the limit, but the gate defers every trim.
+		store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" }));
+		for (let i = 0; i < 15; i++) {
+			store.handleDelta(
+				deltaEvent({
+					type: "tool-call",
+					conversationId: CONV_ID,
+					turnId: "t1",
+					toolCallId: `tc${i}`,
+					toolName: "probe",
+					input: {},
+					stepId: `t1#${i}` as StepId,
+				}),
+			);
+		}
+		expect(store.chunks).toHaveLength(15);
+
+		// Reader returns to the bottom — but provisional chunks are never unloaded,
+		// so the deferred trim still can't shrink an all-provisional transcript.
+		atBottom = true;
+		store.handleDelta(
+			deltaEvent({
+				type: "tool-call",
+				conversationId: CONV_ID,
+				turnId: "t1",
+				toolCallId: "tc15",
+				toolName: "probe",
+				input: {},
+				stepId: "t1#15" as StepId,
+			}),
+		);
+		expect(store.chunks).toHaveLength(16);
+
+		store.dispose();
+	});
+
+	it("chat limit: a deferred trim catches up across committed history once the gate opens", async () => {
+		const transport = createFakeTransport();
+		const historySync = createFakeHistorySync();
+		const metricsSync = createFakeMetricsSync();
+		const cache = createFakeCache();
+		let atBottom = false;
+		const store = createChatStore({
+			conversationId: CONV_ID,
+			transport: transport.impl,
+			historySync: historySync.impl,
+			metricsSync: metricsSync.impl,
+			cache: cache.impl,
+			chatLimit: 100,
+			canUnload: () => atBottom,
+		});
+
+		// Seal a turn committing 130 chunks while the reader is scrolled up: no trim.
+		historySync.returnChunks = Array.from({ length: 130 }, (_, i) => makeStoredChunk(i + 1));
+		store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" }));
+		store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t1" }));
+		await vi.waitFor(() => {
+			expect(store.chunks).toHaveLength(130);
+		});
+
+		// Back at the bottom: the next fold trims whole quarters down to ≤ 100.
+		atBottom = true;
+		store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t2" }));
+		// 130 → 2 quarters of 25 → 80 committed (turn-start adds no chunk).
+		expect(store.chunks).toHaveLength(80);
+		expect(store.chunks[0]?.seq).toBe(51);
+
+		store.dispose();
+	});
+
+	it("chat limit: load windows a long cached conversation to 75% of the limit", async () => {
+		const transport = createFakeTransport();
+		const historySync = createFakeHistorySync();
+		const metricsSync = createFakeMetricsSync();
+		const cache = createFakeCache();
+		await cache.impl.commit(
+			CONV_ID,
+			Array.from({ length: 500 }, (_, i) => makeStoredChunk(i + 1)),
+		);
+
+		const store = createChatStore({
+			conversationId: CONV_ID,
+			transport: transport.impl,
+			historySync: historySync.impl,
+			metricsSync: metricsSync.impl,
+			cache: cache.impl,
+			chatLimit: 100,
+		});
+
+		await store.load();
+
+		// floor(100 × 0.75) = 75 newest chunks: seqs 426..500.
+		expect(store.chunks).toHaveLength(75);
+		expect(store.chunks[0]?.seq).toBe(426);
+		expect(store.hasEarlier).toBe(true);
+		// The tail sync still used the cache's real cursor (not the window's edge).
+		expect(historySync.calls[0]?.sinceSeq).toBe(500);
+
+		store.dispose();
+	});
+
+	it("chat limit: a cold cache (fresh browser) windows the full server history to 75%", async () => {
+		const transport = createFakeTransport();
+		const historySync = createFakeHistorySync();
+		const metricsSync = createFakeMetricsSync();
+		const cache = createFakeCache();
+		// Backend has no limit param yet (CR-5): sinceSeq=0 returns EVERYTHING.
+		historySync.returnChunks = Array.from({ length: 500 }, (_, i) => makeStoredChunk(i + 1));
+
+		const store = createChatStore({
+			conversationId: CONV_ID,
+			transport: transport.impl,
+			historySync: historySync.impl,
+			metricsSync: metricsSync.impl,
+			cache: cache.impl,
+			chatLimit: 100,
+		});
+
+		await store.load();
+
+		expect(store.chunks).toHaveLength(75);
+		expect(store.chunks[0]?.seq).toBe(426);
+		expect(store.hasEarlier).toBe(true);
+		// The full history is still CACHED locally (show-earlier pages from it).
+		const cached = await cache.impl.load(CONV_ID);
+		expect(cached).toHaveLength(500);
+
+		store.dispose();
+	});
+
+	it("chat limit: showEarlier pages a quarter back in from the cache", async () => {
+		const transport = createFakeTransport();
+		const historySync = createFakeHistorySync();
+		const metricsSync = createFakeMetricsSync();
+		const cache = createFakeCache();
+		await cache.impl.commit(
+			CONV_ID,
+			Array.from({ length: 500 }, (_, i) => makeStoredChunk(i + 1)),
+		);
+
+		const store = createChatStore({
+			conversationId: CONV_ID,
+			transport: transport.impl,
+			historySync: historySync.impl,
+			metricsSync: metricsSync.impl,
+			cache: cache.impl,
+			chatLimit: 100,
+		});
+
+		await store.load();
+		expect(store.chunks[0]?.seq).toBe(426);
+
+		await store.showEarlier(); // +ceil(100/4) = 25 older chunks
+		expect(store.chunks).toHaveLength(100);
+		expect(store.chunks[0]?.seq).toBe(401);
+		expect(store.hasEarlier).toBe(true);
+
+		store.dispose();
+	});
+
+	it("chat limit: showEarlier clears hasEarlier when the cache is exhausted", async () => {
+		const transport = createFakeTransport();
+		const historySync = createFakeHistorySync();
+		const metricsSync = createFakeMetricsSync();
+		const cache = createFakeCache();
+		await cache.impl.commit(
+			CONV_ID,
+			Array.from({ length: 80 }, (_, i) => makeStoredChunk(i + 1)),
+		);
+
+		const store = createChatStore({
+			conversationId: CONV_ID,
+			transport: transport.impl,
+			historySync: historySync.impl,
+			metricsSync: metricsSync.impl,
+			cache: cache.impl,
+			chatLimit: 100,
+		});
+
+		await store.load(); // window 75: hidden 1..5
+		expect(store.chunks).toHaveLength(75);
+		expect(store.hasEarlier).toBe(true);
+
+		await store.showEarlier(); // restores all 5 → nothing left below
+		expect(store.chunks).toHaveLength(80);
+		expect(store.chunks[0]?.seq).toBe(1);
+		expect(store.hasEarlier).toBe(false);
+
+		store.dispose();
+	});
+
+	it("chat limit: a post-trim history sync does not resurrect unloaded chunks", async () => {
+		const transport = createFakeTransport();
+		const historySync = createFakeHistorySync();
+		const metricsSync = createFakeMetricsSync();
+		const cache = createFakeCache();
+		await cache.impl.commit(
+			CONV_ID,
+			Array.from({ length: 500 }, (_, i) => makeStoredChunk(i + 1)),
+		);
+
+		const store = createChatStore({
+			conversationId: CONV_ID,
+			transport: transport.impl,
+			historySync: historySync.impl,
+			metricsSync: metricsSync.impl,
+			cache: cache.impl,
+			chatLimit: 100,
+		});
+
+		await store.load();
+		expect(store.chunks[0]?.seq).toBe(426);
+
+		// A sealed turn triggers syncTail, whose cache.commit returns the FULL
+		// merged cache (seqs 1..501) — the watermark must keep 1..425 out.
+		historySync.returnChunks = [makeStoredChunk(501)];
+		store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t9" }));
+		store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t9" }));
+
+		await vi.waitFor(() => {
+			expect(store.chunks[store.chunks.length - 1]?.seq).toBe(501);
+		});
+		expect(store.chunks[0]?.seq).toBe(426);
+		expect(store.chunks).toHaveLength(76);
+
+		store.dispose();
+	});
+
 	it("resync is a no-op after dispose", async () => {
 		const transport = createFakeTransport();
 		const historySync = createFakeHistorySync();
diff --git a/src/features/chat/ui.test.ts b/src/features/chat/ui.test.ts
index 278b2cf..7174821 100644
--- a/src/features/chat/ui.test.ts
+++ b/src/features/chat/ui.test.ts
@@ -41,6 +41,45 @@ describe("ChatView", () => {
 		expect(screen.getByText("Hello!")).toBeInTheDocument();
 	});
 
+	it("shows the show-earlier button only when earlier history is unloaded, and pages it in", async () => {
+		const chunks: RenderedChunk[] = [
+			{ seq: 26, role: "user", chunk: { type: "text", text: "later" }, provisional: false },
+		];
+
+		let resolveEarlier: (() => void) | undefined;
+		const onShowEarlier = vi.fn(
+			() =>
+				new Promise<void>((resolve) => {
+					resolveEarlier = resolve;
+				}),
+		);
+
+		render(ChatView, { props: { chunks, hasEarlier: true, onShowEarlier } });
+
+		const button = screen.getByRole("button", { name: /show earlier messages/i });
+		const user = userEvent.setup();
+		await user.click(button);
+
+		expect(onShowEarlier).toHaveBeenCalledTimes(1);
+		// While the page-in is awaited the button is disabled (no double-fire).
+		expect(screen.getByRole("button", { name: /loading earlier messages/i })).toBeDisabled();
+
+		resolveEarlier?.();
+		await vi.waitFor(() => {
+			expect(screen.getByRole("button", { name: /show earlier messages/i })).toBeEnabled();
+		});
+	});
+
+	it("hides the show-earlier button when nothing is unloaded", () => {
+		const chunks: RenderedChunk[] = [
+			{ seq: 1, role: "user", chunk: { type: "text", text: "all here" }, provisional: false },
+		];
+
+		render(ChatView, { props: { chunks, hasEarlier: false, onShowEarlier: vi.fn() } });
+
+		expect(screen.queryByRole("button", { name: /show earlier/i })).not.toBeInTheDocument();
+	});
+
 	it("renders tool-call chunks", () => {
 		const chunks: RenderedChunk[] = [
 			{
diff --git a/src/features/chat/ui/ChatView.svelte b/src/features/chat/ui/ChatView.svelte
index 00691aa..d1d7709 100644
--- a/src/features/chat/ui/ChatView.svelte
+++ b/src/features/chat/ui/ChatView.svelte
@@ -19,21 +19,48 @@
 	let {
 		chunks,
 		turnMetrics = [],
+		hasEarlier = false,
+		onShowEarlier,
+		thinkingKeyBase = 0,
 	}: {
 		chunks: readonly RenderedChunk[];
 		turnMetrics?: readonly TurnMetricsEntry[];
+		/** Earlier history is unloaded (chat limit) and can be paged back in. */
+		hasEarlier?: boolean;
+		/** Page earlier history back in; the caller owns scroll-position preservation. */
+		onShowEarlier?: () => Promise<void>;
+		/**
+		 * Ordinal base for thinking-collapse keys: the count of thinking chunks
+		 * unloaded by the chat limit, so the remaining ordinals don't shift (and
+		 * swap collapse state) when a trim removes older thinking blocks.
+		 */
+		thinkingKeyBase?: number;
 	} = $props();
 
+	// True while a show-earlier page-in is awaited (disables the button).
+	let loadingEarlier = $state(false);
+
+	async function showEarlier() {
+		if (!onShowEarlier || loadingEarlier) return;
+		loadingEarlier = true;
+		try {
+			await onShowEarlier();
+		} finally {
+			loadingEarlier = false;
+		}
+	}
+
 	const groups = $derived(groupRenderedChunks(chunks));
 
 	const rows = $derived(interleaveTurnMetrics(groups, turnMetrics));
 
 	// Stable per-row keys. Thinking blocks get an ordinal key (`think<n>`) that
 	// survives the provisional→committed (seq null → seq N) transition, so the
-	// collapse's open/close state is NOT lost when a turn seals. (App isolates
-	// these keys per conversation via {#key}.)
+	// collapse's open/close state is NOT lost when a turn seals. The ordinal
+	// starts at `thinkingKeyBase` so keys also survive a chat-limit trim removing
+	// older thinking blocks. (App isolates these keys per conversation via {#key}.)
 	const keyedRows = $derived.by(() => {
-		let thinking = 0;
+		let thinking = thinkingKeyBase;
 		return rows.map((row, i) => {
 			if (row.kind === "step-metrics") {
 				return { row, key: `s${row.step.stepId}` };
@@ -132,6 +159,19 @@
 {/snippet}
 
 <div class="flex flex-col gap-2 p-4 pl-6" role="log" aria-live="polite">
+	{#if hasEarlier && onShowEarlier}
+		<!-- Chat limit: older chunks are unloaded; offer to page them back in. -->
+		<div class="flex justify-center">
+			<button class="btn btn-ghost btn-xs" disabled={loadingEarlier} onclick={showEarlier}>
+				{#if loadingEarlier}
+					<span class="loading loading-spinner loading-xs" aria-hidden="true"></span>
+					Loading earlier messages…
+				{:else}
+					Show earlier messages
+				{/if}
+			</button>
+		</div>
+	{/if}
 	{#each keyedRows as { row, key } (key)}
 		{#if row.kind === "step-metrics"}
 			{@const sv = viewStepMetrics(row.step, row.index)}
diff --git a/src/features/smart-scroll/ui/controller.svelte.ts b/src/features/smart-scroll/ui/controller.svelte.ts
index 99d53ca..dbe65d1 100644
--- a/src/features/smart-scroll/ui/controller.svelte.ts
+++ b/src/features/smart-scroll/ui/controller.svelte.ts
@@ -21,6 +21,12 @@ import {
 export interface SmartScrollController {
 	/** Reactive: show the "scroll to bottom" affordance (the user has scrolled up). */
 	readonly showButton: boolean;
+	/**
+	 * Non-reactive point-in-time query: is the view stuck to the bottom right now?
+	 * For imperative callers (e.g. the chat-limit unload gate) that poll at event
+	 * time rather than subscribing — reads the reducer state, not a rune.
+	 */
+	isAtBottom(): boolean;
 	/**
 	 * Attach to the scroll container; returns a teardown to call on unmount.
 	 * Pass the inner CONTENT element to also follow height changes that aren't a
@@ -84,6 +90,10 @@ export function createSmartScrollController(): SmartScrollController {
 			return showButton;
 		},
 
+		isAtBottom(): boolean {
+			return state.stuck;
+		},
+
 		attach(node: HTMLElement, content?: HTMLElement): () => void {
 			el = node;
 			node.addEventListener("scroll", handleScroll, { passive: true });
-- 
cgit v1.2.3