summaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorAdam Malczewski <[email protected]>2026-06-12 18:26:00 +0900
committerAdam Malczewski <[email protected]>2026-06-12 18:26:00 +0900
commit1764e3e5dff836255d121a933dd92542368346f9 (patch)
treeb835055de0f0f1fd9750741764dac8b30f7498bf /src
parent4001274e3ba25a3946df1e9f2dc82ca6781cd2bf (diff)
downloaddispatch-web-1764e3e5dff836255d121a933dd92542368346f9.tar.gz
dispatch-web-1764e3e5dff836255d121a933dd92542368346f9.zip
feat(chat): chat limit — bulk quarter-unload, 75% fresh-load window, show-earlier page-in
Long transcripts no longer grow unbounded: past the chat limit (default 256 chunks, localStorage dispatch.chatLimit) the oldest ceil(limit/4) committed chunks are unloaded in ONE bulk pass — never one-per-delta (old Dispatch's scroll-jump-per-step bug) — and only while the reader is stuck to the bottom (scrolled-up readers defer the trim; it catches up in whole quarters). A fresh page load windows to the newest floor(0.75*limit). Unloading is purely local (IndexedDB cache + server keep everything); a hiddenBeforeSeq watermark keeps history merges from resurrecting unloaded chunks, and a 'Show earlier messages' affordance pages a quarter back in from the cache with scroll-anchor preservation. Thinking-collapse render keys stay stable across trims via a hiddenThinkingCount ordinal base.
- core/chunks/trim.ts: pure policy (trim/window/restore/normalize) + tests
- chat store: chatLimit + canUnload deps, windowed load, showEarlier()
- composition root: dispatch.chatLimit localStorage knob + unload gate wired to smart-scroll isAtBottom()
- backend CR-5 OPENED (not a blocker): ?limit=/?beforeSeq= on GET /conversations/:id (courier backend-handoff-chat-limit.md)
- scripts/live-probe.ts: fix pre-existing stale TurnMetricsEntry reads (m1.usage -> total.usage) that crashed the probe; 17/17 live checks pass
Diffstat (limited to 'src')
-rw-r--r--src/app/App.svelte34
-rw-r--r--src/app/store.svelte.ts32
-rw-r--r--src/core/chunks/index.ts12
-rw-r--r--src/core/chunks/reducer.ts11
-rw-r--r--src/core/chunks/trim.test.ts218
-rw-r--r--src/core/chunks/trim.ts149
-rw-r--r--src/core/chunks/types.ts17
-rw-r--r--src/features/chat/store.svelte.ts75
-rw-r--r--src/features/chat/store.test.ts289
-rw-r--r--src/features/chat/ui.test.ts39
-rw-r--r--src/features/chat/ui/ChatView.svelte46
-rw-r--r--src/features/smart-scroll/ui/controller.svelte.ts10
12 files changed, 926 insertions, 6 deletions
diff --git a/src/app/App.svelte b/src/app/App.svelte
index 50f24e7..4c5a82b 100644
--- a/src/app/App.svelte
+++ b/src/app/App.svelte
@@ -1,5 +1,6 @@
<script lang="ts">
import type { InvokeMessage } from "@dispatch/ui-contract";
+ import { tick } from "svelte";
import Table from "../components/Table.svelte";
import {
CacheWarmingView,
@@ -76,6 +77,31 @@
let transcriptEl = $state<HTMLElement | undefined>();
let transcriptContentEl = $state<HTMLElement | undefined>();
+ // Unload gate for the chat limit. The store may drop old chunks only while
+ // the gate reports true, and the gate simply forwards the smart-scroll
+ // controller's isAtBottom(). Rationale: when the reader is stuck to the
+ // bottom, the removed content sits far ABOVE the viewport and the controller
+ // re-pins to the bottom, so nothing visibly jumps; when the reader is in
+ // history, the trim is deferred rather than yanking the page (the old
+ // Dispatch bug). Wired inside an $effect so that a swapped store prop gets
+ // the gate attached again.
+ $effect(() => {
+   const readerAtBottom = (): boolean => smartScroll.isAtBottom();
+   store.attachUnloadGate(readerAtBottom);
+ });
+
+ // "Show earlier messages": page older history back in while keeping the
+ // reader's viewport where it was. Prepended content grows scrollHeight, so
+ // scrollTop is shifted by the same growth (the manual analogue of CSS scroll
+ // anchoring, which not every engine applies here).
+ //
+ // The geometry is sampled before the awaits and compared after the DOM has
+ // been flushed (`tick()`). If the active conversation changed in between, the
+ // height difference belongs to a different transcript, so no adjustment is
+ // made in that case.
+ async function handleShowEarlier(): Promise<void> {
+   const el = transcriptEl;
+   const conversationId = store.activeConversationId;
+   const prevHeight = el?.scrollHeight ?? 0;
+   const prevTop = el?.scrollTop ?? 0;
+   await store.activeChat.showEarlier();
+   await tick();
+   // Scroll region is not (or no longer) bound: nothing to adjust.
+   if (!el) return;
+   // Conversation switched while waiting: the measured growth is meaningless.
+   if (store.activeConversationId !== conversationId) return;
+   const delta = el.scrollHeight - prevHeight;
+   if (delta > 0) el.scrollTop = prevTop + delta;
+ }
+
// Attach/detach the controller to the live scroll element + content (disposed on
// unmount). The content element is observed (ResizeObserver) so the view follows
// height changes that aren't a transcript append.
@@ -201,7 +227,13 @@
<div bind:this={transcriptEl} class="h-full overflow-y-auto">
<div bind:this={transcriptContentEl}>
{#key store.activeConversationId}
- <ChatView chunks={store.activeChat.chunks} turnMetrics={store.activeChat.turnMetrics} />
+ <ChatView
+ chunks={store.activeChat.chunks}
+ turnMetrics={store.activeChat.turnMetrics}
+ hasEarlier={store.activeChat.hasEarlier}
+ onShowEarlier={handleShowEarlier}
+ thinkingKeyBase={store.activeChat.thinkingKeyBase}
+ />
{/key}
</div>
</div>
diff --git a/src/app/store.svelte.ts b/src/app/store.svelte.ts
index 2837bb5..379805f 100644
--- a/src/app/store.svelte.ts
+++ b/src/app/store.svelte.ts
@@ -15,6 +15,7 @@ import { createIdbChunkStore } from "../adapters/idb";
import { createLocalStore } from "../adapters/local-storage";
import type { WebSocketLike } from "../adapters/ws";
import { createSurfaceSocket, type SurfaceSocketOptions } from "../adapters/ws";
+import { normalizeChatLimit } from "../core/chunks";
import {
applyServerMessage,
getSurfaceSpec,
@@ -88,6 +89,15 @@ export interface AppStore {
* The backend lazily spawns servers, so this may take a moment on the first call for a cwd.
*/
lspStatus(): Promise<LspResult | null>;
+ /**
+ * Wire the chat-limit unload gate (composition-root injection, called once by
+ * the shell after it owns the scroll region): unloading old chunks is allowed
+ * only while the gate returns true — i.e. the reader is stuck to the bottom —
+ * so a trim never yanks content out from under someone reading history.
+ * Before attachment unloading is allowed (the initial view starts at the
+ * bottom).
+ */
+ attachUnloadGate(gate: () => boolean): void;
dispose(): void;
}
@@ -157,6 +167,22 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
});
const tabsStore: TabsStore = createTabsStore(storageAdapter);
+ // The chat limit (max loaded chunks per conversation) — a persisted local
+ // setting with no UI yet: edit `localStorage["dispatch.chatLimit"]`. The
+ // default is written back on first run so the knob is discoverable.
+ const chatLimitStore = createLocalStore<number>("dispatch.chatLimit", {
+ storage: localStorageOpt,
+ });
+ const storedChatLimit = chatLimitStore.load();
+ const chatLimit = normalizeChatLimit(storedChatLimit);
+ if (storedChatLimit === null) {
+ chatLimitStore.save(chatLimit);
+ }
+
+ // Unload gate — attached by the shell once it owns the scroll region (see
+ // `AppStore.attachUnloadGate`). Until then, unloading is allowed.
+ let unloadGate: (() => boolean) | null = null;
+
const cache: ConversationCache = createConversationCache(
createIdbChunkStore({ indexedDB: indexedDBFactory }),
);
@@ -178,6 +204,8 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
historySync,
metricsSync,
cache,
+ chatLimit,
+ canUnload: () => (unloadGate === null ? true : unloadGate()),
});
}
@@ -607,6 +635,10 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
};
}
},
+ attachUnloadGate(gate: () => boolean): void {
+ unloadGate = gate; // replaces any earlier gate; `canUnload` consults it on each call (null = always allowed)
+ },
+
dispose(): void {
for (const store of chatStores.values()) {
store.dispose();
diff --git a/src/core/chunks/index.ts b/src/core/chunks/index.ts
index ecfee74..6ab0f35 100644
--- a/src/core/chunks/index.ts
+++ b/src/core/chunks/index.ts
@@ -8,6 +8,18 @@ export {
initialState,
} from "./reducer";
export { selectChunks, selectGenerating, selectMessages } from "./selectors";
+export {
+ DEFAULT_CHAT_LIMIT,
+ initialWindowSize,
+ MAX_CHAT_LIMIT,
+ MIN_CHAT_LIMIT,
+ normalizeChatLimit,
+ restoreEarlier,
+ selectHasEarlier,
+ trimTranscript,
+ unloadCount,
+ windowTranscript,
+} from "./trim";
export type {
AccumulatingChunk,
ProvisionalChunk,
diff --git a/src/core/chunks/reducer.ts b/src/core/chunks/reducer.ts
index 7ce55ce..0a57839 100644
--- a/src/core/chunks/reducer.ts
+++ b/src/core/chunks/reducer.ts
@@ -10,6 +10,8 @@ export function initialState(): TranscriptState {
currentTurnId: null,
latestUsage: null,
sealedTurnId: null,
+ hiddenBeforeSeq: 0,
+ hiddenThinkingCount: 0,
generating: false,
};
}
@@ -41,6 +43,10 @@ function flushAccumulating(
* Dedupes by seq (new wins), keeps seq-monotonic order, idempotent.
* When sealedTurnId is set, drops all provisional chunks (now superseded)
* and clears sealedTurnId.
+ *
+ * Chunks below the chat-limit unload watermark (`hiddenBeforeSeq`) are
+ * REJECTED: a full-cache or tail merge must not resurrect what the trim
+ * unloaded. Restoring earlier history goes through `restoreEarlier` instead.
*/
export function applyHistory(
state: TranscriptState,
@@ -48,7 +54,10 @@ export function applyHistory(
): TranscriptState {
const seqMap = new Map<number, StoredChunk>();
for (const c of state.committed) seqMap.set(c.seq, c);
- for (const c of chunks) seqMap.set(c.seq, c);
+ for (const c of chunks) {
+ if (c.seq < state.hiddenBeforeSeq) continue;
+ seqMap.set(c.seq, c);
+ }
const committed = Array.from(seqMap.values()).sort((a, b) => a.seq - b.seq);
if (state.sealedTurnId !== null) {
diff --git a/src/core/chunks/trim.test.ts b/src/core/chunks/trim.test.ts
new file mode 100644
index 0000000..091b646
--- /dev/null
+++ b/src/core/chunks/trim.test.ts
@@ -0,0 +1,218 @@
+import type { StoredChunk } from "@dispatch/wire";
+import { describe, expect, it } from "vitest";
+import { applyHistory, initialState } from "./reducer";
+import {
+ DEFAULT_CHAT_LIMIT,
+ initialWindowSize,
+ MAX_CHAT_LIMIT,
+ MIN_CHAT_LIMIT,
+ normalizeChatLimit,
+ restoreEarlier,
+ selectHasEarlier,
+ trimTranscript,
+ unloadCount,
+ windowTranscript,
+} from "./trim";
+import type { TranscriptState } from "./types";
+
+/** Test fixture: one assistant chunk with the given seq, type and text `c<seq>`. */
+function chunk(seq: number, type: "text" | "thinking" = "text"): StoredChunk {
+  const text = `c${seq}`;
+  return { seq, role: "assistant", chunk: { type, text } };
+}
+
+/** Test fixture: text chunks for every seq in the inclusive range `from..to`. */
+function chunks(from: number, to: number): StoredChunk[] {
+  const size = Math.max(0, to - from + 1);
+  return Array.from({ length: size }, (_, offset) => chunk(from + offset));
+}
+
+/** Test fixture: a fresh `initialState()` whose `committed` list is replaced. */
+function stateWith(committed: readonly StoredChunk[]): TranscriptState {
+  const base = initialState();
+  return { ...base, committed };
+}
+
+describe("normalizeChatLimit", () => {
+ it("defaults non-numeric / NaN / missing values", () => {
+ expect(normalizeChatLimit(undefined)).toBe(DEFAULT_CHAT_LIMIT);
+ expect(normalizeChatLimit(null)).toBe(DEFAULT_CHAT_LIMIT);
+ expect(normalizeChatLimit("100")).toBe(DEFAULT_CHAT_LIMIT);
+ expect(normalizeChatLimit(Number.NaN)).toBe(DEFAULT_CHAT_LIMIT);
+ expect(normalizeChatLimit(Number.POSITIVE_INFINITY)).toBe(DEFAULT_CHAT_LIMIT);
+ });
+
+ it("floors and clamps numeric values", () => {
+ expect(normalizeChatLimit(100.9)).toBe(100);
+ expect(normalizeChatLimit(0)).toBe(MIN_CHAT_LIMIT);
+ expect(normalizeChatLimit(-5)).toBe(MIN_CHAT_LIMIT);
+ expect(normalizeChatLimit(10_000_000)).toBe(MAX_CHAT_LIMIT);
+ expect(normalizeChatLimit(256)).toBe(256);
+ });
+});
+
+describe("unloadCount / initialWindowSize", () => {
+ it("unload is a quarter of the limit, rounded up", () => {
+ expect(unloadCount(100)).toBe(25);
+ expect(unloadCount(256)).toBe(64);
+ expect(unloadCount(10)).toBe(3);
+ });
+
+ it("initial window is 75% of the limit, rounded down", () => {
+ expect(initialWindowSize(100)).toBe(75);
+ expect(initialWindowSize(256)).toBe(192);
+ expect(initialWindowSize(1)).toBe(1); // never below 1
+ });
+});
+
+describe("trimTranscript", () => {
+ it("is the identity at or under the limit", () => {
+ const at = stateWith(chunks(1, 100));
+ expect(trimTranscript(at, 100)).toBe(at);
+ const under = stateWith(chunks(1, 99));
+ expect(trimTranscript(under, 100)).toBe(under);
+ });
+
+ it("unloads exactly a quarter when the limit is first exceeded (100 → 101 drops 25)", () => {
+ const state = stateWith(chunks(1, 101));
+ const next = trimTranscript(state, 100);
+ expect(next.committed).toHaveLength(76);
+ expect(next.committed[0]?.seq).toBe(26);
+ expect(next.hiddenBeforeSeq).toBe(26);
+ });
+
+ it("unloads multiple quarters when trimming was deferred far past the limit", () => {
+ const state = stateWith(chunks(1, 130));
+ const next = trimTranscript(state, 100);
+ // 130 → needs 2 quarters (25 each) to get to ≤ 100 → 80 remain.
+ expect(next.committed).toHaveLength(80);
+ expect(next.committed[0]?.seq).toBe(51);
+ expect(next.hiddenBeforeSeq).toBe(51);
+ });
+
+ it("counts provisional + accumulating toward the limit but never drops them", () => {
+ const base = stateWith(chunks(1, 98));
+ const state: TranscriptState = {
+ ...base,
+ provisional: [
+ { role: "user", chunk: { type: "text", text: "q" } },
+ { role: "assistant", chunk: { type: "text", text: "a" } },
+ ],
+ accumulating: { kind: "text", text: "stream" },
+ };
+ // 98 + 2 + 1 = 101 > 100 → drop 25 committed.
+ const next = trimTranscript(state, 100);
+ expect(next.committed).toHaveLength(73);
+ expect(next.provisional).toHaveLength(2);
+ expect(next.accumulating).not.toBeNull();
+ });
+
+ it("caps the drop at the committed length", () => {
+ const base = stateWith(chunks(1, 2));
+ const provisional = Array.from({ length: 20 }, (_, i) => ({
+ role: "assistant" as const,
+ chunk: { type: "text" as const, text: `p${i}` },
+ }));
+ const state: TranscriptState = { ...base, provisional };
+ const next = trimTranscript(state, 10);
+ expect(next.committed).toHaveLength(0);
+ expect(next.provisional).toHaveLength(20);
+ // Watermark advances past the last dropped committed chunk.
+ expect(next.hiddenBeforeSeq).toBe(3);
+ });
+
+ it("accumulates the hidden thinking count for stable render keys", () => {
+ const committed = [chunk(1, "thinking"), ...chunks(2, 9), chunk(10, "thinking"), chunk(11)];
+ const state = stateWith(committed);
+ const next = trimTranscript(state, 10); // 11 > 10 → drop ceil(10/4)=3 oldest
+ expect(next.committed[0]?.seq).toBe(4);
+ expect(next.hiddenThinkingCount).toBe(1);
+ });
+
+ it("ignores a nonsensical limit", () => {
+ const state = stateWith(chunks(1, 50));
+ expect(trimTranscript(state, 0)).toBe(state);
+ expect(trimTranscript(state, Number.NaN)).toBe(state);
+ });
+});
+
+describe("windowTranscript", () => {
+ it("keeps only the newest maxCommitted chunks and sets the watermark", () => {
+ const state = stateWith(chunks(1, 1000));
+ const next = windowTranscript(state, 75);
+ expect(next.committed).toHaveLength(75);
+ expect(next.committed[0]?.seq).toBe(926);
+ expect(next.hiddenBeforeSeq).toBe(926);
+ expect(selectHasEarlier(next)).toBe(true);
+ });
+
+ it("is the identity within the window", () => {
+ const state = stateWith(chunks(1, 50));
+ expect(windowTranscript(state, 75)).toBe(state);
+ expect(selectHasEarlier(state)).toBe(false);
+ });
+});
+
+describe("applyHistory respects the watermark", () => {
+ it("does not resurrect chunks below hiddenBeforeSeq on a full-cache merge", () => {
+ const trimmed = trimTranscript(stateWith(chunks(1, 101)), 100);
+ expect(trimmed.hiddenBeforeSeq).toBe(26);
+ // A later sync merges the FULL cache (seqs 1..101) — the unloaded prefix must stay out.
+ const merged = applyHistory(trimmed, chunks(1, 101));
+ expect(merged.committed[0]?.seq).toBe(26);
+ expect(merged.committed).toHaveLength(76);
+ });
+
+ it("still merges the tail above the watermark", () => {
+ const trimmed = trimTranscript(stateWith(chunks(1, 101)), 100);
+ const merged = applyHistory(trimmed, chunks(100, 110));
+ expect(merged.committed[merged.committed.length - 1]?.seq).toBe(110);
+ expect(merged.committed[0]?.seq).toBe(26);
+ });
+});
+
+describe("restoreEarlier", () => {
+ it("pages the newest `count` earlier chunks back in and lowers the watermark", () => {
+ const windowed = windowTranscript(stateWith(chunks(1, 1000)), 75); // loaded 926..1000
+ const restored = restoreEarlier(windowed, chunks(1, 1000), 64);
+ expect(restored.committed[0]?.seq).toBe(862);
+ expect(restored.committed).toHaveLength(75 + 64);
+ expect(restored.hiddenBeforeSeq).toBe(862);
+ expect(selectHasEarlier(restored)).toBe(true);
+ });
+
+ it("clears the watermark when the restore exhausts known earlier history", () => {
+ const windowed = windowTranscript(stateWith(chunks(1, 100)), 75); // hidden: 1..25
+ const restored = restoreEarlier(windowed, chunks(1, 100), 64);
+ expect(restored.committed).toHaveLength(100);
+ expect(restored.committed[0]?.seq).toBe(1);
+ expect(restored.hiddenBeforeSeq).toBe(0);
+ expect(restored.hiddenThinkingCount).toBe(0);
+ expect(selectHasEarlier(restored)).toBe(false);
+ });
+
+ it("clears the watermark when nothing is actually below it", () => {
+ const windowed = windowTranscript(stateWith(chunks(50, 200)), 75);
+ const restored = restoreEarlier(windowed, [], 64);
+ expect(restored.hiddenBeforeSeq).toBe(0);
+ expect(restored.committed).toEqual(windowed.committed);
+ });
+
+ it("is the identity when nothing is hidden", () => {
+ const state = stateWith(chunks(1, 10));
+ expect(restoreEarlier(state, chunks(1, 10), 5)).toBe(state);
+ });
+
+ it("decrements the hidden thinking count by the restored thinking chunks", () => {
+ const committed = [chunk(1, "thinking"), chunk(2), chunk(3, "thinking"), ...chunks(4, 12)];
+ const trimmed = trimTranscript(stateWith(committed), 10); // drops 3: seqs 1..3 (2 thinking)
+ expect(trimmed.hiddenThinkingCount).toBe(2);
+ const restored = restoreEarlier(trimmed, committed, 2); // restores seqs 2..3 (1 thinking)
+ expect(restored.hiddenBeforeSeq).toBe(2);
+ expect(restored.hiddenThinkingCount).toBe(1);
+ });
+
+ it("round-trips with trim: trim → restore-all yields the original committed list", () => {
+ const original = chunks(1, 101);
+ const trimmed = trimTranscript(stateWith(original), 100);
+ const restored = restoreEarlier(trimmed, original, 1000);
+ expect(restored.committed).toEqual(original);
+ expect(restored.hiddenBeforeSeq).toBe(0);
+ });
+});
diff --git a/src/core/chunks/trim.ts b/src/core/chunks/trim.ts
new file mode 100644
index 0000000..1733027
--- /dev/null
+++ b/src/core/chunks/trim.ts
@@ -0,0 +1,149 @@
+// Chat-limit windowing for the transcript — PURE policy, zero DOM/Svelte.
+//
+// In very long conversations an unbounded transcript makes the browser crawl, so
+// the FE keeps at most `chat limit` chunks loaded and UNLOADS the oldest ones in
+// BULK: a quarter of the limit at a time (limit 100 → at 101 chunks it unloads 25,
+// leaving 76). Bulk-on-threshold — NOT one-per-delta like old Dispatch — so a trim
+// happens once per ~quarter-limit of new content instead of on every step, which
+// was the old scroll-jump-per-step failure mode. A fresh page load shows only the
+// newest `floor(0.75 × limit)` chunks, leaving headroom before the first trim.
+//
+// Unloading drops COMMITTED chunks only (provisional chunks are the in-flight
+// turn; they become committed at seal and trimmable then) and records the
+// `hiddenBeforeSeq` watermark so history merges can't resurrect them and the
+// "Show earlier messages" affordance knows where to page back in from.
+
+import type { StoredChunk } from "@dispatch/wire";
+import type { TranscriptState } from "./types";
+
+/** Default chat limit (max loaded chunks per conversation). */
+export const DEFAULT_CHAT_LIMIT = 256;
+/** Hard floor for a configured chat limit (a tiny window would thrash). */
+export const MIN_CHAT_LIMIT = 10;
+/** Hard ceiling for a configured chat limit. */
+export const MAX_CHAT_LIMIT = 100_000;
+
+/**
+ * Sanitize a chat limit that comes from an untrusted place (for example a
+ * value parsed out of localStorage).
+ *
+ * @param value - Candidate limit of any type.
+ * @returns `DEFAULT_CHAT_LIMIT` when `value` is not a finite number; otherwise
+ *   `Math.floor(value)` clamped into `[MIN_CHAT_LIMIT, MAX_CHAT_LIMIT]`.
+ */
+export function normalizeChatLimit(value: unknown): number {
+  if (typeof value === "number" && Number.isFinite(value)) {
+    const floored = Math.floor(value);
+    return Math.min(MAX_CHAT_LIMIT, Math.max(MIN_CHAT_LIMIT, floored));
+  }
+  return DEFAULT_CHAT_LIMIT;
+}
+
+/**
+ * The bulk-unload unit: a quarter of the limit, rounded up.
+ *
+ * @param limit - The chat limit (maximum loaded chunk count).
+ * @returns `Math.ceil(limit / 4)`; e.g. 100 → 25, 10 → 3. The argument is not
+ *   validated here, so a NaN limit yields NaN.
+ */
+export function unloadCount(limit: number): number {
+ return Math.ceil(limit / 4);
+}
+
+/**
+ * The fresh-load window: 75% of the limit, rounded down (≥ 1).
+ *
+ * @param limit - The chat limit (maximum loaded chunk count).
+ * @returns `Math.floor(limit * 0.75)`, raised to 1 when that would be lower;
+ *   e.g. 256 → 192, 1 → 1.
+ */
+export function initialWindowSize(limit: number): number {
+ return Math.max(1, Math.floor(limit * 0.75));
+}
+
+/**
+ * Number of loaded (rendered) chunks: every committed and provisional chunk,
+ * plus one more when an accumulating chunk is present.
+ */
+function totalCount(state: TranscriptState): number {
+  const streaming = state.accumulating === null ? 0 : 1;
+  return state.committed.length + state.provisional.length + streaming;
+}
+
+/** Count the chunks in `chunks` whose inner chunk type is `"thinking"`. */
+function countThinking(chunks: readonly StoredChunk[]): number {
+  return chunks.reduce(
+    (total, stored) => (stored.chunk.type === "thinking" ? total + 1 : total),
+    0,
+  );
+}
+
+/**
+ * Remove the `drop` oldest entries from `state.committed`.
+ *
+ * The watermark moves to the seq of the oldest chunk that stays loaded; when
+ * nothing stays loaded, to one past the newest removed seq; when nothing was
+ * removed either, it is left as it was. Thinking-type chunks among the removed
+ * ones are added to `hiddenThinkingCount`.
+ *
+ * @param state - Transcript to trim; a new state object is returned.
+ * @param drop - Number of leading committed chunks to remove.
+ */
+function dropOldest(state: TranscriptState, drop: number): TranscriptState {
+  const removed = state.committed.slice(0, drop);
+  const remaining = state.committed.slice(drop);
+  const oldestRemaining = remaining[0];
+  const newestRemoved = removed[removed.length - 1];
+  const hiddenBeforeSeq =
+    oldestRemaining !== undefined
+      ? oldestRemaining.seq
+      : newestRemoved !== undefined
+        ? newestRemoved.seq + 1
+        : state.hiddenBeforeSeq;
+  const hiddenThinkingCount = state.hiddenThinkingCount + countThinking(removed);
+  return { ...state, committed: remaining, hiddenBeforeSeq, hiddenThinkingCount };
+}
+
+/**
+ * Apply the chat limit to a transcript.
+ *
+ * When the loaded count (committed + provisional + accumulating) is above
+ * `limit`, whole quarters of `unloadCount(limit)` chunks are removed from the
+ * OLD end of `committed` until the count is back at or under the limit. That
+ * is usually a single quarter (limit 100: 101 → 76) and more only when a trim
+ * was postponed. The removal is capped at the committed length, so
+ * provisional and accumulating chunks are never touched.
+ *
+ * @param state - Current transcript state.
+ * @param limit - Maximum loaded chunk count; non-finite or non-positive values
+ *   are ignored.
+ * @returns The same `state` object when nothing has to be removed, otherwise a
+ *   trimmed copy with the watermark and thinking base advanced.
+ */
+export function trimTranscript(state: TranscriptState, limit: number): TranscriptState {
+  const limitUsable = Number.isFinite(limit) && limit > 0;
+  if (!limitUsable) return state;
+  const overflow = totalCount(state) - limit;
+  if (overflow <= 0) return state;
+  const step = unloadCount(limit);
+  const wanted = Math.ceil(overflow / step) * step;
+  const drop = Math.min(wanted, state.committed.length);
+  return drop > 0 ? dropOldest(state, drop) : state;
+}
+
+/**
+ * Keep only the newest `maxCommitted` committed chunks (the fresh-load path
+ * passes `initialWindowSize(limit)`).
+ *
+ * @param state - Current transcript state.
+ * @param maxCommitted - Number of committed chunks to keep; non-finite or
+ *   negative values are ignored.
+ * @returns The same `state` object when `committed` already fits, otherwise a
+ *   copy with the older chunks dropped and the watermark advanced.
+ */
+export function windowTranscript(state: TranscriptState, maxCommitted: number): TranscriptState {
+  const windowUsable = Number.isFinite(maxCommitted) && maxCommitted >= 0;
+  if (!windowUsable) return state;
+  const excess = state.committed.length - maxCommitted;
+  return excess > 0 ? dropOldest(state, excess) : state;
+}
+
+/**
+ * Page earlier (unloaded) history back in — the "Show earlier messages" action.
+ *
+ * `earlier` must be ALL locally-known chunks below the watermark (typically the
+ * full cached conversation; chunks at/above the watermark are ignored). The
+ * newest `count` of them are merged back in front of `committed` and the
+ * watermark lowers to the new oldest loaded seq — or clears to 0 when this
+ * restore exhausts the known earlier history (nothing left to offer).
+ *
+ * @param state - Current transcript state; returned unchanged when nothing is
+ *   hidden (`hiddenBeforeSeq <= 0`).
+ * @param earlier - Candidate chunks in any order. Entries sharing a seq are
+ *   collapsed to one (the later entry wins, matching `applyHistory`), so the
+ *   same chunk is never prepended twice.
+ * @param count - How many chunks to restore. Fractions are floored and at
+ *   least one chunk is restored; NaN is treated as 1. `Infinity` restores
+ *   everything below the watermark.
+ * @returns A new state with the restored chunks prepended, or a copy with the
+ *   watermark cleared when `earlier` holds nothing below it.
+ */
+export function restoreEarlier(
+  state: TranscriptState,
+  earlier: readonly StoredChunk[],
+  count: number,
+): TranscriptState {
+  if (state.hiddenBeforeSeq <= 0) return state;
+  const bySeq = new Map<number, StoredChunk>();
+  for (const c of earlier) {
+    if (c.seq < state.hiddenBeforeSeq) bySeq.set(c.seq, c);
+  }
+  const below = Array.from(bySeq.values()).sort((a, b) => a.seq - b.seq);
+  if (below.length === 0) {
+    // Nothing is actually hidden below the watermark: clear it so the
+    // "Show earlier" affordance disappears.
+    return { ...state, hiddenBeforeSeq: 0, hiddenThinkingCount: 0 };
+  }
+  // `Math.max(1, NaN)` is NaN and `slice(-NaN)` returns the WHOLE array, so a
+  // NaN count would silently restore everything; pin it to the minimum instead.
+  const take = Number.isNaN(count) ? 1 : Math.max(1, Math.floor(count));
+  const keep = below.slice(-take);
+  const exhausted = keep.length === below.length;
+  const firstKept = keep[0];
+  return {
+    ...state,
+    committed: [...keep, ...state.committed],
+    hiddenBeforeSeq: exhausted || firstKept === undefined ? 0 : firstKept.seq,
+    hiddenThinkingCount: exhausted
+      ? 0
+      : Math.max(0, state.hiddenThinkingCount - countThinking(keep)),
+  };
+}
+
+/**
+ * Tell whether earlier history is unloaded and can be offered through
+ * "Show earlier messages": true exactly when the watermark is above 0.
+ */
+export function selectHasEarlier(state: TranscriptState): boolean {
+  const { hiddenBeforeSeq } = state;
+  return hiddenBeforeSeq > 0;
+}
diff --git a/src/core/chunks/types.ts b/src/core/chunks/types.ts
index faa0d3f..14619bd 100644
--- a/src/core/chunks/types.ts
+++ b/src/core/chunks/types.ts
@@ -21,6 +21,23 @@ export interface TranscriptState {
readonly latestUsage: Usage | null;
readonly sealedTurnId: string | null;
/**
+ * The chat-limit UNLOAD watermark: committed chunks with `seq <` this are
+ * unloaded (not in `committed`, not rendered) to keep long transcripts cheap.
+ * `0` = nothing unloaded. `applyHistory` refuses chunks below it (a cache/tail
+ * merge must not resurrect what the trim dropped); "Show earlier messages"
+ * lowers it via `restoreEarlier`. See `trim.ts`.
+ */
+ readonly hiddenBeforeSeq: number;
+ /**
+ * How many thinking-type chunks are currently unloaded below the watermark.
+ * Pure render-key bookkeeping: the UI keys thinking collapses by ORDINAL (so
+ * the key survives the provisional→committed seal transition), and this base
+ * keeps those ordinals stable when a trim removes older thinking chunks —
+ * otherwise every remaining collapse would shift keys and swap/lose its
+ * open state mid-stream.
+ */
+ readonly hiddenThinkingCount: number;
+ /**
* True while a turn is generating on the server — derived STRUCTURALLY from the
* event stream: a `turn-start` (or any turn delta) with no matching `done` /
* `turn-sealed` / `error` yet. A late-joiner that subscribes mid-turn gets the
diff --git a/src/features/chat/store.svelte.ts b/src/features/chat/store.svelte.ts
index 37049bf..5ca28af 100644
--- a/src/features/chat/store.svelte.ts
+++ b/src/features/chat/store.svelte.ts
@@ -11,9 +11,16 @@ import {
clearGenerating,
foldEvent,
initialState,
+ initialWindowSize,
+ normalizeChatLimit,
+ restoreEarlier,
selectChunks,
selectGenerating,
+ selectHasEarlier,
selectMessages,
+ trimTranscript,
+ unloadCount,
+ windowTranscript,
} from "../../core/chunks";
import type { MetricsState, TurnMetricsEntry } from "../../core/metrics";
import {
@@ -33,6 +40,19 @@ export interface ChatStoreDependencies {
readonly historySync: HistorySync;
readonly metricsSync: MetricsSync;
readonly cache: ConversationCache;
+ /**
+ * The chat limit: max loaded chunks before the oldest quarter is unloaded
+ * (see `core/chunks/trim.ts`). Normalized via `normalizeChatLimit`; absent →
+ * `DEFAULT_CHAT_LIMIT`.
+ */
+ readonly chatLimit?: number;
+ /**
+ * Whether unloading may run RIGHT NOW. The composition root wires this to the
+ * smart-scroll "stuck to bottom" state: while the reader is scrolled up, a
+ * trim would yank the content under them, so it is DEFERRED until they return
+ * to the bottom (the next fold retries). Absent → always allowed.
+ */
+ readonly canUnload?: () => boolean;
}
export interface ChatStore {
@@ -55,11 +75,30 @@ export interface ChatStore {
readonly pendingSync: boolean;
readonly error: string | null;
readonly model: string | undefined;
+ /**
+ * Whether earlier history was unloaded by the chat limit (or never loaded by
+ * the fresh-load window) and can be paged back in — drives the
+ * "Show earlier messages" affordance.
+ */
+ readonly hasEarlier: boolean;
+ /**
+ * Render-key base for thinking collapses: how many thinking chunks are
+ * unloaded below the watermark, so the UI's ordinal keys stay stable across
+ * a trim (see `TranscriptState.hiddenThinkingCount`).
+ */
+ readonly thinkingKeyBase: number;
handleDelta(msg: ChatDeltaMessage | ChatErrorMessage): void;
send(text: string): void;
setModel(model: string): void;
load(): Promise<void>;
/**
+ * Page one unload-unit (`ceil(limit/4)`) of earlier history back in from the
+ * local cache — the "Show earlier messages" action. (When the backend ships
+ * CR-5 `?beforeSeq=`, this can fall through to the server once the cache is
+ * exhausted.)
+ */
+ showEarlier(): Promise<void>;
+ /**
* Re-sync after a WS (re)connect. Clears any stale `generating` (a turn may
* have sealed while disconnected — the live `turn-sealed` was missed), then
* pulls newly-sealed turns from history (+ metrics). If the turn is still
@@ -78,6 +117,18 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
let _model = $state<string | undefined>(deps.model);
let disposed = false;
+ const chatLimit = normalizeChatLimit(deps.chatLimit);
+
+ /**
+  * Apply the chat limit to the current transcript, provided unloading is
+  * allowed right now. When `deps.canUnload` is supplied and returns false
+  * (the composition root wires it to the reader being scrolled up), nothing
+  * happens; a later call retries and `trimTranscript` then unloads as many
+  * whole quarters as are needed to catch up.
+  */
+ function maybeTrim(): void {
+   const gateOpen = deps.canUnload === undefined || deps.canUnload();
+   if (gateOpen) {
+     transcript = trimTranscript(transcript, chatLimit);
+   }
+ }
+
async function syncTail(): Promise<void> {
if (disposed || _pendingSync) return;
_pendingSync = true;
@@ -86,6 +137,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
const res = await deps.historySync(deps.conversationId, since);
const merged = await deps.cache.commit(deps.conversationId, res.chunks);
transcript = applyHistory(transcript, merged);
+ maybeTrim();
_error = null;
} catch (err) {
_error = err instanceof Error ? err.message : String(err);
@@ -130,6 +182,12 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
get model(): string | undefined {
return _model;
},
+ get hasEarlier(): boolean {
+ return selectHasEarlier(transcript); // true while the unload watermark (hiddenBeforeSeq) is above 0
+ },
+ get thinkingKeyBase(): number {
+ return transcript.hiddenThinkingCount; // thinking chunks currently unloaded below the watermark
+ },
handleDelta(msg: ChatDeltaMessage | ChatErrorMessage): void {
if (msg.type === "chat.error") {
@@ -144,6 +202,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
}
transcript = foldEvent(transcript, msg.event);
metrics = foldMetricsEvent(metrics, msg.event);
+ maybeTrim();
if (transcript.sealedTurnId !== null) {
void syncTail();
void syncMetrics();
@@ -152,6 +211,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
send(text: string): void {
transcript = appendUserMessage(transcript, text);
+ maybeTrim();
const msg: ChatSendMessage = {
type: "chat.send",
conversationId: deps.conversationId,
@@ -166,14 +226,27 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore {
},
async load(): Promise<void> {
+ // Fresh load shows only the newest 75% of the limit — headroom before the
+ // first trim. Window the cached slice SYNCHRONOUSLY with its apply (no
+ // render in between), and again after the tail sync (a cold cache means
+ // syncTail pulled the whole history in one response).
+ const windowSize = initialWindowSize(chatLimit);
const cached = await deps.cache.load(deps.conversationId);
if (cached.length > 0) {
- transcript = applyHistory(transcript, cached);
+ transcript = windowTranscript(applyHistory(transcript, cached), windowSize);
}
await syncTail();
+ transcript = windowTranscript(transcript, windowSize);
await syncMetrics();
},
+ /**
+  * Page one unload unit (`unloadCount(chatLimit)` chunks) of earlier history
+  * back in from the local cache. Does nothing when the store is disposed or
+  * when no earlier history is hidden.
+  *
+  * A failing cache read is recorded in `_error`, the same way `syncTail`
+  * reports its failures, instead of rejecting the returned promise. The
+  * transcript is left as it was in that case.
+  */
+ async showEarlier(): Promise<void> {
+   if (disposed) return;
+   if (!selectHasEarlier(transcript)) return;
+   try {
+     const cached = await deps.cache.load(deps.conversationId);
+     // The store may have been disposed while the cache read was pending.
+     if (disposed) return;
+     transcript = restoreEarlier(transcript, cached, unloadCount(chatLimit));
+   } catch (err) {
+     _error = err instanceof Error ? err.message : String(err);
+   }
+ },
+
resync(): void {
if (disposed) return;
// A turn may have sealed while we were disconnected (missed `turn-sealed`):
diff --git a/src/features/chat/store.test.ts b/src/features/chat/store.test.ts
index 6507d69..5c798d6 100644
--- a/src/features/chat/store.test.ts
+++ b/src/features/chat/store.test.ts
@@ -892,6 +892,295 @@ describe("createChatStore", () => {
store.dispose();
});
+ it("chat limit: crossing the limit unloads the oldest quarter in one bulk pass", async () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache();
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ chatLimit: 100, // one unload pass removes ceil(100 / 4) = 25 chunks
+ });
+
+ // Commit exactly 100 chunks via a sealed turn (at the limit — no trim).
+ const hundred = Array.from({ length: 100 }, (_, i) => makeStoredChunk(i + 1));
+ historySync.returnChunks = hundred;
+ store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" }));
+ store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t1" }));
+ await vi.waitFor(() => {
+ expect(store.chunks).toHaveLength(100);
+ });
+ expect(store.hasEarlier).toBe(false); // nothing has been unloaded yet
+
+ // The 101st chunk (a live tool-call) crosses the limit → 25 unload → 76 remain.
+ store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t2" }));
+ store.handleDelta(
+ deltaEvent({
+ type: "tool-call",
+ conversationId: CONV_ID,
+ turnId: "t2",
+ toolCallId: "tc1",
+ toolName: "probe",
+ input: {},
+ stepId: "t2#0" as StepId,
+ }),
+ );
+
+ expect(store.chunks).toHaveLength(76); // asserted synchronously: the trim ran inside handleDelta
+ expect(store.chunks[0]?.seq).toBe(26); // seqs 1..25 were unloaded
+ expect(store.hasEarlier).toBe(true);
+
+ store.dispose();
+ });
+
+ it("chat limit: unloading is deferred while the gate is closed, and provisional chunks are never unloaded", () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache();
+ let atBottom = false; // reader scrolled up
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ chatLimit: 10,
+ canUnload: () => atBottom, // unload gate: this test flips it via the atBottom flag
+ });
+
+ // 15 live tool-calls: over the limit, but the gate defers every trim.
+ store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" }));
+ for (let i = 0; i < 15; i++) {
+ store.handleDelta(
+ deltaEvent({
+ type: "tool-call",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ toolCallId: `tc${i}`,
+ toolName: "probe",
+ input: {},
+ stepId: `t1#${i}` as StepId,
+ }),
+ );
+ }
+ expect(store.chunks).toHaveLength(15);
+
+ // Reader returns to the bottom — but provisional chunks are never unloaded,
+ // so the deferred trim still can't shrink an all-provisional transcript.
+ atBottom = true;
+ store.handleDelta(
+ deltaEvent({
+ type: "tool-call",
+ conversationId: CONV_ID,
+ turnId: "t1",
+ toolCallId: "tc15",
+ toolName: "probe",
+ input: {},
+ stepId: "t1#15" as StepId,
+ }),
+ );
+ expect(store.chunks).toHaveLength(16); // gate is open, yet nothing was trimmed: the turn never sealed
+
+ store.dispose();
+ });
+
+ it("chat limit: a deferred trim catches up across committed history once the gate opens", async () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache();
+ let atBottom = false; // gate starts closed (reader scrolled up)
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ chatLimit: 100, // one quarter = ceil(100 / 4) = 25 chunks
+ canUnload: () => atBottom,
+ });
+
+ // Seal a turn committing 130 chunks while the reader is scrolled up: no trim.
+ historySync.returnChunks = Array.from({ length: 130 }, (_, i) => makeStoredChunk(i + 1));
+ store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" }));
+ store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t1" }));
+ await vi.waitFor(() => {
+ expect(store.chunks).toHaveLength(130);
+ });
+
+ // Back at the bottom: the next fold trims whole quarters down to ≤ 100.
+ atBottom = true;
+ store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t2" }));
+ // 130 → 2 quarters of 25 → 80 committed (turn-start adds no chunk).
+ expect(store.chunks).toHaveLength(80);
+ expect(store.chunks[0]?.seq).toBe(51); // seqs 1..50 (two quarters) were unloaded
+
+ store.dispose();
+ });
+
+ it("chat limit: load windows a long cached conversation to 75% of the limit", async () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache();
+ await cache.impl.commit( // warm cache: seqs 1..500 are present before the store is created
+ CONV_ID,
+ Array.from({ length: 500 }, (_, i) => makeStoredChunk(i + 1)),
+ );
+
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ chatLimit: 100,
+ });
+
+ await store.load();
+
+ // floor(100 × 0.75) = 75 newest chunks: seqs 426..500.
+ expect(store.chunks).toHaveLength(75);
+ expect(store.chunks[0]?.seq).toBe(426);
+ expect(store.hasEarlier).toBe(true); // seqs 1..425 are hidden but still cached
+ // The tail sync still used the cache's real cursor (not the window's edge).
+ expect(historySync.calls[0]?.sinceSeq).toBe(500);
+
+ store.dispose();
+ });
+
+ it("chat limit: a cold cache (fresh browser) windows the full server history to 75%", async () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache(); // left empty: nothing is committed before load()
+ // Backend has no limit param yet (CR-5): sinceSeq=0 returns EVERYTHING.
+ historySync.returnChunks = Array.from({ length: 500 }, (_, i) => makeStoredChunk(i + 1));
+
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ chatLimit: 100,
+ });
+
+ await store.load();
+
+ expect(store.chunks).toHaveLength(75); // floor(100 × 0.75) newest chunks
+ expect(store.chunks[0]?.seq).toBe(426); // seqs 1..425 are windowed out of the view
+ expect(store.hasEarlier).toBe(true);
+ // The full history is still CACHED locally (show-earlier pages from it).
+ const cached = await cache.impl.load(CONV_ID);
+ expect(cached).toHaveLength(500);
+
+ store.dispose();
+ });
+
+ it("chat limit: showEarlier pages a quarter back in from the cache", async () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache();
+ await cache.impl.commit(
+ CONV_ID,
+ Array.from({ length: 500 }, (_, i) => makeStoredChunk(i + 1)),
+ );
+
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ chatLimit: 100,
+ });
+
+ await store.load();
+ expect(store.chunks[0]?.seq).toBe(426); // initial window: seqs 426..500
+
+ await store.showEarlier(); // +ceil(100/4) = 25 older chunks
+ expect(store.chunks).toHaveLength(100); // 75 windowed + 25 restored
+ expect(store.chunks[0]?.seq).toBe(401); // seqs 401..425 were paged back in
+ expect(store.hasEarlier).toBe(true); // seqs 1..400 are still unloaded
+
+ store.dispose();
+ });
+
+ it("chat limit: showEarlier clears hasEarlier when the cache is exhausted", async () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache();
+ await cache.impl.commit( // only 80 chunks: 5 more than the 75-chunk initial window
+ CONV_ID,
+ Array.from({ length: 80 }, (_, i) => makeStoredChunk(i + 1)),
+ );
+
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ chatLimit: 100,
+ });
+
+ await store.load(); // window 75: hidden 1..5
+ expect(store.chunks).toHaveLength(75);
+ expect(store.hasEarlier).toBe(true);
+
+ await store.showEarlier(); // restores all 5 → nothing left below
+ expect(store.chunks).toHaveLength(80);
+ expect(store.chunks[0]?.seq).toBe(1); // the first cached chunk is visible again
+ expect(store.hasEarlier).toBe(false);
+
+ store.dispose();
+ });
+
+ it("chat limit: a post-trim history sync does not resurrect unloaded chunks", async () => {
+ const transport = createFakeTransport();
+ const historySync = createFakeHistorySync();
+ const metricsSync = createFakeMetricsSync();
+ const cache = createFakeCache();
+ await cache.impl.commit(
+ CONV_ID,
+ Array.from({ length: 500 }, (_, i) => makeStoredChunk(i + 1)),
+ );
+
+ const store = createChatStore({
+ conversationId: CONV_ID,
+ transport: transport.impl,
+ historySync: historySync.impl,
+ metricsSync: metricsSync.impl,
+ cache: cache.impl,
+ chatLimit: 100,
+ });
+
+ await store.load();
+ expect(store.chunks[0]?.seq).toBe(426); // windowed: seqs 1..425 are unloaded
+
+ // A sealed turn triggers syncTail, whose cache.commit returns the FULL
+ // merged cache (seqs 1..501) — the watermark must keep 1..425 out.
+ historySync.returnChunks = [makeStoredChunk(501)];
+ store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t9" }));
+ store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t9" }));
+
+ await vi.waitFor(() => {
+ expect(store.chunks[store.chunks.length - 1]?.seq).toBe(501);
+ });
+ expect(store.chunks[0]?.seq).toBe(426); // oldest visible chunk is unchanged by the sync
+ expect(store.chunks).toHaveLength(76); // 75 windowed chunks + the newly synced seq 501
+
+ store.dispose();
+ });
+
it("resync is a no-op after dispose", async () => {
const transport = createFakeTransport();
const historySync = createFakeHistorySync();
diff --git a/src/features/chat/ui.test.ts b/src/features/chat/ui.test.ts
index 278b2cf..7174821 100644
--- a/src/features/chat/ui.test.ts
+++ b/src/features/chat/ui.test.ts
@@ -41,6 +41,45 @@ describe("ChatView", () => {
expect(screen.getByText("Hello!")).toBeInTheDocument();
});
+ it("shows the show-earlier button only when earlier history is unloaded, and pages it in", async () => {
+ const chunks: RenderedChunk[] = [
+ { seq: 26, role: "user", chunk: { type: "text", text: "later" }, provisional: false },
+ ];
+
+ let resolveEarlier: (() => void) | undefined; // captured resolver: lets the test hold the page-in pending
+ const onShowEarlier = vi.fn(
+ () =>
+ new Promise<void>((resolve) => {
+ resolveEarlier = resolve;
+ }),
+ );
+
+ render(ChatView, { props: { chunks, hasEarlier: true, onShowEarlier } });
+
+ const button = screen.getByRole("button", { name: /show earlier messages/i });
+ const user = userEvent.setup();
+ await user.click(button);
+
+ expect(onShowEarlier).toHaveBeenCalledTimes(1);
+ // While the page-in is awaited the button is disabled (no double-fire).
+ expect(screen.getByRole("button", { name: /loading earlier messages/i })).toBeDisabled();
+
+ resolveEarlier?.(); // settle the pending page-in
+ await vi.waitFor(() => {
+ expect(screen.getByRole("button", { name: /show earlier messages/i })).toBeEnabled();
+ });
+ });
+
+ it("hides the show-earlier button when nothing is unloaded", () => {
+ const chunks: RenderedChunk[] = [
+ { seq: 1, role: "user", chunk: { type: "text", text: "all here" }, provisional: false },
+ ];
+
+ render(ChatView, { props: { chunks, hasEarlier: false, onShowEarlier: vi.fn() } }); // handler supplied, but hasEarlier is false
+
+ expect(screen.queryByRole("button", { name: /show earlier/i })).not.toBeInTheDocument();
+ });
+
it("renders tool-call chunks", () => {
const chunks: RenderedChunk[] = [
{
diff --git a/src/features/chat/ui/ChatView.svelte b/src/features/chat/ui/ChatView.svelte
index 00691aa..d1d7709 100644
--- a/src/features/chat/ui/ChatView.svelte
+++ b/src/features/chat/ui/ChatView.svelte
@@ -19,21 +19,48 @@
let {
chunks,
turnMetrics = [],
+ hasEarlier = false, // default: render no show-earlier affordance
+ onShowEarlier, // optional with no default: undefined when the caller omits it
+ thinkingKeyBase = 0, // default: thinking ordinals start from zero
}: {
chunks: readonly RenderedChunk[];
turnMetrics?: readonly TurnMetricsEntry[];
+ /** Earlier history is unloaded (chat limit) and can be paged back in. */
+ hasEarlier?: boolean;
+ /** Page earlier history back in; the caller owns scroll-position preservation. */
+ onShowEarlier?: () => Promise<void>;
+ /**
+ * Ordinal base for thinking-collapse keys: the count of thinking chunks
+ * unloaded by the chat limit, so the remaining ordinals don't shift (and
+ * swap collapse state) when a trim removes older thinking blocks.
+ */
+ thinkingKeyBase?: number;
} = $props();
+ // True while a show-earlier page-in is awaited (disables the button).
+ let loadingEarlier = $state(false);
+
+ async function showEarlier() {
+ if (!onShowEarlier || loadingEarlier) return; // no handler, or a page-in is already in flight
+ loadingEarlier = true;
+ try {
+ await onShowEarlier();
+ } finally {
+ loadingEarlier = false; // runs whether the page-in resolved or rejected, so the button re-enables
+ }
+ }
+
const groups = $derived(groupRenderedChunks(chunks));
const rows = $derived(interleaveTurnMetrics(groups, turnMetrics));
// Stable per-row keys. Thinking blocks get an ordinal key (`think<n>`) that
// survives the provisional→committed (seq null → seq N) transition, so the
- // collapse's open/close state is NOT lost when a turn seals. (App isolates
- // these keys per conversation via {#key}.)
+ // collapse's open/close state is NOT lost when a turn seals. The ordinal
+ // starts at `thinkingKeyBase` so keys also survive a chat-limit trim removing
+ // older thinking blocks. (App isolates these keys per conversation via {#key}.)
const keyedRows = $derived.by(() => {
- let thinking = 0;
+ let thinking = thinkingKeyBase;
return rows.map((row, i) => {
if (row.kind === "step-metrics") {
return { row, key: `s${row.step.stepId}` };
@@ -132,6 +159,19 @@
{/snippet}
<div class="flex flex-col gap-2 p-4 pl-6" role="log" aria-live="polite">
+ {#if hasEarlier && onShowEarlier}
+ <!-- Chat limit: older chunks are unloaded; offer to page them back in. Rendered only when a handler is supplied; the button is disabled (and shows a spinner) while a page-in is pending. -->
+ <div class="flex justify-center">
+ <button class="btn btn-ghost btn-xs" disabled={loadingEarlier} onclick={showEarlier}>
+ {#if loadingEarlier}
+ <span class="loading loading-spinner loading-xs" aria-hidden="true"></span>
+ Loading earlier messages…
+ {:else}
+ Show earlier messages
+ {/if}
+ </button>
+ </div>
+ {/if}
{#each keyedRows as { row, key } (key)}
{#if row.kind === "step-metrics"}
{@const sv = viewStepMetrics(row.step, row.index)}
diff --git a/src/features/smart-scroll/ui/controller.svelte.ts b/src/features/smart-scroll/ui/controller.svelte.ts
index 99d53ca..dbe65d1 100644
--- a/src/features/smart-scroll/ui/controller.svelte.ts
+++ b/src/features/smart-scroll/ui/controller.svelte.ts
@@ -22,6 +22,12 @@ export interface SmartScrollController {
/** Reactive: show the "scroll to bottom" affordance (the user has scrolled up). */
readonly showButton: boolean;
/**
+ * Non-reactive point-in-time query: is the view stuck to the bottom right now?
+ * For imperative callers (e.g. the chat-limit unload gate) that poll at event
+ * time rather than subscribing — reads the reducer state, not a rune.
+ */
+ isAtBottom(): boolean;
+ /**
* Attach to the scroll container; returns a teardown to call on unmount.
* Pass the inner CONTENT element to also follow height changes that aren't a
* transcript update (async markdown/highlight, image loads, a collapse toggling,
@@ -84,6 +90,10 @@ export function createSmartScrollController(): SmartScrollController {
return showButton;
},
+ isAtBottom(): boolean {
+ return state.stuck; // plain read of the reducer state's stuck flag
+ },
+
attach(node: HTMLElement, content?: HTMLElement): () => void {
el = node;
node.addEventListener("scroll", handleScroll, { passive: true });