feat(chat): chat limit — bulk quarter-unload, 75% fresh-load window, show-earlier page-in

Long transcripts no longer grow unbounded: past the chat limit (default 256 chunks, localStorage dispatch.chatLimit) the oldest ceil(limit/4) committed chunks are unloaded in ONE bulk pass — never one-per-delta (old Dispatch's scroll-jump-per-step bug) — and only while the reader is stuck to the bottom (scrolled-up readers defer the trim; it catches up in whole quarters). A fresh page load windows to the newest floor(0.75*limit). Unloading is purely local (IndexedDB cache + server keep everything); a hiddenBeforeSeq watermark keeps history merges from resurrecting unloaded chunks, and a 'Show earlier messages' affordance pages a quarter back in from the cache with scroll-anchor preservation. Thinking-collapse render keys stay stable across trims via a hiddenThinkingCount ordinal base.

- core/chunks/trim.ts: pure policy (trim/window/restore/normalize) + tests
- chat store: chatLimit + canUnload deps, windowed load, showEarlier()
- composition root: dispatch.chatLimit localStorage knob + unload gate wired to smart-scroll isAtBottom()
- backend CR-5 OPENED (not a blocker): ?limit=/?beforeSeq= on GET /conversations/:id (courier backend-handoff-chat-limit.md)
- scripts/live-probe.ts: fix pre-existing stale TurnMetricsEntry reads (m1.usage -> total.usage) that crashed the probe; 17/17 live checks pass
author: Adam Malczewski <[email protected]> 2026-06-12 18:26:00 +0900
committer: Adam Malczewski <[email protected]> 2026-06-12 18:26:00 +0900
commit: 1764e3e5dff836255d121a933dd92542368346f9 (patch)
tree: b835055de0f0f1fd9750741764dac8b30f7498bf /src/core/chunks
parent: 4001274e3ba25a3946df1e9f2dc82ca6781cd2bf (diff)
download: dispatch-web-1764e3e5dff836255d121a933dd92542368346f9.tar.gz
dispatch-web-1764e3e5dff836255d121a933dd92542368346f9.zip
5 files changed, 406 insertions, 1 deletions
diff --git a/src/core/chunks/index.ts b/src/core/chunks/index.ts
index ecfee74..6ab0f35 100644
--- a/src/core/chunks/index.ts
+++ b/src/core/chunks/index.ts
@@ -8,6 +8,18 @@ export {
 	initialState,
 } from "./reducer";
 export { selectChunks, selectGenerating, selectMessages } from "./selectors";
+export {
+	DEFAULT_CHAT_LIMIT,
+	initialWindowSize,
+	MAX_CHAT_LIMIT,
+	MIN_CHAT_LIMIT,
+	normalizeChatLimit,
+	restoreEarlier,
+	selectHasEarlier,
+	trimTranscript,
+	unloadCount,
+	windowTranscript,
+} from "./trim";
 export type {
 	AccumulatingChunk,
 	ProvisionalChunk,
diff --git a/src/core/chunks/reducer.ts b/src/core/chunks/reducer.ts
index 7ce55ce..0a57839 100644
--- a/src/core/chunks/reducer.ts
+++ b/src/core/chunks/reducer.ts
@@ -10,6 +10,8 @@ export function initialState(): TranscriptState {
 		currentTurnId: null,
 		latestUsage: null,
 		sealedTurnId: null,
+		hiddenBeforeSeq: 0,
+		hiddenThinkingCount: 0,
 		generating: false,
 	};
 }
@@ -41,6 +43,10 @@ function flushAccumulating(
  * Dedupes by seq (new wins), keeps seq-monotonic order, idempotent.
  * When sealedTurnId is set, drops all provisional chunks (now superseded)
  * and clears sealedTurnId.
+ *
+ * Chunks below the chat-limit unload watermark (`hiddenBeforeSeq`) are
+ * REJECTED: a full-cache or tail merge must not resurrect what the trim
+ * unloaded. Restoring earlier history goes through `restoreEarlier` instead.
  */
 export function applyHistory(
 	state: TranscriptState,
@@ -48,7 +54,10 @@ export function applyHistory(
 ): TranscriptState {
 	const seqMap = new Map<number, StoredChunk>();
 	for (const c of state.committed) seqMap.set(c.seq, c);
-	for (const c of chunks) seqMap.set(c.seq, c);
+	for (const c of chunks) {
+		if (c.seq < state.hiddenBeforeSeq) continue;
+		seqMap.set(c.seq, c);
+	}
 	const committed = Array.from(seqMap.values()).sort((a, b) => a.seq - b.seq);
 
 	if (state.sealedTurnId !== null) {
diff --git a/src/core/chunks/trim.test.ts b/src/core/chunks/trim.test.ts
new file mode 100644
index 0000000..091b646
--- /dev/null
+++ b/src/core/chunks/trim.test.ts
@@ -0,0 +1,218 @@
+import type { StoredChunk } from "@dispatch/wire";
+import { describe, expect, it } from "vitest";
+import { applyHistory, initialState } from "./reducer";
+import {
+	DEFAULT_CHAT_LIMIT,
+	initialWindowSize,
+	MAX_CHAT_LIMIT,
+	MIN_CHAT_LIMIT,
+	normalizeChatLimit,
+	restoreEarlier,
+	selectHasEarlier,
+	trimTranscript,
+	unloadCount,
+	windowTranscript,
+} from "./trim";
+import type { TranscriptState } from "./types";
+
+function chunk(seq: number, type: "text" | "thinking" = "text"): StoredChunk { // assistant-role fixture whose text is `c<seq>`
+	return { seq, role: "assistant", chunk: { type, text: `c${seq}` } };
+}
+
+function chunks(from: number, to: number): StoredChunk[] { // one text chunk per seq in [from, to], both ends inclusive
+	const out: StoredChunk[] = [];
+	for (let seq = from; seq <= to; seq++) out.push(chunk(seq));
+	return out;
+}
+
+function stateWith(committed: readonly StoredChunk[]): TranscriptState { // initialState() with only `committed` replaced
+	return { ...initialState(), committed };
+}
+
+describe("normalizeChatLimit", () => {
+	it("defaults non-numeric / NaN / missing values", () => {
+		expect(normalizeChatLimit(undefined)).toBe(DEFAULT_CHAT_LIMIT);
+		expect(normalizeChatLimit(null)).toBe(DEFAULT_CHAT_LIMIT);
+		expect(normalizeChatLimit("100")).toBe(DEFAULT_CHAT_LIMIT); // numeric STRINGS are not parsed
+		expect(normalizeChatLimit(Number.NaN)).toBe(DEFAULT_CHAT_LIMIT);
+		expect(normalizeChatLimit(Number.POSITIVE_INFINITY)).toBe(DEFAULT_CHAT_LIMIT); // non-finite → default, not MAX
+	});
+
+	it("floors and clamps numeric values", () => {
+		expect(normalizeChatLimit(100.9)).toBe(100);
+		expect(normalizeChatLimit(0)).toBe(MIN_CHAT_LIMIT);
+		expect(normalizeChatLimit(-5)).toBe(MIN_CHAT_LIMIT);
+		expect(normalizeChatLimit(10_000_000)).toBe(MAX_CHAT_LIMIT);
+		expect(normalizeChatLimit(256)).toBe(256);
+	});
+});
+
+describe("unloadCount / initialWindowSize", () => {
+	it("unload is a quarter of the limit, rounded up", () => {
+		expect(unloadCount(100)).toBe(25);
+		expect(unloadCount(256)).toBe(64);
+		expect(unloadCount(10)).toBe(3); // ceil(2.5)
+	});
+
+	it("initial window is 75% of the limit, rounded down", () => {
+		expect(initialWindowSize(100)).toBe(75);
+		expect(initialWindowSize(256)).toBe(192);
+		expect(initialWindowSize(1)).toBe(1); // floor(0.75) = 0, raised to the minimum of 1
+	});
+});
+
+describe("trimTranscript", () => {
+	it("is the identity at or under the limit", () => {
+		const at = stateWith(chunks(1, 100));
+		expect(trimTranscript(at, 100)).toBe(at);
+		const under = stateWith(chunks(1, 99));
+		expect(trimTranscript(under, 100)).toBe(under);
+	});
+
+	it("unloads exactly a quarter when the limit is first exceeded (100 → 101 drops 25)", () => {
+		const state = stateWith(chunks(1, 101));
+		const next = trimTranscript(state, 100);
+		expect(next.committed).toHaveLength(76); // 101 - 25
+		expect(next.committed[0]?.seq).toBe(26);
+		expect(next.hiddenBeforeSeq).toBe(26);
+	});
+
+	it("unloads multiple quarters when trimming was deferred far past the limit", () => {
+		const state = stateWith(chunks(1, 130));
+		const next = trimTranscript(state, 100);
+		// 130 is 30 over → ceil(30 / 25) = 2 quarters → 50 dropped → 80 remain.
+		expect(next.committed).toHaveLength(80);
+		expect(next.committed[0]?.seq).toBe(51);
+		expect(next.hiddenBeforeSeq).toBe(51);
+	});
+
+	it("counts provisional + accumulating toward the limit but never drops them", () => {
+		const base = stateWith(chunks(1, 98));
+		const state: TranscriptState = {
+			...base,
+			provisional: [
+				{ role: "user", chunk: { type: "text", text: "q" } },
+				{ role: "assistant", chunk: { type: "text", text: "a" } },
+			],
+			accumulating: { kind: "text", text: "stream" },
+		};
+		// 98 committed + 2 provisional + 1 accumulating = 101 > 100 → drop 25 committed (98 → 73).
+		const next = trimTranscript(state, 100);
+		expect(next.committed).toHaveLength(73);
+		expect(next.provisional).toHaveLength(2);
+		expect(next.accumulating).not.toBeNull();
+	});
+
+	it("caps the drop at the committed length", () => {
+		const base = stateWith(chunks(1, 2));
+		const provisional = Array.from({ length: 20 }, (_, i) => ({
+			role: "assistant" as const,
+			chunk: { type: "text" as const, text: `p${i}` },
+		}));
+		const state: TranscriptState = { ...base, provisional };
+		const next = trimTranscript(state, 10); // total 22: wants 4 quarters × 3 = 12, but only 2 are committed
+		expect(next.committed).toHaveLength(0);
+		expect(next.provisional).toHaveLength(20);
+		// Nothing committed is left, so the watermark is the last dropped seq (2) + 1.
+		expect(next.hiddenBeforeSeq).toBe(3);
+	});
+
+	it("accumulates the hidden thinking count for stable render keys", () => {
+		const committed = [chunk(1, "thinking"), ...chunks(2, 9), chunk(10, "thinking"), chunk(11)];
+		const state = stateWith(committed);
+		const next = trimTranscript(state, 10); // 11 > 10 → drop ceil(10/4)=3 oldest (seqs 1..3, one thinking)
+		expect(next.committed[0]?.seq).toBe(4);
+		expect(next.hiddenThinkingCount).toBe(1);
+	});
+
+	it("ignores a nonsensical limit", () => {
+		const state = stateWith(chunks(1, 50));
+		expect(trimTranscript(state, 0)).toBe(state);
+		expect(trimTranscript(state, Number.NaN)).toBe(state);
+	});
+});
+
+describe("windowTranscript", () => {
+	it("keeps only the newest maxCommitted chunks and sets the watermark", () => {
+		const state = stateWith(chunks(1, 1000));
+		const next = windowTranscript(state, 75);
+		expect(next.committed).toHaveLength(75);
+		expect(next.committed[0]?.seq).toBe(926); // 1000 - 75 + 1
+		expect(next.hiddenBeforeSeq).toBe(926);
+		expect(selectHasEarlier(next)).toBe(true);
+	});
+
+	it("is the identity within the window", () => {
+		const state = stateWith(chunks(1, 50));
+		expect(windowTranscript(state, 75)).toBe(state);
+		expect(selectHasEarlier(state)).toBe(false);
+	});
+});
+
+describe("applyHistory respects the watermark", () => {
+	it("does not resurrect chunks below hiddenBeforeSeq on a full-cache merge", () => {
+		const trimmed = trimTranscript(stateWith(chunks(1, 101)), 100);
+		expect(trimmed.hiddenBeforeSeq).toBe(26);
+		// A later sync merges the FULL cache (seqs 1..101) — the unloaded prefix (1..25) must stay out.
+		const merged = applyHistory(trimmed, chunks(1, 101));
+		expect(merged.committed[0]?.seq).toBe(26);
+		expect(merged.committed).toHaveLength(76);
+	});
+
+	it("still merges the tail above the watermark", () => {
+		const trimmed = trimTranscript(stateWith(chunks(1, 101)), 100);
+		const merged = applyHistory(trimmed, chunks(100, 110)); // every incoming seq is ≥ the watermark (26)
+		expect(merged.committed[merged.committed.length - 1]?.seq).toBe(110);
+		expect(merged.committed[0]?.seq).toBe(26);
+	});
+});
+
+describe("restoreEarlier", () => {
+	it("pages the newest `count` earlier chunks back in and lowers the watermark", () => {
+		const windowed = windowTranscript(stateWith(chunks(1, 1000)), 75); // loaded 926..1000
+		const restored = restoreEarlier(windowed, chunks(1, 1000), 64);
+		expect(restored.committed[0]?.seq).toBe(862); // 926 - 64
+		expect(restored.committed).toHaveLength(75 + 64);
+		expect(restored.hiddenBeforeSeq).toBe(862);
+		expect(selectHasEarlier(restored)).toBe(true);
+	});
+
+	it("clears the watermark when the restore exhausts known earlier history", () => {
+		const windowed = windowTranscript(stateWith(chunks(1, 100)), 75); // hidden: 1..25
+		const restored = restoreEarlier(windowed, chunks(1, 100), 64); // asks for 64, only 25 exist
+		expect(restored.committed).toHaveLength(100);
+		expect(restored.committed[0]?.seq).toBe(1);
+		expect(restored.hiddenBeforeSeq).toBe(0);
+		expect(restored.hiddenThinkingCount).toBe(0);
+		expect(selectHasEarlier(restored)).toBe(false);
+	});
+
+	it("clears the watermark when nothing is actually below it", () => {
+		const windowed = windowTranscript(stateWith(chunks(50, 200)), 75); // 151 chunks → loaded 126..200
+		const restored = restoreEarlier(windowed, [], 64); // empty cache: nothing known below the watermark
+		expect(restored.hiddenBeforeSeq).toBe(0);
+		expect(restored.committed).toEqual(windowed.committed);
+	});
+
+	it("is the identity when nothing is hidden", () => {
+		const state = stateWith(chunks(1, 10));
+		expect(restoreEarlier(state, chunks(1, 10), 5)).toBe(state);
+	});
+
+	it("decrements the hidden thinking count by the restored thinking chunks", () => {
+		const committed = [chunk(1, "thinking"), chunk(2), chunk(3, "thinking"), ...chunks(4, 12)];
+		const trimmed = trimTranscript(stateWith(committed), 10); // drops 3: seqs 1..3 (2 thinking)
+		expect(trimmed.hiddenThinkingCount).toBe(2);
+		const restored = restoreEarlier(trimmed, committed, 2); // restores seqs 2..3 (1 thinking)
+		expect(restored.hiddenBeforeSeq).toBe(2);
+		expect(restored.hiddenThinkingCount).toBe(1);
+	});
+
+	it("round-trips with trim: trim → restore-all yields the original committed list", () => {
+		const original = chunks(1, 101);
+		const trimmed = trimTranscript(stateWith(original), 100);
+		const restored = restoreEarlier(trimmed, original, 1000); // count far above the 25 hidden chunks
+		expect(restored.committed).toEqual(original);
+		expect(restored.hiddenBeforeSeq).toBe(0);
+	});
+});
diff --git a/src/core/chunks/trim.ts b/src/core/chunks/trim.ts
new file mode 100644
index 0000000..1733027
--- /dev/null
+++ b/src/core/chunks/trim.ts
@@ -0,0 +1,149 @@
+// Chat-limit windowing for the transcript — PURE policy, zero DOM/Svelte.
+//
+// In very long conversations an unbounded transcript makes the browser crawl, so
+// the FE keeps at most `chat limit` chunks loaded and UNLOADS the oldest ones in
+// BULK: a quarter of the limit at a time (limit 100 → at 101 chunks it unloads 25,
+// leaving 76). Bulk-on-threshold — NOT one-per-delta like old Dispatch — so a trim
+// happens once per ~quarter-limit of new content instead of on every step, which
+// was the old scroll-jump-per-step failure mode. A fresh page load shows only the
+// newest `floor(0.75 × limit)` chunks, leaving headroom before the first trim.
+//
+// Unloading drops COMMITTED chunks only (provisional chunks are the in-flight
+// turn; they become committed at seal and trimmable then) and records the
+// `hiddenBeforeSeq` watermark so history merges can't resurrect them and the
+// "Show earlier messages" affordance knows where to page back in from.
+
+import type { StoredChunk } from "@dispatch/wire";
+import type { TranscriptState } from "./types";
+
+/** Default chat limit (max loaded chunks per conversation); what `normalizeChatLimit` falls back to. */
+export const DEFAULT_CHAT_LIMIT = 256;
+/** Smallest limit `normalizeChatLimit` will return (a tiny window would thrash). */
+export const MIN_CHAT_LIMIT = 10;
+/** Largest limit `normalizeChatLimit` will return. */
+export const MAX_CHAT_LIMIT = 100_000;
+
+/**
+ * Normalize an untrusted configured limit (e.g. parsed from localStorage):
+ * any non-number or non-finite value (numeric strings, NaN, ±Infinity, null)
+ * → the default; otherwise floored, then clamped to [MIN_CHAT_LIMIT, MAX_CHAT_LIMIT].
+ */
+export function normalizeChatLimit(value: unknown): number {
+	if (typeof value !== "number" || !Number.isFinite(value)) return DEFAULT_CHAT_LIMIT;
+	const n = Math.floor(value);
+	if (n < MIN_CHAT_LIMIT) return MIN_CHAT_LIMIT;
+	if (n > MAX_CHAT_LIMIT) return MAX_CHAT_LIMIT;
+	return n;
+}
+
+/** The bulk-unload unit: `ceil(limit / 4)` — a quarter of the limit, rounded up. `limit` is not validated here. */
+export function unloadCount(limit: number): number {
+	return Math.ceil(limit / 4);
+}
+
+/** The fresh-load window: `floor(0.75 × limit)`, raised to 1 when that would be smaller. */
+export function initialWindowSize(limit: number): number {
+	return Math.max(1, Math.floor(limit * 0.75));
+}
+
+/** Total loaded (rendered) chunk count: committed + provisional, plus 1 while `accumulating` is non-null. */
+function totalCount(state: TranscriptState): number {
+	return state.committed.length + state.provisional.length + (state.accumulating !== null ? 1 : 0);
+}
+
+function countThinking(chunks: readonly StoredChunk[]): number { // number of chunks whose `chunk.type` is "thinking"
+	let n = 0;
+	for (const c of chunks) {
+		if (c.chunk.type === "thinking") n++;
+	}
+	return n;
+}
+
+/** Drop the `drop` oldest committed chunks, advancing the watermark + thinking base. Callers pass `drop` > 0. */
+function dropOldest(state: TranscriptState, drop: number): TranscriptState {
+	const dropped = state.committed.slice(0, drop);
+	const kept = state.committed.slice(drop);
+	const first = kept[0];
+	const lastDropped = dropped[dropped.length - 1];
+	let hiddenBeforeSeq = state.hiddenBeforeSeq; // survives only when `committed` was empty (both lookups undefined)
+	if (first !== undefined) {
+		hiddenBeforeSeq = first.seq; // watermark = seq of the oldest chunk still loaded
+	} else if (lastDropped !== undefined) {
+		hiddenBeforeSeq = lastDropped.seq + 1; // everything was dropped: sit just past the newest dropped seq
+	}
+	return {
+		...state,
+		committed: kept,
+		hiddenBeforeSeq,
+		hiddenThinkingCount: state.hiddenThinkingCount + countThinking(dropped),
+	};
+}
+
+/**
+ * Enforce the chat limit: when the loaded count EXCEEDS `limit`, unload whole
+ * quarters (`unloadCount(limit)` each) of the OLDEST committed chunks until back
+ * at/under the limit — normally one quarter (limit 100: 101 → 76), more when the
+ * trim was deferred. Only committed chunks are dropped, so the result can stay
+ * over the limit. Returns `state` itself at/under the limit or for a limit ≤ 0 / non-finite.
+ */
+export function trimTranscript(state: TranscriptState, limit: number): TranscriptState {
+	if (!Number.isFinite(limit) || limit <= 0) return state;
+	const total = totalCount(state);
+	if (total <= limit) return state;
+	const quarter = unloadCount(limit);
+	const passes = Math.ceil((total - limit) / quarter); // whole quarters needed to get back to ≤ limit
+	const drop = Math.min(passes * quarter, state.committed.length); // cannot drop more than is committed
+	if (drop <= 0) return state;
+	return dropOldest(state, drop);
+}
+
+/**
+ * Window the committed history down to the newest `maxCommitted` chunks (the
+ * fresh-load path: `maxCommitted = initialWindowSize(limit)`). Returns `state`
+ * itself when already within the window or when `maxCommitted` is negative / non-finite.
+ */
+export function windowTranscript(state: TranscriptState, maxCommitted: number): TranscriptState {
+	if (!Number.isFinite(maxCommitted) || maxCommitted < 0) return state;
+	const drop = state.committed.length - maxCommitted; // NOTE(review): not rounded — assumes an integer maxCommitted
+	if (drop <= 0) return state;
+	return dropOldest(state, drop);
+}
+
+/**
+ * Page earlier (unloaded) history back in — the "Show earlier messages" action.
+ *
+ * `earlier` must be ALL locally-known chunks below the watermark (typically the
+ * full cached conversation; chunks at/above the watermark are ignored). The
+ * newest `count` (at least 1) of them are put in front of `committed` and the
+ * watermark lowers to the new oldest loaded seq — or clears to 0 when this
+ * restore exhausts the known earlier history. No-op when nothing is unloaded.
+ */
+export function restoreEarlier(
+	state: TranscriptState,
+	earlier: readonly StoredChunk[],
+	count: number,
+): TranscriptState {
+	if (state.hiddenBeforeSeq <= 0) return state;
+	const below = earlier.filter((c) => c.seq < state.hiddenBeforeSeq).sort((a, b) => a.seq - b.seq);
+	if (below.length === 0) {
+		// Nothing is actually hidden below the watermark: clear it (and the
+		// thinking base) so the "Show earlier" affordance disappears.
+		return { ...state, hiddenBeforeSeq: 0, hiddenThinkingCount: 0 };
+	}
+	const keep = below.slice(-Math.max(1, count)); // the newest hidden chunks, always at least one
+	const exhausted = keep.length === below.length; // true when `earlier` holds nothing older than `keep`
+	const firstKept = keep[0];
+	return {
+		...state,
+		committed: [...keep, ...state.committed],
+		hiddenBeforeSeq: exhausted || firstKept === undefined ? 0 : firstKept.seq,
+		hiddenThinkingCount: exhausted
+			? 0
+			: Math.max(0, state.hiddenThinkingCount - countThinking(keep)),
+	};
+}
+
+/** Whether unloaded earlier history exists to offer ("Show earlier messages"): true iff `hiddenBeforeSeq` > 0. */
+export function selectHasEarlier(state: TranscriptState): boolean {
+	return state.hiddenBeforeSeq > 0;
+}
diff --git a/src/core/chunks/types.ts b/src/core/chunks/types.ts
index faa0d3f..14619bd 100644
--- a/src/core/chunks/types.ts
+++ b/src/core/chunks/types.ts
@@ -21,6 +21,23 @@ export interface TranscriptState {
 	readonly latestUsage: Usage | null;
 	readonly sealedTurnId: string | null;
 	/**
+	 * The chat-limit UNLOAD watermark: committed chunks with `seq <` this are
+	 * unloaded (not in `committed`, not rendered) to keep long transcripts cheap.
+	 * `0` = nothing unloaded. `applyHistory` refuses chunks below it (a cache/tail
+	 * merge must not resurrect what the trim dropped); "Show earlier messages"
+	 * lowers it via `restoreEarlier`. See `trim.ts`.
+	 */
+	readonly hiddenBeforeSeq: number;
+	/**
+	 * How many thinking-type chunks are currently unloaded below the watermark.
+	 * Pure render-key bookkeeping: the UI keys thinking collapses by ORDINAL (so
+	 * the key survives the provisional→committed seal transition), and this base
+	 * keeps those ordinals stable when a trim removes older thinking chunks —
+	 * otherwise every remaining collapse would shift keys and swap/lose its
+	 * open state mid-stream.
+	 */
+	readonly hiddenThinkingCount: number;
+	/**
 	 * True while a turn is generating on the server — derived STRUCTURALLY from the
 	 * event stream: a `turn-start` (or any turn delta) with no matching `done` /
 	 * `turn-sealed` / `error` yet. A late-joiner that subscribes mid-turn gets the
author	Adam Malczewski <[email protected]>	2026-06-12 18:26:00 +0900
committer	Adam Malczewski <[email protected]>	2026-06-12 18:26:00 +0900
commit	1764e3e5dff836255d121a933dd92542368346f9 (patch)
tree	b835055de0f0f1fd9750741764dac8b30f7498bf /src/core/chunks
parent	4001274e3ba25a3946df1e9f2dc82ca6781cd2bf (diff)
download	dispatch-web-1764e3e5dff836255d121a933dd92542368346f9.tar.gz dispatch-web-1764e3e5dff836255d121a933dd92542368346f9.zip