diff options
| author | Adam Malczewski <[email protected]> | 2026-06-12 18:26:00 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-12 18:26:00 +0900 |
| commit | 1764e3e5dff836255d121a933dd92542368346f9 (patch) | |
| tree | b835055de0f0f1fd9750741764dac8b30f7498bf /src/core/chunks | |
| parent | 4001274e3ba25a3946df1e9f2dc82ca6781cd2bf (diff) | |
| download | dispatch-web-1764e3e5dff836255d121a933dd92542368346f9.tar.gz dispatch-web-1764e3e5dff836255d121a933dd92542368346f9.zip | |
feat(chat): chat limit — bulk quarter-unload, 75% fresh-load window, show-earlier page-in
Long transcripts no longer grow unbounded: past the chat limit (default 256
chunks, configurable via the localStorage key dispatch.chatLimit) the oldest ceil(limit/4) committed
chunks are unloaded in ONE bulk pass — never one-per-delta (old Dispatch's
scroll-jump-per-step bug) — and only while the reader is stuck to the bottom
(scrolled-up readers defer the trim; it catches up in whole quarters). A fresh
page load windows to the newest floor(0.75*limit) chunks. Unloading is purely local
(IndexedDB cache + server keep everything); a hiddenBeforeSeq watermark keeps
history merges from resurrecting unloaded chunks, and a 'Show earlier messages'
affordance pages a quarter back in from the cache with scroll-anchor
preservation. Thinking-collapse render keys stay stable across trims via a
hiddenThinkingCount ordinal base.
- core/chunks/trim.ts: pure policy (trim/window/restore/normalize) + tests
- chat store: chatLimit + canUnload deps, windowed load, showEarlier()
- composition root: dispatch.chatLimit localStorage knob + unload gate wired
to smart-scroll isAtBottom()
- backend CR-5 OPENED (not a blocker): ?limit=/?beforeSeq= on
GET /conversations/:id (courier backend-handoff-chat-limit.md)
- scripts/live-probe.ts: fix pre-existing stale TurnMetricsEntry reads
(m1.usage -> total.usage) that crashed the probe; 17/17 live checks pass
Diffstat (limited to 'src/core/chunks')
| -rw-r--r-- | src/core/chunks/index.ts | 12 | ||||
| -rw-r--r-- | src/core/chunks/reducer.ts | 11 | ||||
| -rw-r--r-- | src/core/chunks/trim.test.ts | 218 | ||||
| -rw-r--r-- | src/core/chunks/trim.ts | 149 | ||||
| -rw-r--r-- | src/core/chunks/types.ts | 17 |
5 files changed, 406 insertions, 1 deletions
diff --git a/src/core/chunks/index.ts b/src/core/chunks/index.ts index ecfee74..6ab0f35 100644 --- a/src/core/chunks/index.ts +++ b/src/core/chunks/index.ts @@ -8,6 +8,18 @@ export { initialState, } from "./reducer"; export { selectChunks, selectGenerating, selectMessages } from "./selectors"; +export { + DEFAULT_CHAT_LIMIT, + initialWindowSize, + MAX_CHAT_LIMIT, + MIN_CHAT_LIMIT, + normalizeChatLimit, + restoreEarlier, + selectHasEarlier, + trimTranscript, + unloadCount, + windowTranscript, +} from "./trim"; export type { AccumulatingChunk, ProvisionalChunk, diff --git a/src/core/chunks/reducer.ts b/src/core/chunks/reducer.ts index 7ce55ce..0a57839 100644 --- a/src/core/chunks/reducer.ts +++ b/src/core/chunks/reducer.ts @@ -10,6 +10,8 @@ export function initialState(): TranscriptState { currentTurnId: null, latestUsage: null, sealedTurnId: null, + hiddenBeforeSeq: 0, + hiddenThinkingCount: 0, generating: false, }; } @@ -41,6 +43,10 @@ function flushAccumulating( * Dedupes by seq (new wins), keeps seq-monotonic order, idempotent. * When sealedTurnId is set, drops all provisional chunks (now superseded) * and clears sealedTurnId. + * + * Chunks below the chat-limit unload watermark (`hiddenBeforeSeq`) are + * REJECTED: a full-cache or tail merge must not resurrect what the trim + * unloaded. Restoring earlier history goes through `restoreEarlier` instead. 
*/ export function applyHistory( state: TranscriptState, @@ -48,7 +54,10 @@ export function applyHistory( ): TranscriptState { const seqMap = new Map<number, StoredChunk>(); for (const c of state.committed) seqMap.set(c.seq, c); - for (const c of chunks) seqMap.set(c.seq, c); + for (const c of chunks) { + if (c.seq < state.hiddenBeforeSeq) continue; + seqMap.set(c.seq, c); + } const committed = Array.from(seqMap.values()).sort((a, b) => a.seq - b.seq); if (state.sealedTurnId !== null) { diff --git a/src/core/chunks/trim.test.ts b/src/core/chunks/trim.test.ts new file mode 100644 index 0000000..091b646 --- /dev/null +++ b/src/core/chunks/trim.test.ts @@ -0,0 +1,218 @@ +import type { StoredChunk } from "@dispatch/wire"; +import { describe, expect, it } from "vitest"; +import { applyHistory, initialState } from "./reducer"; +import { + DEFAULT_CHAT_LIMIT, + initialWindowSize, + MAX_CHAT_LIMIT, + MIN_CHAT_LIMIT, + normalizeChatLimit, + restoreEarlier, + selectHasEarlier, + trimTranscript, + unloadCount, + windowTranscript, +} from "./trim"; +import type { TranscriptState } from "./types"; + +function chunk(seq: number, type: "text" | "thinking" = "text"): StoredChunk { + return { seq, role: "assistant", chunk: { type, text: `c${seq}` } }; +} + +function chunks(from: number, to: number): StoredChunk[] { + const out: StoredChunk[] = []; + for (let seq = from; seq <= to; seq++) out.push(chunk(seq)); + return out; +} + +function stateWith(committed: readonly StoredChunk[]): TranscriptState { + return { ...initialState(), committed }; +} + +describe("normalizeChatLimit", () => { + it("defaults non-numeric / NaN / missing values", () => { + expect(normalizeChatLimit(undefined)).toBe(DEFAULT_CHAT_LIMIT); + expect(normalizeChatLimit(null)).toBe(DEFAULT_CHAT_LIMIT); + expect(normalizeChatLimit("100")).toBe(DEFAULT_CHAT_LIMIT); + expect(normalizeChatLimit(Number.NaN)).toBe(DEFAULT_CHAT_LIMIT); + expect(normalizeChatLimit(Number.POSITIVE_INFINITY)).toBe(DEFAULT_CHAT_LIMIT); + }); 
+ + it("floors and clamps numeric values", () => { + expect(normalizeChatLimit(100.9)).toBe(100); + expect(normalizeChatLimit(0)).toBe(MIN_CHAT_LIMIT); + expect(normalizeChatLimit(-5)).toBe(MIN_CHAT_LIMIT); + expect(normalizeChatLimit(10_000_000)).toBe(MAX_CHAT_LIMIT); + expect(normalizeChatLimit(256)).toBe(256); + }); +}); + +describe("unloadCount / initialWindowSize", () => { + it("unload is a quarter of the limit, rounded up", () => { + expect(unloadCount(100)).toBe(25); + expect(unloadCount(256)).toBe(64); + expect(unloadCount(10)).toBe(3); + }); + + it("initial window is 75% of the limit, rounded down", () => { + expect(initialWindowSize(100)).toBe(75); + expect(initialWindowSize(256)).toBe(192); + expect(initialWindowSize(1)).toBe(1); // never below 1 + }); +}); + +describe("trimTranscript", () => { + it("is the identity at or under the limit", () => { + const at = stateWith(chunks(1, 100)); + expect(trimTranscript(at, 100)).toBe(at); + const under = stateWith(chunks(1, 99)); + expect(trimTranscript(under, 100)).toBe(under); + }); + + it("unloads exactly a quarter when the limit is first exceeded (100 → 101 drops 25)", () => { + const state = stateWith(chunks(1, 101)); + const next = trimTranscript(state, 100); + expect(next.committed).toHaveLength(76); + expect(next.committed[0]?.seq).toBe(26); + expect(next.hiddenBeforeSeq).toBe(26); + }); + + it("unloads multiple quarters when trimming was deferred far past the limit", () => { + const state = stateWith(chunks(1, 130)); + const next = trimTranscript(state, 100); + // 130 → needs 2 quarters (25 each) to get to ≤ 100 → 80 remain. 
+ expect(next.committed).toHaveLength(80); + expect(next.committed[0]?.seq).toBe(51); + expect(next.hiddenBeforeSeq).toBe(51); + }); + + it("counts provisional + accumulating toward the limit but never drops them", () => { + const base = stateWith(chunks(1, 98)); + const state: TranscriptState = { + ...base, + provisional: [ + { role: "user", chunk: { type: "text", text: "q" } }, + { role: "assistant", chunk: { type: "text", text: "a" } }, + ], + accumulating: { kind: "text", text: "stream" }, + }; + // 98 + 2 + 1 = 101 > 100 → drop 25 committed. + const next = trimTranscript(state, 100); + expect(next.committed).toHaveLength(73); + expect(next.provisional).toHaveLength(2); + expect(next.accumulating).not.toBeNull(); + }); + + it("caps the drop at the committed length", () => { + const base = stateWith(chunks(1, 2)); + const provisional = Array.from({ length: 20 }, (_, i) => ({ + role: "assistant" as const, + chunk: { type: "text" as const, text: `p${i}` }, + })); + const state: TranscriptState = { ...base, provisional }; + const next = trimTranscript(state, 10); + expect(next.committed).toHaveLength(0); + expect(next.provisional).toHaveLength(20); + // Watermark advances past the last dropped committed chunk. 
+ expect(next.hiddenBeforeSeq).toBe(3); + }); + + it("accumulates the hidden thinking count for stable render keys", () => { + const committed = [chunk(1, "thinking"), ...chunks(2, 9), chunk(10, "thinking"), chunk(11)]; + const state = stateWith(committed); + const next = trimTranscript(state, 10); // 11 > 10 → drop ceil(10/4)=3 oldest + expect(next.committed[0]?.seq).toBe(4); + expect(next.hiddenThinkingCount).toBe(1); + }); + + it("ignores a nonsensical limit", () => { + const state = stateWith(chunks(1, 50)); + expect(trimTranscript(state, 0)).toBe(state); + expect(trimTranscript(state, Number.NaN)).toBe(state); + }); +}); + +describe("windowTranscript", () => { + it("keeps only the newest maxCommitted chunks and sets the watermark", () => { + const state = stateWith(chunks(1, 1000)); + const next = windowTranscript(state, 75); + expect(next.committed).toHaveLength(75); + expect(next.committed[0]?.seq).toBe(926); + expect(next.hiddenBeforeSeq).toBe(926); + expect(selectHasEarlier(next)).toBe(true); + }); + + it("is the identity within the window", () => { + const state = stateWith(chunks(1, 50)); + expect(windowTranscript(state, 75)).toBe(state); + expect(selectHasEarlier(state)).toBe(false); + }); +}); + +describe("applyHistory respects the watermark", () => { + it("does not resurrect chunks below hiddenBeforeSeq on a full-cache merge", () => { + const trimmed = trimTranscript(stateWith(chunks(1, 101)), 100); + expect(trimmed.hiddenBeforeSeq).toBe(26); + // A later sync merges the FULL cache (seqs 1..101) — the unloaded prefix must stay out. 
+ const merged = applyHistory(trimmed, chunks(1, 101)); + expect(merged.committed[0]?.seq).toBe(26); + expect(merged.committed).toHaveLength(76); + }); + + it("still merges the tail above the watermark", () => { + const trimmed = trimTranscript(stateWith(chunks(1, 101)), 100); + const merged = applyHistory(trimmed, chunks(100, 110)); + expect(merged.committed[merged.committed.length - 1]?.seq).toBe(110); + expect(merged.committed[0]?.seq).toBe(26); + }); +}); + +describe("restoreEarlier", () => { + it("pages the newest `count` earlier chunks back in and lowers the watermark", () => { + const windowed = windowTranscript(stateWith(chunks(1, 1000)), 75); // loaded 926..1000 + const restored = restoreEarlier(windowed, chunks(1, 1000), 64); + expect(restored.committed[0]?.seq).toBe(862); + expect(restored.committed).toHaveLength(75 + 64); + expect(restored.hiddenBeforeSeq).toBe(862); + expect(selectHasEarlier(restored)).toBe(true); + }); + + it("clears the watermark when the restore exhausts known earlier history", () => { + const windowed = windowTranscript(stateWith(chunks(1, 100)), 75); // hidden: 1..25 + const restored = restoreEarlier(windowed, chunks(1, 100), 64); + expect(restored.committed).toHaveLength(100); + expect(restored.committed[0]?.seq).toBe(1); + expect(restored.hiddenBeforeSeq).toBe(0); + expect(restored.hiddenThinkingCount).toBe(0); + expect(selectHasEarlier(restored)).toBe(false); + }); + + it("clears the watermark when nothing is actually below it", () => { + const windowed = windowTranscript(stateWith(chunks(50, 200)), 75); + const restored = restoreEarlier(windowed, [], 64); + expect(restored.hiddenBeforeSeq).toBe(0); + expect(restored.committed).toEqual(windowed.committed); + }); + + it("is the identity when nothing is hidden", () => { + const state = stateWith(chunks(1, 10)); + expect(restoreEarlier(state, chunks(1, 10), 5)).toBe(state); + }); + + it("decrements the hidden thinking count by the restored thinking chunks", () => { + const 
committed = [chunk(1, "thinking"), chunk(2), chunk(3, "thinking"), ...chunks(4, 12)]; + const trimmed = trimTranscript(stateWith(committed), 10); // drops 3: seqs 1..3 (2 thinking) + expect(trimmed.hiddenThinkingCount).toBe(2); + const restored = restoreEarlier(trimmed, committed, 2); // restores seqs 2..3 (1 thinking) + expect(restored.hiddenBeforeSeq).toBe(2); + expect(restored.hiddenThinkingCount).toBe(1); + }); + + it("round-trips with trim: trim → restore-all yields the original committed list", () => { + const original = chunks(1, 101); + const trimmed = trimTranscript(stateWith(original), 100); + const restored = restoreEarlier(trimmed, original, 1000); + expect(restored.committed).toEqual(original); + expect(restored.hiddenBeforeSeq).toBe(0); + }); +}); diff --git a/src/core/chunks/trim.ts b/src/core/chunks/trim.ts new file mode 100644 index 0000000..1733027 --- /dev/null +++ b/src/core/chunks/trim.ts @@ -0,0 +1,149 @@ +// Chat-limit windowing for the transcript — PURE policy, zero DOM/Svelte. +// +// In very long conversations an unbounded transcript makes the browser crawl, so +// the FE keeps at most `chat limit` chunks loaded and UNLOADS the oldest ones in +// BULK: a quarter of the limit at a time (limit 100 → at 101 chunks it unloads 25, +// leaving 76). Bulk-on-threshold — NOT one-per-delta like old Dispatch — so a trim +// happens once per ~quarter-limit of new content instead of on every step, which +// was the old scroll-jump-per-step failure mode. A fresh page load shows only the +// newest `floor(0.75 × limit)` chunks, leaving headroom before the first trim. +// +// Unloading drops COMMITTED chunks only (provisional chunks are the in-flight +// turn; they become committed at seal and trimmable then) and records the +// `hiddenBeforeSeq` watermark so history merges can't resurrect them and the +// "Show earlier messages" affordance knows where to page back in from. 
+ +import type { StoredChunk } from "@dispatch/wire"; +import type { TranscriptState } from "./types"; + +/** Default chat limit (max loaded chunks per conversation). */ +export const DEFAULT_CHAT_LIMIT = 256; +/** Hard floor for a configured chat limit (a tiny window would thrash). */ +export const MIN_CHAT_LIMIT = 10; +/** Hard ceiling for a configured chat limit. */ +export const MAX_CHAT_LIMIT = 100_000; + +/** + * Normalize an untrusted configured limit (e.g. parsed from localStorage): + * non-numeric/NaN → the default; otherwise floored + clamped to + * [MIN_CHAT_LIMIT, MAX_CHAT_LIMIT]. + */ +export function normalizeChatLimit(value: unknown): number { + if (typeof value !== "number" || !Number.isFinite(value)) return DEFAULT_CHAT_LIMIT; + const n = Math.floor(value); + if (n < MIN_CHAT_LIMIT) return MIN_CHAT_LIMIT; + if (n > MAX_CHAT_LIMIT) return MAX_CHAT_LIMIT; + return n; +} + +/** The bulk-unload unit: a quarter of the limit, rounded up. */ +export function unloadCount(limit: number): number { + return Math.ceil(limit / 4); +} + +/** The fresh-load window: 75% of the limit, rounded down (≥ 1). */ +export function initialWindowSize(limit: number): number { + return Math.max(1, Math.floor(limit * 0.75)); +} + +/** Total loaded (rendered) chunk count: committed + provisional + accumulating. */ +function totalCount(state: TranscriptState): number { + return state.committed.length + state.provisional.length + (state.accumulating !== null ? 1 : 0); +} + +function countThinking(chunks: readonly StoredChunk[]): number { + let n = 0; + for (const c of chunks) { + if (c.chunk.type === "thinking") n++; + } + return n; +} + +/** Drop the `drop` oldest committed chunks, advancing the watermark + thinking base. 
*/ +function dropOldest(state: TranscriptState, drop: number): TranscriptState { + const dropped = state.committed.slice(0, drop); + const kept = state.committed.slice(drop); + const first = kept[0]; + const lastDropped = dropped[dropped.length - 1]; + let hiddenBeforeSeq = state.hiddenBeforeSeq; + if (first !== undefined) { + hiddenBeforeSeq = first.seq; + } else if (lastDropped !== undefined) { + hiddenBeforeSeq = lastDropped.seq + 1; + } + return { + ...state, + committed: kept, + hiddenBeforeSeq, + hiddenThinkingCount: state.hiddenThinkingCount + countThinking(dropped), + }; +} + +/** + * Enforce the chat limit: when the loaded count EXCEEDS `limit`, unload whole + * quarters (`unloadCount(limit)` each) of the OLDEST committed chunks until back + * at/under the limit — normally exactly one quarter (limit 100: 101 → 76); more + * only when trimming was deferred (e.g. while the reader was scrolled up). + * At/under the limit this is the identity. Never drops provisional chunks. + */ +export function trimTranscript(state: TranscriptState, limit: number): TranscriptState { + if (!Number.isFinite(limit) || limit <= 0) return state; + const total = totalCount(state); + if (total <= limit) return state; + const quarter = unloadCount(limit); + const passes = Math.ceil((total - limit) / quarter); + const drop = Math.min(passes * quarter, state.committed.length); + if (drop <= 0) return state; + return dropOldest(state, drop); +} + +/** + * Window the committed history down to the newest `maxCommitted` chunks (the + * fresh-load path: `maxCommitted = initialWindowSize(limit)`). Identity when + * already within the window. 
+ */ +export function windowTranscript(state: TranscriptState, maxCommitted: number): TranscriptState { + if (!Number.isFinite(maxCommitted) || maxCommitted < 0) return state; + const drop = state.committed.length - maxCommitted; + if (drop <= 0) return state; + return dropOldest(state, drop); +} + +/** + * Page earlier (unloaded) history back in — the "Show earlier messages" action. + * + * `earlier` must be ALL locally-known chunks below the watermark (typically the + * full cached conversation; chunks at/above the watermark are ignored). The + * newest `count` of them are merged back in front of `committed` and the + * watermark lowers to the new oldest loaded seq — or clears to 0 when this + * restore exhausts the known earlier history (nothing left to offer). + */ +export function restoreEarlier( + state: TranscriptState, + earlier: readonly StoredChunk[], + count: number, +): TranscriptState { + if (state.hiddenBeforeSeq <= 0) return state; + const below = earlier.filter((c) => c.seq < state.hiddenBeforeSeq).sort((a, b) => a.seq - b.seq); + if (below.length === 0) { + // Nothing is actually hidden below the watermark: clear it so the + // "Show earlier" affordance disappears. + return { ...state, hiddenBeforeSeq: 0, hiddenThinkingCount: 0 }; + } + const keep = below.slice(-Math.max(1, count)); + const exhausted = keep.length === below.length; + const firstKept = keep[0]; + return { + ...state, + committed: [...keep, ...state.committed], + hiddenBeforeSeq: exhausted || firstKept === undefined ? 0 : firstKept.seq, + hiddenThinkingCount: exhausted + ? 0 + : Math.max(0, state.hiddenThinkingCount - countThinking(keep)), + }; +} + +/** Whether unloaded earlier history exists to offer ("Show earlier messages"). 
*/ +export function selectHasEarlier(state: TranscriptState): boolean { + return state.hiddenBeforeSeq > 0; +} diff --git a/src/core/chunks/types.ts b/src/core/chunks/types.ts index faa0d3f..14619bd 100644 --- a/src/core/chunks/types.ts +++ b/src/core/chunks/types.ts @@ -21,6 +21,23 @@ export interface TranscriptState { readonly latestUsage: Usage | null; readonly sealedTurnId: string | null; /** + * The chat-limit UNLOAD watermark: committed chunks with `seq <` this are + * unloaded (not in `committed`, not rendered) to keep long transcripts cheap. + * `0` = nothing unloaded. `applyHistory` refuses chunks below it (a cache/tail + * merge must not resurrect what the trim dropped); "Show earlier messages" + * lowers it via `restoreEarlier`. See `trim.ts`. + */ + readonly hiddenBeforeSeq: number; + /** + * How many thinking-type chunks are currently unloaded below the watermark. + * Pure render-key bookkeeping: the UI keys thinking collapses by ORDINAL (so + * the key survives the provisional→committed seal transition), and this base + * keeps those ordinals stable when a trim removes older thinking chunks — + * otherwise every remaining collapse would shift keys and swap/lose its + * open state mid-stream. + */ + readonly hiddenThinkingCount: number; + /** * True while a turn is generating on the server — derived STRUCTURALLY from the * event stream: a `turn-start` (or any turn delta) with no matching `done` / * `turn-sealed` / `error` yet. A late-joiner that subscribes mid-turn gets the |
