diff options
| author | Adam Malczewski <[email protected]> | 2026-06-12 18:26:00 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-12 18:26:00 +0900 |
| commit | 1764e3e5dff836255d121a933dd92542368346f9 (patch) | |
| tree | b835055de0f0f1fd9750741764dac8b30f7498bf /src/features | |
| parent | 4001274e3ba25a3946df1e9f2dc82ca6781cd2bf (diff) | |
| download | dispatch-web-1764e3e5dff836255d121a933dd92542368346f9.tar.gz dispatch-web-1764e3e5dff836255d121a933dd92542368346f9.zip | |
feat(chat): chat limit — bulk quarter-unload, 75% fresh-load window, show-earlier page-in
Long transcripts no longer grow unbounded: past the chat limit (default 256
chunks, localStorage dispatch.chatLimit) the oldest ceil(limit/4) committed
chunks are unloaded in ONE bulk pass — never one-per-delta (old Dispatch's
scroll-jump-per-step bug) — and only while the reader is stuck to the bottom
(scrolled-up readers defer the trim; it catches up in whole quarters). A fresh
page load windows to the newest floor(0.75*limit) chunks. Unloading is purely local
(IndexedDB cache + server keep everything); a hiddenBeforeSeq watermark keeps
history merges from resurrecting unloaded chunks, and a 'Show earlier messages'
affordance pages a quarter back in from the cache with scroll-anchor
preservation. Thinking-collapse render keys stay stable across trims via a
hiddenThinkingCount ordinal base.
- core/chunks/trim.ts: pure policy (trim/window/restore/normalize) + tests
- chat store: chatLimit + canUnload deps, windowed load, showEarlier()
- composition root: dispatch.chatLimit localStorage knob + unload gate wired
  to smart-scroll isAtBottom()
- backend CR-5 OPENED (not a blocker): ?limit=/?beforeSeq= on
  GET /conversations/:id (courier backend-handoff-chat-limit.md)
- scripts/live-probe.ts: fix pre-existing stale TurnMetricsEntry reads
  (m1.usage -> total.usage) that crashed the probe; 17/17 live checks pass
Diffstat (limited to 'src/features')
| -rw-r--r-- | src/features/chat/store.svelte.ts | 75 | ||||
| -rw-r--r-- | src/features/chat/store.test.ts | 289 | ||||
| -rw-r--r-- | src/features/chat/ui.test.ts | 39 | ||||
| -rw-r--r-- | src/features/chat/ui/ChatView.svelte | 46 | ||||
| -rw-r--r-- | src/features/smart-scroll/ui/controller.svelte.ts | 10 |
5 files changed, 455 insertions, 4 deletions
diff --git a/src/features/chat/store.svelte.ts b/src/features/chat/store.svelte.ts index 37049bf..5ca28af 100644 --- a/src/features/chat/store.svelte.ts +++ b/src/features/chat/store.svelte.ts @@ -11,9 +11,16 @@ import { clearGenerating, foldEvent, initialState, + initialWindowSize, + normalizeChatLimit, + restoreEarlier, selectChunks, selectGenerating, + selectHasEarlier, selectMessages, + trimTranscript, + unloadCount, + windowTranscript, } from "../../core/chunks"; import type { MetricsState, TurnMetricsEntry } from "../../core/metrics"; import { @@ -33,6 +40,19 @@ export interface ChatStoreDependencies { readonly historySync: HistorySync; readonly metricsSync: MetricsSync; readonly cache: ConversationCache; + /** + * The chat limit: max loaded chunks before the oldest quarter is unloaded + * (see `core/chunks/trim.ts`). Normalized via `normalizeChatLimit`; absent → + * `DEFAULT_CHAT_LIMIT`. + */ + readonly chatLimit?: number; + /** + * Whether unloading may run RIGHT NOW. The composition root wires this to the + * smart-scroll "stuck to bottom" state: while the reader is scrolled up, a + * trim would yank the content under them, so it is DEFERRED until they return + * to the bottom (the next fold retries). Absent → always allowed. + */ + readonly canUnload?: () => boolean; } export interface ChatStore { @@ -55,11 +75,30 @@ export interface ChatStore { readonly pendingSync: boolean; readonly error: string | null; readonly model: string | undefined; + /** + * Whether earlier history was unloaded by the chat limit (or never loaded by + * the fresh-load window) and can be paged back in — drives the + * "Show earlier messages" affordance. + */ + readonly hasEarlier: boolean; + /** + * Render-key base for thinking collapses: how many thinking chunks are + * unloaded below the watermark, so the UI's ordinal keys stay stable across + * a trim (see `TranscriptState.hiddenThinkingCount`). 
+ */ + readonly thinkingKeyBase: number; handleDelta(msg: ChatDeltaMessage | ChatErrorMessage): void; send(text: string): void; setModel(model: string): void; load(): Promise<void>; /** + * Page one unload-unit (`ceil(limit/4)`) of earlier history back in from the + * local cache — the "Show earlier messages" action. (When the backend ships + * CR-5 `?beforeSeq=`, this can fall through to the server once the cache is + * exhausted.) + */ + showEarlier(): Promise<void>; + /** * Re-sync after a WS (re)connect. Clears any stale `generating` (a turn may * have sealed while disconnected — the live `turn-sealed` was missed), then * pulls newly-sealed turns from history (+ metrics). If the turn is still @@ -78,6 +117,18 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { let _model = $state<string | undefined>(deps.model); let disposed = false; + const chatLimit = normalizeChatLimit(deps.chatLimit); + + /** + * Enforce the chat limit after a transcript mutation — unless the injected + * gate says the reader is scrolled up (then defer; the next mutation retries + * and `trimTranscript` unloads whole quarters to catch up). + */ + function maybeTrim(): void { + if (deps.canUnload !== undefined && !deps.canUnload()) return; + transcript = trimTranscript(transcript, chatLimit); + } + async function syncTail(): Promise<void> { if (disposed || _pendingSync) return; _pendingSync = true; @@ -86,6 +137,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { const res = await deps.historySync(deps.conversationId, since); const merged = await deps.cache.commit(deps.conversationId, res.chunks); transcript = applyHistory(transcript, merged); + maybeTrim(); _error = null; } catch (err) { _error = err instanceof Error ? 
err.message : String(err); @@ -130,6 +182,12 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { get model(): string | undefined { return _model; }, + get hasEarlier(): boolean { + return selectHasEarlier(transcript); + }, + get thinkingKeyBase(): number { + return transcript.hiddenThinkingCount; + }, handleDelta(msg: ChatDeltaMessage | ChatErrorMessage): void { if (msg.type === "chat.error") { @@ -144,6 +202,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { } transcript = foldEvent(transcript, msg.event); metrics = foldMetricsEvent(metrics, msg.event); + maybeTrim(); if (transcript.sealedTurnId !== null) { void syncTail(); void syncMetrics(); @@ -152,6 +211,7 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { send(text: string): void { transcript = appendUserMessage(transcript, text); + maybeTrim(); const msg: ChatSendMessage = { type: "chat.send", conversationId: deps.conversationId, @@ -166,14 +226,27 @@ export function createChatStore(deps: ChatStoreDependencies): ChatStore { }, async load(): Promise<void> { + // Fresh load shows only the newest 75% of the limit — headroom before the + // first trim. Window the cached slice SYNCHRONOUSLY with its apply (no + // render in between), and again after the tail sync (a cold cache means + // syncTail pulled the whole history in one response). 
+ const windowSize = initialWindowSize(chatLimit); const cached = await deps.cache.load(deps.conversationId); if (cached.length > 0) { - transcript = applyHistory(transcript, cached); + transcript = windowTranscript(applyHistory(transcript, cached), windowSize); } await syncTail(); + transcript = windowTranscript(transcript, windowSize); await syncMetrics(); }, + async showEarlier(): Promise<void> { + if (disposed) return; + if (!selectHasEarlier(transcript)) return; + const cached = await deps.cache.load(deps.conversationId); + transcript = restoreEarlier(transcript, cached, unloadCount(chatLimit)); + }, + resync(): void { if (disposed) return; // A turn may have sealed while we were disconnected (missed `turn-sealed`): diff --git a/src/features/chat/store.test.ts b/src/features/chat/store.test.ts index 6507d69..5c798d6 100644 --- a/src/features/chat/store.test.ts +++ b/src/features/chat/store.test.ts @@ -892,6 +892,295 @@ describe("createChatStore", () => { store.dispose(); }); + it("chat limit: crossing the limit unloads the oldest quarter in one bulk pass", async () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: metricsSync.impl, + cache: cache.impl, + chatLimit: 100, + }); + + // Commit exactly 100 chunks via a sealed turn (at the limit — no trim). 
+ const hundred = Array.from({ length: 100 }, (_, i) => makeStoredChunk(i + 1)); + historySync.returnChunks = hundred; + store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" })); + store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t1" })); + await vi.waitFor(() => { + expect(store.chunks).toHaveLength(100); + }); + expect(store.hasEarlier).toBe(false); + + // The 101st chunk (a live tool-call) crosses the limit → 25 unload → 76 remain. + store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t2" })); + store.handleDelta( + deltaEvent({ + type: "tool-call", + conversationId: CONV_ID, + turnId: "t2", + toolCallId: "tc1", + toolName: "probe", + input: {}, + stepId: "t2#0" as StepId, + }), + ); + + expect(store.chunks).toHaveLength(76); + expect(store.chunks[0]?.seq).toBe(26); + expect(store.hasEarlier).toBe(true); + + store.dispose(); + }); + + it("chat limit: unloading is deferred while the gate is closed, then catches up", () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + let atBottom = false; // reader scrolled up + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: metricsSync.impl, + cache: cache.impl, + chatLimit: 10, + canUnload: () => atBottom, + }); + + // 15 live tool-calls: over the limit, but the gate defers every trim. 
+ store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" })); + for (let i = 0; i < 15; i++) { + store.handleDelta( + deltaEvent({ + type: "tool-call", + conversationId: CONV_ID, + turnId: "t1", + toolCallId: `tc${i}`, + toolName: "probe", + input: {}, + stepId: `t1#${i}` as StepId, + }), + ); + } + expect(store.chunks).toHaveLength(15); + + // Reader returns to the bottom — but provisional chunks are never unloaded, + // so the deferred trim still can't shrink an all-provisional transcript. + atBottom = true; + store.handleDelta( + deltaEvent({ + type: "tool-call", + conversationId: CONV_ID, + turnId: "t1", + toolCallId: "tc15", + toolName: "probe", + input: {}, + stepId: "t1#15" as StepId, + }), + ); + expect(store.chunks).toHaveLength(16); + + store.dispose(); + }); + + it("chat limit: a deferred trim catches up across committed history once the gate opens", async () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + let atBottom = false; + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: metricsSync.impl, + cache: cache.impl, + chatLimit: 100, + canUnload: () => atBottom, + }); + + // Seal a turn committing 130 chunks while the reader is scrolled up: no trim. + historySync.returnChunks = Array.from({ length: 130 }, (_, i) => makeStoredChunk(i + 1)); + store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t1" })); + store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t1" })); + await vi.waitFor(() => { + expect(store.chunks).toHaveLength(130); + }); + + // Back at the bottom: the next fold trims whole quarters down to ≤ 100. 
+ atBottom = true; + store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t2" })); + // 130 → 2 quarters of 25 → 80 committed (turn-start adds no chunk). + expect(store.chunks).toHaveLength(80); + expect(store.chunks[0]?.seq).toBe(51); + + store.dispose(); + }); + + it("chat limit: load windows a long cached conversation to 75% of the limit", async () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + await cache.impl.commit( + CONV_ID, + Array.from({ length: 500 }, (_, i) => makeStoredChunk(i + 1)), + ); + + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: metricsSync.impl, + cache: cache.impl, + chatLimit: 100, + }); + + await store.load(); + + // floor(100 × 0.75) = 75 newest chunks: seqs 426..500. + expect(store.chunks).toHaveLength(75); + expect(store.chunks[0]?.seq).toBe(426); + expect(store.hasEarlier).toBe(true); + // The tail sync still used the cache's real cursor (not the window's edge). + expect(historySync.calls[0]?.sinceSeq).toBe(500); + + store.dispose(); + }); + + it("chat limit: a cold cache (fresh browser) windows the full server history to 75%", async () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + // Backend has no limit param yet (CR-5): sinceSeq=0 returns EVERYTHING. 
+ historySync.returnChunks = Array.from({ length: 500 }, (_, i) => makeStoredChunk(i + 1)); + + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: metricsSync.impl, + cache: cache.impl, + chatLimit: 100, + }); + + await store.load(); + + expect(store.chunks).toHaveLength(75); + expect(store.chunks[0]?.seq).toBe(426); + expect(store.hasEarlier).toBe(true); + // The full history is still CACHED locally (show-earlier pages from it). + const cached = await cache.impl.load(CONV_ID); + expect(cached).toHaveLength(500); + + store.dispose(); + }); + + it("chat limit: showEarlier pages a quarter back in from the cache", async () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + await cache.impl.commit( + CONV_ID, + Array.from({ length: 500 }, (_, i) => makeStoredChunk(i + 1)), + ); + + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: metricsSync.impl, + cache: cache.impl, + chatLimit: 100, + }); + + await store.load(); + expect(store.chunks[0]?.seq).toBe(426); + + await store.showEarlier(); // +ceil(100/4) = 25 older chunks + expect(store.chunks).toHaveLength(100); + expect(store.chunks[0]?.seq).toBe(401); + expect(store.hasEarlier).toBe(true); + + store.dispose(); + }); + + it("chat limit: showEarlier clears hasEarlier when the cache is exhausted", async () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + await cache.impl.commit( + CONV_ID, + Array.from({ length: 80 }, (_, i) => makeStoredChunk(i + 1)), + ); + + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: 
metricsSync.impl, + cache: cache.impl, + chatLimit: 100, + }); + + await store.load(); // window 75: hidden 1..5 + expect(store.chunks).toHaveLength(75); + expect(store.hasEarlier).toBe(true); + + await store.showEarlier(); // restores all 5 → nothing left below + expect(store.chunks).toHaveLength(80); + expect(store.chunks[0]?.seq).toBe(1); + expect(store.hasEarlier).toBe(false); + + store.dispose(); + }); + + it("chat limit: a post-trim history sync does not resurrect unloaded chunks", async () => { + const transport = createFakeTransport(); + const historySync = createFakeHistorySync(); + const metricsSync = createFakeMetricsSync(); + const cache = createFakeCache(); + await cache.impl.commit( + CONV_ID, + Array.from({ length: 500 }, (_, i) => makeStoredChunk(i + 1)), + ); + + const store = createChatStore({ + conversationId: CONV_ID, + transport: transport.impl, + historySync: historySync.impl, + metricsSync: metricsSync.impl, + cache: cache.impl, + chatLimit: 100, + }); + + await store.load(); + expect(store.chunks[0]?.seq).toBe(426); + + // A sealed turn triggers syncTail, whose cache.commit returns the FULL + // merged cache (seqs 1..501) — the watermark must keep 1..425 out. 
+ historySync.returnChunks = [makeStoredChunk(501)]; + store.handleDelta(deltaEvent({ type: "turn-start", conversationId: CONV_ID, turnId: "t9" })); + store.handleDelta(deltaEvent({ type: "turn-sealed", conversationId: CONV_ID, turnId: "t9" })); + + await vi.waitFor(() => { + expect(store.chunks[store.chunks.length - 1]?.seq).toBe(501); + }); + expect(store.chunks[0]?.seq).toBe(426); + expect(store.chunks).toHaveLength(76); + + store.dispose(); + }); + it("resync is a no-op after dispose", async () => { const transport = createFakeTransport(); const historySync = createFakeHistorySync(); diff --git a/src/features/chat/ui.test.ts b/src/features/chat/ui.test.ts index 278b2cf..7174821 100644 --- a/src/features/chat/ui.test.ts +++ b/src/features/chat/ui.test.ts @@ -41,6 +41,45 @@ describe("ChatView", () => { expect(screen.getByText("Hello!")).toBeInTheDocument(); }); + it("shows the show-earlier button only when earlier history is unloaded, and pages it in", async () => { + const chunks: RenderedChunk[] = [ + { seq: 26, role: "user", chunk: { type: "text", text: "later" }, provisional: false }, + ]; + + let resolveEarlier: (() => void) | undefined; + const onShowEarlier = vi.fn( + () => + new Promise<void>((resolve) => { + resolveEarlier = resolve; + }), + ); + + render(ChatView, { props: { chunks, hasEarlier: true, onShowEarlier } }); + + const button = screen.getByRole("button", { name: /show earlier messages/i }); + const user = userEvent.setup(); + await user.click(button); + + expect(onShowEarlier).toHaveBeenCalledTimes(1); + // While the page-in is awaited the button is disabled (no double-fire). 
+ expect(screen.getByRole("button", { name: /loading earlier messages/i })).toBeDisabled(); + + resolveEarlier?.(); + await vi.waitFor(() => { + expect(screen.getByRole("button", { name: /show earlier messages/i })).toBeEnabled(); + }); + }); + + it("hides the show-earlier button when nothing is unloaded", () => { + const chunks: RenderedChunk[] = [ + { seq: 1, role: "user", chunk: { type: "text", text: "all here" }, provisional: false }, + ]; + + render(ChatView, { props: { chunks, hasEarlier: false, onShowEarlier: vi.fn() } }); + + expect(screen.queryByRole("button", { name: /show earlier/i })).not.toBeInTheDocument(); + }); + it("renders tool-call chunks", () => { const chunks: RenderedChunk[] = [ { diff --git a/src/features/chat/ui/ChatView.svelte b/src/features/chat/ui/ChatView.svelte index 00691aa..d1d7709 100644 --- a/src/features/chat/ui/ChatView.svelte +++ b/src/features/chat/ui/ChatView.svelte @@ -19,21 +19,48 @@ let { chunks, turnMetrics = [], + hasEarlier = false, + onShowEarlier, + thinkingKeyBase = 0, }: { chunks: readonly RenderedChunk[]; turnMetrics?: readonly TurnMetricsEntry[]; + /** Earlier history is unloaded (chat limit) and can be paged back in. */ + hasEarlier?: boolean; + /** Page earlier history back in; the caller owns scroll-position preservation. */ + onShowEarlier?: () => Promise<void>; + /** + * Ordinal base for thinking-collapse keys: the count of thinking chunks + * unloaded by the chat limit, so the remaining ordinals don't shift (and + * swap collapse state) when a trim removes older thinking blocks. + */ + thinkingKeyBase?: number; } = $props(); + // True while a show-earlier page-in is awaited (disables the button). 
+ let loadingEarlier = $state(false); + + async function showEarlier() { + if (!onShowEarlier || loadingEarlier) return; + loadingEarlier = true; + try { + await onShowEarlier(); + } finally { + loadingEarlier = false; + } + } + const groups = $derived(groupRenderedChunks(chunks)); const rows = $derived(interleaveTurnMetrics(groups, turnMetrics)); // Stable per-row keys. Thinking blocks get an ordinal key (`think<n>`) that // survives the provisional→committed (seq null → seq N) transition, so the - // collapse's open/close state is NOT lost when a turn seals. (App isolates - // these keys per conversation via {#key}.) + // collapse's open/close state is NOT lost when a turn seals. The ordinal + // starts at `thinkingKeyBase` so keys also survive a chat-limit trim removing + // older thinking blocks. (App isolates these keys per conversation via {#key}.) const keyedRows = $derived.by(() => { - let thinking = 0; + let thinking = thinkingKeyBase; return rows.map((row, i) => { if (row.kind === "step-metrics") { return { row, key: `s${row.step.stepId}` }; @@ -132,6 +159,19 @@ {/snippet} <div class="flex flex-col gap-2 p-4 pl-6" role="log" aria-live="polite"> + {#if hasEarlier && onShowEarlier} + <!-- Chat limit: older chunks are unloaded; offer to page them back in. 
--> + <div class="flex justify-center"> + <button class="btn btn-ghost btn-xs" disabled={loadingEarlier} onclick={showEarlier}> + {#if loadingEarlier} + <span class="loading loading-spinner loading-xs" aria-hidden="true"></span> + Loading earlier messages… + {:else} + Show earlier messages + {/if} + </button> + </div> + {/if} {#each keyedRows as { row, key } (key)} {#if row.kind === "step-metrics"} {@const sv = viewStepMetrics(row.step, row.index)} diff --git a/src/features/smart-scroll/ui/controller.svelte.ts b/src/features/smart-scroll/ui/controller.svelte.ts index 99d53ca..dbe65d1 100644 --- a/src/features/smart-scroll/ui/controller.svelte.ts +++ b/src/features/smart-scroll/ui/controller.svelte.ts @@ -22,6 +22,12 @@ export interface SmartScrollController { /** Reactive: show the "scroll to bottom" affordance (the user has scrolled up). */ readonly showButton: boolean; /** + * Non-reactive point-in-time query: is the view stuck to the bottom right now? + * For imperative callers (e.g. the chat-limit unload gate) that poll at event + * time rather than subscribing — reads the reducer state, not a rune. + */ + isAtBottom(): boolean; + /** * Attach to the scroll container; returns a teardown to call on unmount. * Pass the inner CONTENT element to also follow height changes that aren't a * transcript update (async markdown/highlight, image loads, a collapse toggling, @@ -84,6 +90,10 @@ export function createSmartScrollController(): SmartScrollController { return showButton; }, + isAtBottom(): boolean { + return state.stuck; + }, + attach(node: HTMLElement, content?: HTMLElement): () => void { el = node; node.addEventListener("scroll", handleScroll, { passive: true }); |
