From 1764e3e5dff836255d121a933dd92542368346f9 Mon Sep 17 00:00:00 2001 From: Adam Malczewski Date: Fri, 12 Jun 2026 18:26:00 +0900 Subject: feat(chat): chat limit — bulk quarter-unload, 75% fresh-load window, show-earlier page-in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Long transcripts no longer grow unbounded: past the chat limit (default 256 chunks, localStorage dispatch.chatLimit) the oldest ceil(limit/4) committed chunks are unloaded in ONE bulk pass — never one-per-delta (old Dispatch's scroll-jump-per-step bug) — and only while the reader is stuck to the bottom (scrolled-up readers defer the trim; it catches up in whole quarters). A fresh page load windows to the newest floor(0.75*limit). Unloading is purely local (IndexedDB cache + server keep everything); a hiddenBeforeSeq watermark keeps history merges from resurrecting unloaded chunks, and a 'Show earlier messages' affordance pages a quarter back in from the cache with scroll-anchor preservation. Thinking-collapse render keys stay stable across trims via a hiddenThinkingCount ordinal base. 
- core/chunks/trim.ts: pure policy (trim/window/restore/normalize) + tests - chat store: chatLimit + canUnload deps, windowed load, showEarlier() - composition root: dispatch.chatLimit localStorage knob + unload gate wired to smart-scroll isAtBottom() - backend CR-5 OPENED (not a blocker): ?limit=/?beforeSeq= on GET /conversations/:id (courier backend-handoff-chat-limit.md) - scripts/live-probe.ts: fix pre-existing stale TurnMetricsEntry reads (m1.usage -> total.usage) that crashed the probe; 17/17 live checks pass --- AGENTS.md | 6 +- GLOSSARY.md | 3 + backend-handoff-chat-limit.md | 66 +++++ backend-handoff.md | 34 ++- scripts/live-probe.ts | 20 +- src/app/App.svelte | 34 ++- src/app/store.svelte.ts | 32 +++ src/core/chunks/index.ts | 12 + src/core/chunks/reducer.ts | 11 +- src/core/chunks/trim.test.ts | 218 ++++++++++++++++ src/core/chunks/trim.ts | 149 +++++++++++ src/core/chunks/types.ts | 17 ++ src/features/chat/store.svelte.ts | 75 +++++- src/features/chat/store.test.ts | 289 ++++++++++++++++++++++ src/features/chat/ui.test.ts | 39 +++ src/features/chat/ui/ChatView.svelte | 46 +++- src/features/smart-scroll/ui/controller.svelte.ts | 10 + 17 files changed, 1038 insertions(+), 23 deletions(-) create mode 100644 backend-handoff-chat-limit.md create mode 100644 src/core/chunks/trim.test.ts create mode 100644 src/core/chunks/trim.ts diff --git a/AGENTS.md b/AGENTS.md index 3f7d428..a4c2f36 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -136,8 +136,10 @@ browser effects, run a LIVE probe: ## Status Slices 1–3 DONE + committed (surface system + WS; conversation transcript cache + delta streaming; tabs + model selector + DaisyUI/dracula), plus per-conversation cwd + LSP view, -context size, cache-warming (+ retention/timer), markdown, smart auto-scroll, and -multi-client live view (subscribe/reconnect + the user prompt on the event stream). 
Plan in +context size, cache-warming (+ retention/timer), markdown, smart auto-scroll, multi-client +live view (subscribe/reconnect + the user prompt on the event stream), and the chat limit +(bulk quarter-unload past `dispatch.chatLimit`, 75% fresh-load window, show-earlier page-in; +`core/chunks/trim.ts`; backend CR-5 open for `?limit=`/`?beforeSeq=`). Plan in `../arch-rewrite/notes/frontend-design.md` §10. ## Reports diff --git a/GLOSSARY.md b/GLOSSARY.md index 2a6904a..2f4f199 100644 --- a/GLOSSARY.md +++ b/GLOSSARY.md @@ -37,3 +37,6 @@ | **surface interpreter** | The generic renderer: field kind → component. Knows kinds, never surface ids. | — | | **metrics bubble** | The FE chat element that renders a turn's **turn metrics** (one per-turn total) and **step metrics** (one per step) as muted system-style bubbles at a turn's tail. UI presentation of `TurnMetrics`/`StepMetrics`; never a surface. | telemetry bubble, usage bubble, stats bubble | | **TPS** (tokens per second) | A FE-DERIVED decode rate: `outputTokens / (decodeMs / 1000)` (per step; per turn over Σ `decodeMs`), falling back to `genTotalMs` when `decodeMs` is absent. The backend-recommended basis (excludes first-token latency). Not carried on the wire; omitted when timing is absent. | throughput | +| **chat limit** | The max LOADED chunks per conversation (default 256; localStorage `dispatch.chatLimit`, no UI yet) before the oldest quarter is unloaded. Counts **chunks** (committed + provisional + accumulating). Policy in `core/chunks/trim.ts`. | chunk limit, message limit, history limit | +| **unload** | Drop the oldest COMMITTED chunks from the in-memory transcript (and DOM) past the **chat limit** — in BULK (`ceil(limit/4)` per pass, deferred while the reader is scrolled up), never one-per-delta (old Dispatch's scroll-jump bug). Purely local: the IndexedDB cache and the server keep everything; `TranscriptState.hiddenBeforeSeq` is the watermark. 
Distinct from the conversation-cache's cross-conversation **eviction**. | evict (reserved for the cross-conversation cache), prune, drop | +| **show earlier** | The affordance at the top of a transcript with unloaded history ("Show earlier messages"): pages one unload-unit back in from the local cache (later: the server via CR-5 `?beforeSeq=`), preserving the reader's scroll position. | load more, pagination | diff --git a/backend-handoff-chat-limit.md b/backend-handoff-chat-limit.md new file mode 100644 index 0000000..da20583 --- /dev/null +++ b/backend-handoff-chat-limit.md @@ -0,0 +1,66 @@ +# Backend handoff — CR-5: history windowing for the FE chat limit (courier doc) + +> **From:** dispatch-web · **To:** arch-rewrite · **Courier:** the user. +> Companion to the living `backend-handoff.md` (§2 CR-5). 2026-06-12. + +## Context — what the FE is building (no backend blocker) + +The FE is adding a **chat limit**: in very long conversations the transcript unloads old +chunks from memory/DOM so the browser stays fast. Policy (already decided with the user): + +- Limit `L` counts **chunks** (default 256, localStorage-configurable). +- When the loaded count exceeds `L`, the FE unloads the oldest `ceil(L/4)` chunks in ONE + bulk pass (e.g. `L=100`: at 101 chunks it unloads 25 → 76 remain). Bulk-on-threshold — + NOT one-per-delta like old Dispatch — to kill the scroll-jump-per-step failure mode. +- A fresh page load shows only the newest `floor(0.75 × L)` chunks (192 for the default). +- A "Show earlier messages" affordance pages older history back in (today: from the FE's + IndexedDB cache, which still holds it). + +**This works TODAY with no backend change** — the FE fetches everything and windows in +memory. The ask below makes the *fresh-browser* case cheap: with an empty IndexedDB cache, +`GET /conversations/:id?sinceSeq=0` currently returns the ENTIRE conversation, so a +10k-chunk chat downloads + parses megabytes only for the FE to display 192 chunks. 
+ +## The ask (additive, `transport-contract` bump) + +Extend `GET /conversations/:id` with two OPTIONAL query params: + +1. **`limit=<n>`** — return only the **newest** `n` chunks of the selection (still + ascending seq order in the response). Selection semantics otherwise unchanged + (`seq > sinceSeq`). + - **If the selection has ≤ `n` chunks, return everything** — the FE will routinely send + a largish number (e.g. `limit=192`) against short conversations and expects the + normal full response (that flow must stay cheap and exact). + - `limit` absent → exactly today's behavior (full selection). Existing FE versions keep + working unchanged. +2. **`beforeSeq=<s>`** — restrict the selection to `seq < s` (combined with `limit`: the + newest `n` chunks below `s`, ascending). This is the "Show earlier messages" page-in + path for history the FE's local cache doesn't have (e.g. a fresh browser that + initial-loaded with `limit`). `beforeSeq` + `sinceSeq` together = `sinceSeq < seq < s` + (we only ever send one of them, but defined semantics beat undefined). + +And one additive response field on `ConversationHistoryResponse`: + +3. **`earliestSeq?: number`** (or `hasOlder: boolean` — your pick, flag your choice in the + reply) — the conversation's overall lowest seq (or whether chunks exist below the + returned window). The FE needs to know whether to OFFER "Show earlier messages" when + its local cache is exhausted. Without it the FE can only guess (seq 1 = start works if + seqs are guaranteed to start at 1 and be gap-free — if you'd rather just CONFIRM that + invariant in writing, the FE can derive `hasOlder` from `chunks[0].seq > 1` and we skip + the new field entirely; cheapest option, totally fine). + +## How the FE will consume it + +- Fresh load (empty cache): `GET /conversations/:id?sinceSeq=0&limit=<n>`. +- Incremental tail sync (cache warm): unchanged `?sinceSeq=<lastSeq>` (no limit — the + tail since last sync is small by construction). 
+- Show-earlier beyond local cache: `GET /conversations/:id?beforeSeq=<s>&limit=<n>`. +- The FE's IndexedDB cache is seq-keyed + dedup-by-seq and already tolerates a + non-contiguous prefix (a windowed suffix), so no cache-format change is needed FE-side. + +## Priority / sequencing + +Not a blocker — the FE ships the limit feature against the current contract (full fetch + +in-memory windowing) and lights up the `limit`/`beforeSeq` params when you ship. Ship +whenever convenient; please bump `transport-contract` and note the params in the reply +handoff so the FE re-pins + re-mirrors. diff --git a/backend-handoff.md b/backend-handoff.md index 30a1d64..4410b44 100644 --- a/backend-handoff.md +++ b/backend-handoff.md @@ -5,12 +5,15 @@ > **From:** dispatch-web orchestrator · **To:** arch-rewrite orchestrator · **Courier:** the user. > `lsp` does NOT span the repos (AGENTS.md § Backend seam) — every cross-repo ask flows through here. -_Last updated: 2026-06-12 (CR-4 consumed). **FE is current on `ui-contract@0.2.0` / -`transport-contract@0.9.0` / `wire@0.6.0`.** All handoffs to date are consumed: surfaces + WS, -conversation transcript/metrics, tabs + model selector, cache-warming (incl. authoritative timer + -retention + cache-rate fix + the CR-4 lifecycle below), **per-conversation cwd + LSP status**, -**context size**, and **turn continuity + multi-client live view**. -**Open asks: NONE.** CR-1/CR-2/CR-4 all RESOLVED ✅ (see §2); §3 lists likely next asks. +_Last updated: 2026-06-12 (CR-5 opened: chat-limit history windowing). **FE is current on +`ui-contract@0.2.0` / `transport-contract@0.9.0` / `wire@0.6.0`.** All handoffs to date are +consumed: surfaces + WS, conversation transcript/metrics, tabs + model selector, cache-warming +(incl. authoritative timer + retention + cache-rate fix + the CR-4 lifecycle below), +**per-conversation cwd + LSP status**, **context size**, and **turn continuity + multi-client +live view**. 
+**Open asks: ONE — CR-5** (`?limit=`/`?beforeSeq=` on `GET /conversations/:id`; NOT a blocker, +courier doc `backend-handoff-chat-limit.md`). CR-1/CR-2/CR-4 all RESOLVED ✅ (see §2); §3 lists +likely next asks. **CR-3 (watcher couldn't see the USER prompt until seal) → RESOLVED ✅** — backend shipped the `user-message` turn event; FE re-pinned + consumption live. The cwd/LSP draft-path verification (`backend-handoff-cwd-lsp.md`) came back **all ✅ confirmed**._ @@ -81,7 +84,24 @@ Mirrored in-repo for headless agents: `.dispatch/{ui-contract,wire,transport-con ## 2. Open asks FOR THE BACKEND -**None open.** Resolved history below. +**One open: CR-5.** Resolved history below it. + +### CR-5 — history windowing for the FE chat limit → **OPEN (not a blocker)** (courier `backend-handoff-chat-limit.md`) + +The FE is shipping a **chat limit** (default 256 chunks, localStorage-configurable): past the +limit it bulk-unloads the oldest `ceil(L/4)` chunks (scroll-jump-free, unlike old Dispatch's +one-per-delta unloading), and a fresh page load shows only the newest `floor(0.75×L)`. Works +today by fetching the full history and windowing in memory — the ask makes the FRESH-BROWSER +load cheap (today `?sinceSeq=0` returns the whole conversation; a 10k-chunk chat downloads +megabytes to show 192 chunks). Additive `transport-contract` asks: +- **`?limit=<n>`** on `GET /conversations/:id` — newest `n` of the selection, still ascending; + **≤ `n` chunks exist ⇒ return everything** (the FE sends it on every fresh load; short chats must stay + exact). Absent ⇒ today's behavior. +- **`?beforeSeq=<s>`** — selection `seq < s` (with `limit`: newest `n` below `s`) — the + "Show earlier messages" page-in path once the FE's local cache is exhausted. +- **`earliestSeq?`/`hasOlder?` response field** — OR simply confirm in writing that seqs start + at 1 gap-free, and the FE derives `hasOlder` from `chunks[0].seq > 1` (cheapest, preferred). +Full consumption plan + sequencing in the courier doc. 
### CR-1 — Loaded Extensions as a true table → **RESOLVED ✅** (shipped + consumed) diff --git a/scripts/live-probe.ts b/scripts/live-probe.ts index 2b2880b..7099b44 100644 --- a/scripts/live-probe.ts +++ b/scripts/live-probe.ts @@ -204,25 +204,28 @@ async function main() { record("turn 1 committed transcript has assistant text", committedText.length > 0); // ─── Metrics: LIVE token + timing (wire@0.3.0 usage/step-complete/done) ────── + // (TurnMetricsEntry is `{ turnId, steps, total }` — the turn aggregate lives on + // `total`, present once the live `done` folded.) const liveTurns = selectOrderedTurnMetrics(t1.metrics); const m1 = liveTurns[0]; + const m1Total = m1?.total ?? null; record( "turn 1 LIVE metrics: a turn with output tokens", - m1 !== undefined && m1.usage.outputTokens > 0, - m1 - ? `in=${m1.usage.inputTokens} out=${m1.usage.outputTokens} steps=${m1.steps.length}` - : "no turn", + m1Total !== null && m1Total.usage.outputTokens > 0, + m1Total + ? `in=${m1Total.usage.inputTokens} out=${m1Total.usage.outputTokens} steps=${m1?.steps.length}` + : "no finalized turn total", ); if (m1 !== undefined) { const anyGen = m1.steps.some((s) => s.genTotalMs !== undefined); const anyTtft = m1.steps.some((s) => s.ttftMs !== undefined); note( - `live timing: durationMs=${m1.durationMs ?? "—"}, ` + + `live timing: durationMs=${m1Total?.durationMs ?? "—"}, ` + `genTotalMs present=${anyGen}, ttftMs present=${anyTtft}`, ); record( "turn 1 LIVE metrics carries timing (durationMs or step genTotalMs)", - m1.durationMs !== undefined || anyGen, + m1Total?.durationMs !== undefined || anyGen, "requires the backend runtime to have a clock", ); } @@ -248,10 +251,11 @@ async function main() { applyDurableMetrics(initialMetricsState(), dm.turns), ); const d1 = durableMerged[0]; + const d1Total = d1?.total ?? null; record( "durable /metrics turn has token usage", - d1 !== undefined && d1.usage.outputTokens > 0, - d1 ? 
`out=${d1.usage.outputTokens} steps=${d1.steps.length}` : "no turn", + d1Total !== null && d1Total.usage.outputTokens > 0, + d1Total ? `out=${d1Total.usage.outputTokens} steps=${d1?.steps.length}` : "no turn total", ); } diff --git a/src/app/App.svelte b/src/app/App.svelte index 50f24e7..4c5a82b 100644 --- a/src/app/App.svelte +++ b/src/app/App.svelte @@ -1,5 +1,6 @@