From e45cab2a2d9d7bf5e48ace7111fd84b1b9bf2df3 Mon Sep 17 00:00:00 2001
From: Adam Malczewski <github@tradam.dev>
Date: Thu, 11 Jun 2026 16:06:48 +0900
Subject: feat(cache-warming,surfaces,metrics,markdown): conversation-scoped
 surfaces, cache warming + retention, markdown

Consumes the backend cache-warming + cache-rate handoffs end-to-end and adds supporting infra:

- protocol/transport: conversation-scoped surfaces (conversationId on subscribe/invoke/surface + staleness routing); store auto-subscribes the catalog with the focused conversation and re-scopes on switch.
- surface-host: generic Number field renderer + custom rendererId dispatch (graceful skip on unknown).
- cache-warming feature: enabled toggle, min+sec interval, AUTHORITATIVE countdown from the surface's cache-warming-timer nextWarmAt, manual Warm now (POST /chat/warm), lastWarmAt-keyed history, cache-retention stat, expectedCacheRate headline.
- metrics: cross-turn expected-cache (retention) derivation + bubble badge; cache-rate fix needs no code change (inputTokens now total).
- markdown feature: marked + marked-highlight + highlight.js + dompurify, rendered in ChatView.
- fixes (gemini review): {#key activeConversationId} remount of CacheWarmingView to stop history/feedback leaking across tabs; guard NaN interval inputs from committing 0.
- docs/contracts: regenerated transport/ui-contract mirrors; backend-handoff updated (CR-3 resolved).

Verified: svelte-check 0 errors, biome clean, 494 tests pass, vite build OK.
---
 src/app.css                                        | 106 +++++++++
 src/app/App.svelte                                 |  45 +++-
 src/app/App.test.ts                                |   9 +-
 src/app/store.svelte.ts                            |  83 ++++++-
 src/core/metrics/format.test.ts                    |  59 +++++
 src/core/metrics/format.ts                         |  29 +++
 src/core/metrics/index.ts                          |   2 +
 src/core/metrics/place.test.ts                     |  13 ++
 src/core/metrics/place.ts                          |  13 +-
 src/core/metrics/types.ts                          |   5 +
 src/core/protocol/index.ts                         |  11 +-
 src/core/protocol/reducer.test.ts                  | 110 +++++++++-
 src/core/protocol/reducer.ts                       | 101 +++++++--
 src/core/protocol/types.ts                         |  19 +-
 src/features/cache-warming/index.ts                |   8 +
 .../cache-warming/logic/view-model.test.ts         | 220 +++++++++++++++++++
 src/features/cache-warming/logic/view-model.ts     | 242 +++++++++++++++++++++
 .../cache-warming/ui/CacheWarmingView.svelte       | 234 ++++++++++++++++++++
 src/features/chat/ui/ChatView.svelte               |  11 +-
 src/features/markdown/index.ts                     |   8 +
 src/features/markdown/logic/markdown.test.ts       |  58 +++++
 src/features/markdown/logic/markdown.ts            | 165 ++++++++++++++
 src/features/markdown/ui/Markdown.svelte           |  58 +++++
 src/features/markdown/ui/markdown.test.ts          |  40 ++++
 src/features/surface-host/logic/plan.test.ts       |  41 ++++
 src/features/surface-host/logic/plan.ts            |  33 ++-
 src/features/surface-host/logic/types.ts           |  17 ++
 src/features/surface-host/ui/Number.svelte         |  43 ++++
 src/features/surface-host/ui/SurfaceView.svelte    |   3 +
 29 files changed, 1736 insertions(+), 50 deletions(-)
 create mode 100644 src/features/cache-warming/index.ts
 create mode 100644 src/features/cache-warming/logic/view-model.test.ts
 create mode 100644 src/features/cache-warming/logic/view-model.ts
 create mode 100644 src/features/cache-warming/ui/CacheWarmingView.svelte
 create mode 100644 src/features/markdown/index.ts
 create mode 100644 src/features/markdown/logic/markdown.test.ts
 create mode 100644 src/features/markdown/logic/markdown.ts
 create mode 100644 src/features/markdown/ui/Markdown.svelte
 create mode 100644 src/features/markdown/ui/markdown.test.ts
 create mode 100644 src/features/surface-host/ui/Number.svelte

(limited to 'src')

diff --git a/src/app.css b/src/app.css
index 5db1f25..2c30b5f 100644
--- a/src/app.css
+++ b/src/app.css
@@ -1,4 +1,6 @@
 @import "tailwindcss";
+/* Syntax-highlight theme for fenced code blocks in rendered Markdown. */
+@import "highlight.js/styles/atom-one-dark.min.css";
 
 /* DaisyUI v5 — enable the plugin AND bundle the dracula theme (set as default,
    applied via <html data-theme="dracula">). Themes not listed here are NOT
@@ -7,6 +9,110 @@
 	themes: dracula --default;
 }
 
+/* Rendered-Markdown (assistant messages) typography — scoped to .markdown-body so
+   it never leaks out. DaisyUI v5 --color-* vars are full colours (not oklch parts). */
+.markdown-body {
+	& p {
+		margin-block: 0.5em;
+		&:first-child {
+			margin-block-start: 0;
+		}
+		&:last-child {
+			margin-block-end: 0;
+		}
+	}
+	& h1,
+	& h2,
+	& h3,
+	& h4,
+	& h5,
+	& h6 {
+		font-weight: 600;
+		line-height: 1.25;
+		margin-block: 0.75em 0.25em;
+		&:first-child {
+			margin-block-start: 0;
+		}
+	}
+	& h1 {
+		font-size: 1.4em;
+	}
+	& h2 {
+		font-size: 1.2em;
+	}
+	& h3 {
+		font-size: 1.1em;
+	}
+	& ul,
+	& ol {
+		padding-inline-start: 1.5em;
+		margin-block: 0.5em;
+	}
+	& ul {
+		list-style-type: disc;
+	}
+	& ol {
+		list-style-type: decimal;
+	}
+	& li {
+		margin-block: 0.15em;
+	}
+	& pre {
+		overflow-x: auto;
+		border-radius: var(--radius-box);
+		margin-block: 0.5em;
+	}
+	& pre code {
+		display: block;
+		padding: 0.75em 1em;
+		font-size: 0.8125em;
+		line-height: 1.5;
+	}
+	& :not(pre) > code {
+		font-size: 0.875em;
+		padding: 0.15em 0.4em;
+		border-radius: var(--radius-selector);
+		background-color: color-mix(in oklab, var(--color-base-content) 10%, transparent);
+	}
+	& blockquote {
+		border-inline-start: 3px solid color-mix(in oklab, var(--color-base-content) 20%, transparent);
+		padding-inline-start: 0.75em;
+		margin-block: 0.5em;
+		opacity: 0.8;
+	}
+	& a {
+		color: var(--color-primary);
+		text-decoration: underline;
+		&:hover {
+			opacity: 0.8;
+		}
+	}
+	& strong {
+		font-weight: 600;
+	}
+	& table {
+		width: 100%;
+		border-collapse: collapse;
+		margin-block: 0.5em;
+		font-size: 0.875em;
+	}
+	& th,
+	& td {
+		border: 1px solid color-mix(in oklab, var(--color-base-content) 15%, transparent);
+		padding: 0.4em 0.75em;
+		text-align: start;
+	}
+	& th {
+		font-weight: 600;
+		background-color: var(--color-base-200);
+	}
+	& hr {
+		border: none;
+		border-top: 1px solid color-mix(in oklab, var(--color-base-content) 20%, transparent);
+		margin-block: 0.75em;
+	}
+}
+
 /* App shell fills the viewport and never scrolls/overflows at the page level —
    the inner regions (tab strip, chat transcript) own their own scrolling. */
 html,
diff --git a/src/app/App.svelte b/src/app/App.svelte
index f02797e..dae6177 100644
--- a/src/app/App.svelte
+++ b/src/app/App.svelte
@@ -1,8 +1,14 @@
 <script lang="ts">
 	import type { InvokeMessage } from "@dispatch/ui-contract";
 	import Table from "../components/Table.svelte";
+	import {
+		CacheWarmingView,
+		manifest as cacheWarmingManifest,
+		type WarmFeedback,
+	} from "../features/cache-warming";
 	import { ChatView, Composer, manifest as chatManifest, ModelSelector } from "../features/chat";
 	import { manifest as conversationCacheManifest } from "../features/conversation-cache";
+	import { manifest as markdownManifest } from "../features/markdown";
 	import { manifest as surfaceHostManifest, SurfaceView } from "../features/surface-host";
 	import { manifest as tabsManifest, TabBar } from "../features/tabs";
 	import { manifest as viewsManifest, ViewSidebar } from "../features/views";
@@ -10,15 +16,22 @@
 
 	let { store }: { store: AppStore } = $props();
 
+	// The backend's conversation-scoped cache-warming surface. Referenced by id at
+	// the composition root (sanctioned discovery-by-id) to give it a dedicated view
+	// and keep it out of the generic Extensions surface list — SurfaceView itself
+	// stays fully generic (it never switches on a surface id).
+	const CACHE_WARMING_ID = "cache-warming";
+
 	// The view kinds offered in the sidebar's dropdown. Generic data — the
 	// `viewContent` snippet below maps each kind id to its renderer.
 	const viewKinds = [
 		{ id: "model", label: "Model" },
 		{ id: "extensions", label: "Extensions" },
+		{ id: "cache-warming", label: "Cache Warming" },
 	] as const;
 
-	// Default sidebar layout: a Model panel on top, Extensions below.
-	const initialViews = ["model", "extensions"] as const;
+	// Default sidebar layout: a Model panel on top, then Extensions, then Cache Warming.
+	const initialViews = ["model", "extensions", "cache-warming"] as const;
 
 	// Frontend module list for the "Loaded Modules" view, AGGREGATED from each
 	// feature's public `manifest` export so it can't drift from what's actually
@@ -32,6 +45,8 @@
 		surfaceHostManifest,
 		viewsManifest,
 		conversationCacheManifest,
+		markdownManifest,
+		cacheWarmingManifest,
 	].map((m) => [m.name, m.description] as const);
 
 	// Right sidebar: open by default on wide screens (pushes the chat aside),
@@ -51,6 +66,19 @@
 	function handleSelectModel(model: string) {
 		store.selectModel(model);
 	}
+
+	// Bridge store.warmNow() onto the cache-warming feature's WarmNow port by
+	// flattening the store's WarmResult into the feature's WarmFeedback shape.
+	async function warmNow(): Promise<WarmFeedback | null> {
+		const outcome = await store.warmNow();
+		// null is passed through untouched (the store reports nothing to warm).
+		if (outcome === null) return null;
+		if (!outcome.ok) {
+			return { ok: false, error: outcome.error };
+		}
+		const { cachePct, expectedCacheRate } = outcome.response;
+		return { ok: true, cachePct, expectedCacheRate };
+	}
 </script>
 
 <main class="relative flex h-screen overflow-hidden">
@@ -165,9 +193,20 @@
 		</section>
 		<section class="mt-4 flex flex-col gap-3">
 			<h3 class="text-xs font-semibold uppercase opacity-60">Surfaces</h3>
-			{#each store.surfaces as spec (spec.id)}
+			{#each store.surfaces.filter((s) => s.id !== CACHE_WARMING_ID) as spec (spec.id)}
 				<SurfaceView {spec} onInvoke={handleInvoke} />
 			{/each}
 		</section>
+	{:else if kind === "cache-warming"}
+		<!-- Re-mount per conversation (like ChatView) so the view's local warming
+		     history / manual-warm feedback can't bleed across tabs. -->
+		{#key store.activeConversationId}
+			<CacheWarmingView
+				spec={store.surface(CACHE_WARMING_ID)}
+				canWarm={store.activeConversationId !== null}
+				onInvoke={handleInvoke}
+				{warmNow}
+			/>
+		{/key}
 	{/if}
 {/snippet}
diff --git a/src/app/App.test.ts b/src/app/App.test.ts
index 121bd20..1534d1c 100644
--- a/src/app/App.test.ts
+++ b/src/app/App.test.ts
@@ -388,7 +388,14 @@ describe("App component interaction tests", () => {
 
 		// Extensions is the default view, so the modules table renders immediately.
 		expect(screen.getByRole("columnheader", { name: "Module" })).toBeInTheDocument();
-		for (const name of ["chat", "tabs", "surface-host", "views", "conversation-cache"]) {
+		for (const name of [
+			"chat",
+			"tabs",
+			"surface-host",
+			"views",
+			"conversation-cache",
+			"markdown",
+		]) {
 			expect(screen.getByRole("cell", { name })).toBeInTheDocument();
 		}
 
diff --git a/src/app/store.svelte.ts b/src/app/store.svelte.ts
index efbe065..c242d77 100644
--- a/src/app/store.svelte.ts
+++ b/src/app/store.svelte.ts
@@ -4,6 +4,8 @@ import type {
 	ConversationHistoryResponse,
 	ConversationMetricsResponse,
 	ModelsResponse,
+	WarmRequest,
+	WarmResponse,
 } from "@dispatch/transport-contract";
 import type { SubscribeMessage, SurfaceServerMessage, SurfaceSpec } from "@dispatch/ui-contract";
 import { createIdbChunkStore } from "../adapters/idb";
@@ -12,6 +14,7 @@ import type { WebSocketLike } from "../adapters/ws";
 import { createSurfaceSocket, type SurfaceSocketOptions } from "../adapters/ws";
 import {
 	applyServerMessage,
+	getSurfaceSpec,
 	type ProtocolState,
 	initialState as protocolInitialState,
 	invoke as protocolInvoke,
@@ -30,6 +33,11 @@ import { randomId } from "./uuid";
 
 const DEFAULT_MODEL = "opencode/deepseek-v4-flash";
 
+/** Outcome of a manual `POST /chat/warm` (the "warm now" affordance). */
+export type WarmResult =
+	| { readonly ok: true; readonly response: WarmResponse }
+	| { readonly ok: false; readonly error: string };
+
 export interface AppStore {
 	readonly tabs: readonly Tab[];
 	readonly activeConversationId: string | null;
@@ -40,12 +48,19 @@ export interface AppStore {
 	/** Every received surface spec, in catalog order — all auto-subscribed + expanded. */
 	readonly surfaces: readonly SurfaceSpec[];
 	readonly lastError: ProtocolState["lastError"];
+	/** The current spec for one surface by id (discovery-by-id), or null if absent. */
+	surface(surfaceId: string): SurfaceSpec | null;
 	send(text: string): void;
 	selectModel(model: string): void;
 	newDraft(): void;
 	selectTab(conversationId: string): void;
 	closeTab(conversationId: string): void;
 	invoke(surfaceId: string, actionId: string, payload?: unknown): void;
+	/**
+	 * Manually warm the focused conversation's prompt cache (`POST /chat/warm`).
+	 * Returns null when no conversation is focused (a draft has nothing to warm).
+	 */
+	warmNow(): Promise<WarmResult | null>;
 	dispose(): void;
 }
 
@@ -179,6 +194,11 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 		}
 	}
 
+	/** The conversation the surfaces should scope to (undefined for a draft). */
+	function focusedConversationId(): string | undefined {
+		return tabsStore.activeConversationId ?? undefined;
+	}
+
 	function handleServerMessage(msg: SurfaceServerMessage): void {
 		protocol = applyServerMessage(protocol, msg);
 		// Surfaces are auto-expanded: whenever the catalog changes, subscribe to
@@ -188,10 +208,16 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 		}
 	}
 
-	/** Subscribe to every catalog entry not yet subscribed; unsubscribe stragglers. */
+	/**
+	 * Subscribe to every catalog entry, scoped to the focused conversation, and
+	 * unsubscribe stragglers. Re-run on conversation switch: a conversation-scoped
+	 * surface (e.g. cache-warming) re-scopes to the new id (`protocolSubscribe`
+	 * emits unsubscribe-old + subscribe-new); a global surface ignores the id.
+	 */
 	function syncSubscriptions(): void {
+		const cid = focusedConversationId();
 		for (const entry of protocol.catalog) {
-			const result = protocolSubscribe(protocol, entry.id);
+			const result = protocolSubscribe(protocol, entry.id, cid);
 			protocol = result.state;
 			for (const msg of result.outgoing) {
 				socket?.send(msg);
@@ -216,11 +242,14 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 		onMessage: handleServerMessage,
 		onChat: handleChatMessage,
 		onReopen() {
-			// The server forgot our subscriptions on reconnect; re-send for all
-			// catalog entries (protocolSubscribe would no-op since they're still in
-			// our local map, so emit the wire messages directly).
-			for (const entry of protocol.catalog) {
-				const msg: SubscribeMessage = { type: "subscribe", surfaceId: entry.id };
+			// The server forgot our subscriptions on reconnect; re-send each with the
+			// conversation it was subscribed under (protocolSubscribe would no-op since
+			// they're still in our local map, so emit the wire messages directly).
+			for (const [surfaceId, sub] of protocol.subscriptions) {
+				const msg: SubscribeMessage =
+					sub.conversationId === undefined
+						? { type: "subscribe", surfaceId }
+						: { type: "subscribe", surfaceId, conversationId: sub.conversationId };
 				socket?.send(msg);
 			}
 		},
@@ -292,7 +321,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 		get surfaces(): readonly SurfaceSpec[] {
 			const out: SurfaceSpec[] = [];
 			for (const entry of protocol.catalog) {
-				const spec = protocol.subscriptions.get(entry.id);
+				const spec = getSurfaceSpec(protocol, entry.id);
 				if (spec) out.push(spec);
 			}
 			return out;
@@ -301,6 +330,10 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 			return protocol.lastError;
 		},
 
+		surface(surfaceId: string): SurfaceSpec | null {
+			return getSurfaceSpec(protocol, surfaceId);
+		},
+
 		send(text: string): void {
 			if (tabsStore.activeConversationId === null) {
 				// Draft: promote to tab on first send
@@ -320,6 +353,9 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 				draftConversationId = nextDraftId;
 
 				refreshActiveChat();
+				// The draft became a real conversation: re-scope conversation-scoped
+				// surfaces (e.g. cache-warming) to its id.
+				syncSubscriptions();
 				// Now send on the promoted store
 				chatStores.get(conversationId)?.send(text);
 			} else {
@@ -344,6 +380,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 			draftStore = createChatFor(nextDraftId, activeModel);
 			draftConversationId = nextDraftId;
 			refreshActiveChat();
+			syncSubscriptions();
 		},
 
 		selectTab(conversationId: string): void {
@@ -353,6 +390,7 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 				activeModel = tab.model;
 			}
 			refreshActiveChat();
+			syncSubscriptions();
 		},
 
 		closeTab(conversationId: string): void {
@@ -364,15 +402,42 @@ export function createAppStore(opts?: CreateAppStoreOptions): AppStore {
 			}
 			void cache.delete(conversationId);
 			refreshActiveChat();
+			syncSubscriptions();
 		},
 
 		invoke(surfaceId: string, actionId: string, payload?: unknown): void {
-			const result = protocolInvoke(protocol, surfaceId, actionId, payload);
+			const result = protocolInvoke(
+				protocol,
+				surfaceId,
+				actionId,
+				payload,
+				focusedConversationId(),
+			);
 			protocol = result.state;
 			for (const msg of result.outgoing) {
 				socket?.send(msg);
 			}
 		},
+
+		async warmNow(): Promise<WarmResult | null> {
+			const conversationId = tabsStore.activeConversationId;
+			if (conversationId === null) return null;
+			const body: WarmRequest = { conversationId, model: activeModel };
+			try {
+				const res = await fetchImpl(`${httpBase}/chat/warm`, {
+					method: "POST",
+					headers: { "content-type": "application/json" },
+					body: JSON.stringify(body),
+				});
+				if (!res.ok) {
+					const errBody = (await res.json().catch(() => null)) as { error?: string } | null;
+					return { ok: false, error: errBody?.error ?? `Warm failed (HTTP ${res.status})` };
+				}
+				return { ok: true, response: (await res.json()) as WarmResponse };
+			} catch (err) {
+				return { ok: false, error: err instanceof Error ? err.message : "Warm request failed" };
+			}
+		},
 		dispose(): void {
 			for (const store of chatStores.values()) {
 				store.dispose();
diff --git a/src/core/metrics/format.test.ts b/src/core/metrics/format.test.ts
index 77c5204..3eec93d 100644
--- a/src/core/metrics/format.test.ts
+++ b/src/core/metrics/format.test.ts
@@ -2,8 +2,10 @@ import type { StepId, StepMetrics, TurnMetrics } from "@dispatch/wire";
 import { describe, expect, it } from "vitest";
 import {
 	computeCachePct,
+	computeExpectedCachePct,
 	computeTps,
 	viewCacheRate,
+	viewExpectedCache,
 	viewStepMetrics,
 	viewTurnMetrics,
 } from "./format";
@@ -249,3 +251,60 @@ describe("viewCacheRate", () => {
 		expect(miss.isHit).toBe(false);
 	});
 });
+
+describe("computeExpectedCachePct", () => {
+	it("null when there is no prior turn (first turn has no baseline)", () => {
+		expect(computeExpectedCachePct({ inputTokens: 100, outputTokens: 0 }, null)).toBeNull();
+	});
+
+	it("null when the prior turn cached nothing (denominator 0)", () => {
+		const prev = { inputTokens: 100, outputTokens: 0 };
+		const current = { inputTokens: 200, outputTokens: 0, cacheReadTokens: 50 };
+		expect(computeExpectedCachePct(current, prev)).toBeNull();
+	});
+
+	it("100% when the whole prior cached prefix was read back (backend worked example)", () => {
+		// turn 1: cacheRead 0, cacheWrite 5146 → prefix 5146; turn 2 reads 5146 back.
+		const prev = { inputTokens: 5149, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 5146 };
+		const current = {
+			inputTokens: 8462,
+			outputTokens: 0,
+			cacheReadTokens: 5146,
+			cacheWriteTokens: 3313,
+		};
+		expect(computeExpectedCachePct(current, prev)).toBe(100);
+	});
+
+	it("drops below 100% when the cache busted (read < prior prefix)", () => {
+		const prev = {
+			inputTokens: 1000,
+			outputTokens: 0,
+			cacheReadTokens: 100,
+			cacheWriteTokens: 900,
+		};
+		const current = { inputTokens: 1000, outputTokens: 0, cacheReadTokens: 500 };
+		// 500 / (100 + 900) = 50%
+		expect(computeExpectedCachePct(current, prev)).toBe(50);
+	});
+
+	it("clamps to 100 if read somehow exceeds the prior prefix", () => {
+		const prev = { inputTokens: 100, outputTokens: 0, cacheWriteTokens: 100 };
+		const current = { inputTokens: 100, outputTokens: 0, cacheReadTokens: 250 };
+		expect(computeExpectedCachePct(current, prev)).toBe(100);
+	});
+});
+
+describe("viewExpectedCache", () => {
+	it("null view when it cannot be derived (no prior turn)", () => {
+		expect(viewExpectedCache({ inputTokens: 100, outputTokens: 0 }, null)).toBeNull();
+	});
+
+	it("success level + hit flag for full retention", () => {
+		const prev = { inputTokens: 5149, outputTokens: 0, cacheWriteTokens: 5146 };
+		const current = { inputTokens: 8462, outputTokens: 0, cacheReadTokens: 5146 };
+		const v = viewExpectedCache(current, prev);
+		expect(v?.pct).toBe(100);
+		expect(v?.level).toBe("success");
+		expect(v?.isHit).toBe(true);
+	});
+});
diff --git a/src/core/metrics/format.ts b/src/core/metrics/format.ts
index cc86976..ee8db60 100644
--- a/src/core/metrics/format.ts
+++ b/src/core/metrics/format.ts
@@ -75,6 +75,35 @@ export function viewCacheRate(u: Usage): CacheRateView {
 	return { pct, level: cacheLevel(pct), isHit: (u.cacheReadTokens ?? 0) > 0 };
 }
 
+/**
+ * Expected cache (retention): of the cache that existed going INTO this turn, how
+ * much was read back — `clamp01(cacheRead_N / (cacheRead_{N-1} + cacheWrite_{N-1}))`.
+ * The denominator is the PRIOR turn's cached prefix (what it read + what it wrote).
+ * Ideally ~100% on every turn after the first; <100% = the cache busted/expired.
+ *
+ * Returns `null` when it cannot be derived: no prior turn (`prev === null`) or the
+ * prior turn cached nothing (denominator <= 0) — distinct from a real 0%.
+ */
+export function computeExpectedCachePct(current: Usage, prev: Usage | null): number | null {
+	if (prev === null) return null;
+	const priorPrefix = (prev.cacheReadTokens ?? 0) + (prev.cacheWriteTokens ?? 0);
+	if (priorPrefix <= 0) return null;
+	let ratio = (current.cacheReadTokens ?? 0) / priorPrefix;
+	if (ratio < 0) ratio = 0;
+	else if (ratio > 1) ratio = 1;
+	return Math.round(ratio * 100);
+}
+
+/**
+ * Build a view of the cross-turn retention (percentage + colour level + hit flag),
+ * or `null` when it can't be derived (see `computeExpectedCachePct`).
+ */
+export function viewExpectedCache(current: Usage, prev: Usage | null): CacheRateView | null {
+	const retention = computeExpectedCachePct(current, prev);
+	const readBack = current.cacheReadTokens ?? 0;
+	return retention === null ? null : { pct: retention, level: cacheLevel(retention), isHit: readBack > 0 };
+}
+
 /** Build a formatted view of a turn's aggregate metrics. */
 export function viewTurnMetrics(turn: TurnMetrics): TurnMetricsView {
 	const total = totalTokens(turn.usage);
diff --git a/src/core/metrics/index.ts b/src/core/metrics/index.ts
index 6997ab9..8822159 100644
--- a/src/core/metrics/index.ts
+++ b/src/core/metrics/index.ts
@@ -1,7 +1,9 @@
 export {
 	computeCachePct,
+	computeExpectedCachePct,
 	computeTps,
 	viewCacheRate,
+	viewExpectedCache,
 	viewStepMetrics,
 	viewTurnMetrics,
 } from "./format";
diff --git a/src/core/metrics/place.test.ts b/src/core/metrics/place.test.ts
index d94882d..0b9c0ec 100644
--- a/src/core/metrics/place.test.ts
+++ b/src/core/metrics/place.test.ts
@@ -526,4 +526,17 @@ describe("interleaveTurnMetrics — cumulative usage (cache total)", () => {
 		expect(tm[0]?.cumulativeUsage.inputTokens).toBe(1000);
 		expect(tm[0]?.cumulativeUsage.cacheReadTokens).toBe(500);
 	});
+
+	it("carries the prior finalized turn's usage as the retention baseline", () => {
+		const rows = interleaveTurnMetrics(
+			[userGroup(1, "q1"), assistantGroup(2, "a1"), userGroup(3, "q2"), assistantGroup(4, "a2")],
+			[cacheEntry("t1", 2669, 10, 384), cacheEntry("t2", 2737, 10, 2560)],
+		);
+		const tm = turnMetricsRows(rows);
+		// first finalized turn has no earlier baseline
+		expect(tm[0]?.prevTurnUsage).toBeNull();
+		// second turn's baseline is the first turn's usage
+		expect(tm[1]?.prevTurnUsage?.inputTokens).toBe(2669);
+		expect(tm[1]?.prevTurnUsage?.cacheReadTokens).toBe(384);
+	});
 });
diff --git a/src/core/metrics/place.ts b/src/core/metrics/place.ts
index fc30df0..afeb84b 100644
--- a/src/core/metrics/place.ts
+++ b/src/core/metrics/place.ts
@@ -79,11 +79,19 @@ export function interleaveTurnMetrics(
 	}
 
 	// Running cumulative usage across finalized turns (conversation total at each
-	// entry index), for the per-turn "chat total" cache rate.
+	// entry index), for the per-turn "chat total" cache rate. Alongside it, the
+	// previous finalized turn's usage at each index — the baseline for cross-turn
+	// retention (expected cache).
 	const cumulativeByEntry: Usage[] = [];
+	const prevUsageByEntry: (Usage | null)[] = [];
 	let runningUsage: Usage = { inputTokens: 0, outputTokens: 0 };
+	let lastFinalizedUsage: Usage | null = null;
 	for (const e of entries) {
-		if (e.total !== null) runningUsage = addUsage(runningUsage, e.total.usage);
+		prevUsageByEntry.push(lastFinalizedUsage);
+		if (e.total !== null) {
+			runningUsage = addUsage(runningUsage, e.total.usage);
+			lastFinalizedUsage = e.total.usage;
+		}
 		cumulativeByEntry.push(runningUsage);
 	}
 
@@ -170,6 +178,7 @@ export function interleaveTurnMetrics(
 				kind: "turn-metrics",
 				turn: entry.total,
 				cumulativeUsage: cumulativeByEntry[seg] ?? entry.total.usage,
+				prevTurnUsage: prevUsageByEntry[seg] ?? null,
 			});
 		}
 	}
diff --git a/src/core/metrics/types.ts b/src/core/metrics/types.ts
index cf2511c..f5557f7 100644
--- a/src/core/metrics/types.ts
+++ b/src/core/metrics/types.ts
@@ -52,6 +52,11 @@ export type MetricsRow =
 			readonly turn: TurnMetrics;
 			/** Cumulative usage across all finalized turns up to and including this one. */
 			readonly cumulativeUsage: Usage;
+			/**
+			 * Usage of the most recent EARLIER finalized turn, or `null` when this is the
+			 * first finalized turn. The baseline for cross-turn retention (expected cache).
+			 */
+			readonly prevTurnUsage: Usage | null;
 	  };
 
 /** Formatted cache hit-rate view: percentage + colour severity + hit flag. */
diff --git a/src/core/protocol/index.ts b/src/core/protocol/index.ts
index 25174ea..e7fd161 100644
--- a/src/core/protocol/index.ts
+++ b/src/core/protocol/index.ts
@@ -1,2 +1,9 @@
-export { applyServerMessage, initialState, invoke, subscribe, unsubscribe } from "./reducer";
-export type { ProtocolResult, ProtocolState } from "./types";
+export {
+	applyServerMessage,
+	getSurfaceSpec,
+	initialState,
+	invoke,
+	subscribe,
+	unsubscribe,
+} from "./reducer";
+export type { ProtocolResult, ProtocolState, Subscription } from "./types";
diff --git a/src/core/protocol/reducer.test.ts b/src/core/protocol/reducer.test.ts
index 57e12f2..c8e517a 100644
--- a/src/core/protocol/reducer.test.ts
+++ b/src/core/protocol/reducer.test.ts
@@ -1,5 +1,12 @@
 import { describe, expect, it } from "vitest";
-import { applyServerMessage, initialState, invoke, subscribe, unsubscribe } from "./reducer";
+import {
+	applyServerMessage,
+	getSurfaceSpec,
+	initialState,
+	invoke,
+	subscribe,
+	unsubscribe,
+} from "./reducer";
 
 const makeSpec = (id: string, title = id) => ({
 	id,
@@ -32,11 +39,10 @@ describe("applyServerMessage — catalog", () => {
 describe("applyServerMessage — surface", () => {
 	it("sets the spec for a subscribed surface", () => {
 		let s = initialState();
-		const result = subscribe(s, "s1");
-		s = result.state;
+		s = subscribe(s, "s1").state;
 		const spec = makeSpec("s1", "Surface 1");
 		const next = applyServerMessage(s, { type: "surface", spec });
-		expect(next.subscriptions.get("s1")).toEqual(spec);
+		expect(getSurfaceSpec(next, "s1")).toEqual(spec);
 	});
 
 	it("ignores a surface message for a non-subscribed surface", () => {
@@ -56,7 +62,7 @@ describe("applyServerMessage — update", () => {
 			type: "update",
 			update: { surfaceId: "s1", spec: makeSpec("s1", "V2") },
 		});
-		expect(next.subscriptions.get("s1")?.title).toBe("V2");
+		expect(getSurfaceSpec(next, "s1")?.title).toBe("V2");
 	});
 
 	it("ignores an update for a non-subscribed surface", () => {
@@ -86,7 +92,7 @@ describe("applyServerMessage — error", () => {
 });
 
 describe("subscribe", () => {
-	it("emits exactly one subscribe message", () => {
+	it("emits exactly one subscribe message (global, no conversationId)", () => {
 		const s = initialState();
 		const result = subscribe(s, "s1");
 		expect(result.outgoing).toEqual([{ type: "subscribe", surfaceId: "s1" }]);
@@ -96,10 +102,14 @@ describe("subscribe", () => {
 	it("adds the surface to subscriptions with null spec", () => {
 		const s = initialState();
 		const result = subscribe(s, "s1");
-		expect(result.state.subscriptions.get("s1")).toBeNull();
+		expect(result.state.subscriptions.get("s1")).toEqual({
+			conversationId: undefined,
+			spec: null,
+		});
+		expect(getSurfaceSpec(result.state, "s1")).toBeNull();
 	});
 
-	it("is idempotent — second subscribe is a no-op", () => {
+	it("is idempotent — second subscribe with the same scope is a no-op", () => {
 		let s = initialState();
 		s = subscribe(s, "s1").state;
 		const result = subscribe(s, "s1");
@@ -108,6 +118,67 @@ describe("subscribe", () => {
 	});
 });
 
+describe("subscribe — conversation-scoped", () => {
+	it("includes conversationId in the subscribe message", () => {
+		const s = initialState();
+		const result = subscribe(s, "cache-warming", "conv-A");
+		expect(result.outgoing).toEqual([
+			{ type: "subscribe", surfaceId: "cache-warming", conversationId: "conv-A" },
+		]);
+		expect(result.state.subscriptions.get("cache-warming")?.conversationId).toBe("conv-A");
+	});
+
+	it("re-scopes on conversation switch: unsubscribe old pair then subscribe new", () => {
+		let s = initialState();
+		s = subscribe(s, "cw", "conv-A").state;
+		s = applyServerMessage(s, {
+			type: "surface",
+			spec: makeSpec("cw", "A-spec"),
+			conversationId: "conv-A",
+		});
+		const result = subscribe(s, "cw", "conv-B");
+		expect(result.outgoing).toEqual([
+			{ type: "unsubscribe", surfaceId: "cw", conversationId: "conv-A" },
+			{ type: "subscribe", surfaceId: "cw", conversationId: "conv-B" },
+		]);
+		// previous spec retained until the new one arrives (no flicker)
+		expect(getSurfaceSpec(result.state, "cw")?.title).toBe("A-spec");
+		expect(result.state.subscriptions.get("cw")?.conversationId).toBe("conv-B");
+	});
+
+	it("drops a stale update echoing the previous conversationId", () => {
+		let s = initialState();
+		s = subscribe(s, "cw", "conv-A").state;
+		s = subscribe(s, "cw", "conv-B").state; // re-scoped to B
+		const next = applyServerMessage(s, {
+			type: "update",
+			update: { surfaceId: "cw", spec: makeSpec("cw", "STALE-A"), conversationId: "conv-A" },
+		});
+		expect(getSurfaceSpec(next, "cw")).toBeNull(); // stale ignored, no spec yet for B
+	});
+
+	it("accepts an update echoing the current conversationId", () => {
+		let s = initialState();
+		s = subscribe(s, "cw", "conv-B").state;
+		const next = applyServerMessage(s, {
+			type: "update",
+			update: { surfaceId: "cw", spec: makeSpec("cw", "B-spec"), conversationId: "conv-B" },
+		});
+		expect(getSurfaceSpec(next, "cw")?.title).toBe("B-spec");
+	});
+
+	it("accepts a global (no-echo) surface message even when subscribed with a conversationId", () => {
+		// loaded-extensions is global: server ignores our conversationId and echoes none.
+		let s = initialState();
+		s = subscribe(s, "loaded-extensions", "conv-A").state;
+		const next = applyServerMessage(s, {
+			type: "surface",
+			spec: makeSpec("loaded-extensions", "Ext"),
+		});
+		expect(getSurfaceSpec(next, "loaded-extensions")?.title).toBe("Ext");
+	});
+});
+
 describe("unsubscribe", () => {
 	it("emits unsubscribe and drops the spec", () => {
 		let s = initialState();
@@ -118,6 +189,15 @@ describe("unsubscribe", () => {
 		expect(result.state.subscriptions.has("s1")).toBe(false);
 	});
 
+	it("includes conversationId for a scoped subscription", () => {
+		let s = initialState();
+		s = subscribe(s, "cw", "conv-A").state;
+		const result = unsubscribe(s, "cw");
+		expect(result.outgoing).toEqual([
+			{ type: "unsubscribe", surfaceId: "cw", conversationId: "conv-A" },
+		]);
+	});
+
 	it("is a no-op if not subscribed", () => {
 		const s = initialState();
 		const result = unsubscribe(s, "nope");
@@ -143,6 +223,20 @@ describe("invoke", () => {
 		]);
 	});
 
+	it("includes conversationId when provided", () => {
+		const s = initialState();
+		const result = invoke(s, "cw", "cache-warming/set-interval", 120, "conv-A");
+		expect(result.outgoing).toEqual([
+			{
+				type: "invoke",
+				surfaceId: "cw",
+				actionId: "cache-warming/set-interval",
+				payload: 120,
+				conversationId: "conv-A",
+			},
+		]);
+	});
+
 	it("does not mutate state", () => {
 		const s = initialState();
 		const result = invoke(s, "s1", "a1");
diff --git a/src/core/protocol/reducer.ts b/src/core/protocol/reducer.ts
index 992a918..3d6b1c8 100644
--- a/src/core/protocol/reducer.ts
+++ b/src/core/protocol/reducer.ts
@@ -2,6 +2,7 @@ import type {
 	InvokeMessage,
 	SubscribeMessage,
 	SurfaceServerMessage,
+	SurfaceSpec,
 	UnsubscribeMessage,
 } from "@dispatch/ui-contract";
 import type { ProtocolResult, ProtocolState } from "./types";
@@ -15,6 +16,31 @@ export function initialState(): ProtocolState {
 	};
 }
 
+// ── Message builders (respect exactOptionalPropertyTypes: omit `conversationId`
+//    entirely for a global subscription rather than setting it to `undefined`). ──
+
+function subMsg(surfaceId: string, conversationId: string | undefined): SubscribeMessage {
+	// Scoped → carry the id; global → leave the key off entirely (never set it to `undefined`).
+	if (conversationId !== undefined) return { type: "subscribe", surfaceId, conversationId };
+	return { type: "subscribe", surfaceId };
+}
+
+function unsubMsg(surfaceId: string, conversationId: string | undefined): UnsubscribeMessage {
+	// Scoped → name the conversation pair being dropped; global → omit the key entirely.
+	if (conversationId !== undefined) return { type: "unsubscribe", surfaceId, conversationId };
+	return { type: "unsubscribe", surfaceId };
+}
+
+/**
+ * Staleness check for an inbound spec/update: `desiredId` is the scope we last
+ * subscribed with, `echoedId` the conversationId the message carried. No echo
+ * (`undefined`, as a global surface sends) is always current; else ids must match.
+ */
+function isCurrent(desiredId: string | undefined, echoedId: string | undefined): boolean {
+	if (echoedId === undefined) return true;
+	return echoedId === desiredId;
+}
+
 /** Fold an inbound server message into the next protocol state. */
 export function applyServerMessage(state: ProtocolState, msg: SurfaceServerMessage): ProtocolState {
 	switch (msg.type) {
@@ -22,18 +48,21 @@ export function applyServerMessage(state: ProtocolState, msg: SurfaceServerMessa
 			return { ...state, catalog: msg.catalog };
 
 		case "surface": {
-			const surfaceId = msg.spec.id;
-			if (!state.subscriptions.has(surfaceId)) return state;
+			const sub = state.subscriptions.get(msg.spec.id);
+			if (sub === undefined) return state;
+			if (!isCurrent(sub.conversationId, msg.conversationId)) return state;
 			const subs = new Map(state.subscriptions);
-			subs.set(surfaceId, msg.spec);
+			subs.set(msg.spec.id, { conversationId: sub.conversationId, spec: msg.spec });
 			return { ...state, subscriptions: subs };
 		}
 
 		case "update": {
-			const surfaceId = msg.update.surfaceId;
-			if (!state.subscriptions.has(surfaceId)) return state;
+			const { surfaceId, spec, conversationId } = msg.update;
+			const sub = state.subscriptions.get(surfaceId);
+			if (sub === undefined) return state;
+			if (!isCurrent(sub.conversationId, conversationId)) return state;
 			const subs = new Map(state.subscriptions);
-			subs.set(surfaceId, msg.update.spec);
+			subs.set(surfaceId, { conversationId: sub.conversationId, spec });
 			return { ...state, subscriptions: subs };
 		}
 
@@ -43,40 +72,72 @@ export function applyServerMessage(state: ProtocolState, msg: SurfaceServerMessa
 }
 
 /**
- * Subscribe to a surface. Idempotent: if already subscribed, returns the same
- * state with no outgoing message.
+ * Subscribe to a surface for a given conversation (omit `conversationId` for a
+ * GLOBAL surface / when no conversation is focused).
+ *
+ * - Not yet subscribed → emits one `subscribe`.
+ * - Already subscribed with the SAME scope → idempotent no-op.
+ * - Already subscribed with a DIFFERENT conversation (a re-scope on conversation
+ *   switch) → emits `unsubscribe` for the old pair then `subscribe` for the new
+ *   one, retaining the previous spec until the new one arrives (no flicker).
  */
-export function subscribe(state: ProtocolState, surfaceId: string): ProtocolResult {
-	if (state.subscriptions.has(surfaceId)) {
+export function subscribe(
+	state: ProtocolState,
+	surfaceId: string,
+	conversationId?: string,
+): ProtocolResult {
+	const existing = state.subscriptions.get(surfaceId);
+	if (existing !== undefined && existing.conversationId === conversationId) {
 		return { state, outgoing: [] };
 	}
 	const subs = new Map(state.subscriptions);
-	subs.set(surfaceId, null);
-	const outgoing: SubscribeMessage = { type: "subscribe", surfaceId };
-	return { state: { ...state, subscriptions: subs }, outgoing: [outgoing] };
+	const outgoing: (SubscribeMessage | UnsubscribeMessage)[] = [];
+	const priorSpec: SurfaceSpec | null = existing?.spec ?? null;
+	if (existing !== undefined) {
+		outgoing.push(unsubMsg(surfaceId, existing.conversationId));
+	}
+	subs.set(surfaceId, { conversationId, spec: priorSpec });
+	outgoing.push(subMsg(surfaceId, conversationId));
+	return { state: { ...state, subscriptions: subs }, outgoing };
 }
 
 /**
- * Unsubscribe from a surface. Drops the local spec and emits one unsubscribe.
- * If not subscribed, returns the same state with no outgoing.
+ * Unsubscribe from a surface. Drops the local subscription and emits one
+ * `unsubscribe` (for the conversation pair it was subscribed under). No-op if
+ * not subscribed.
  */
 export function unsubscribe(state: ProtocolState, surfaceId: string): ProtocolResult {
-	if (!state.subscriptions.has(surfaceId)) {
+	const existing = state.subscriptions.get(surfaceId);
+	if (existing === undefined) {
 		return { state, outgoing: [] };
 	}
 	const subs = new Map(state.subscriptions);
 	subs.delete(surfaceId);
-	const outgoing: UnsubscribeMessage = { type: "unsubscribe", surfaceId };
-	return { state: { ...state, subscriptions: subs }, outgoing: [outgoing] };
+	return {
+		state: { ...state, subscriptions: subs },
+		outgoing: [unsubMsg(surfaceId, existing.conversationId)],
+	};
 }
 
-/** Invoke a field's action on a surface. Emits an InvokeMessage; no state change. */
+/**
+ * Invoke a field's action on a surface. Emits an InvokeMessage (carrying
+ * `conversationId` for a scoped surface); no state change.
+ */
 export function invoke(
 	state: ProtocolState,
 	surfaceId: string,
 	actionId: string,
 	payload?: unknown,
+	conversationId?: string,
 ): ProtocolResult {
-	const outgoing: InvokeMessage = { type: "invoke", surfaceId, actionId, payload };
+	const outgoing: InvokeMessage =
+		conversationId === undefined
+			? { type: "invoke", surfaceId, actionId, payload }
+			: { type: "invoke", surfaceId, actionId, payload, conversationId };
 	return { state, outgoing: [outgoing] };
 }
+
+/** Latest spec stored for `surfaceId`; `null` when there is no subscription or its spec is still `null`. */
+export function getSurfaceSpec(state: ProtocolState, surfaceId: string): SurfaceSpec | null {
+	return state.subscriptions.get(surfaceId)?.spec ?? null;
+}
diff --git a/src/core/protocol/types.ts b/src/core/protocol/types.ts
index effec0d..db8886a 100644
--- a/src/core/protocol/types.ts
+++ b/src/core/protocol/types.ts
@@ -5,12 +5,27 @@ import type {
 	SurfaceSpec,
 } from "@dispatch/ui-contract";
 
+/**
+ * One surface subscription's local state.
+ *
+ * `conversationId` is the conversation we last subscribed this surface WITH
+ * (`undefined` = subscribed globally, no conversation in focus). It is the
+ * "desired" scope: an inbound `surface`/`update` that echoes a DIFFERENT
+ * conversation is stale (we have since re-scoped) and is dropped. A GLOBAL
+ * surface ignores the id server-side and echoes none — that (`undefined` echo)
+ * is always accepted. `spec` is `null` until the first `surface` arrives.
+ */
+export interface Subscription {
+	readonly conversationId: string | undefined;
+	readonly spec: SurfaceSpec | null;
+}
+
 /** The client-side view of the surface protocol state. */
 export interface ProtocolState {
 	/** The latest catalog received from the server (empty until first CatalogMessage). */
 	readonly catalog: SurfaceCatalog;
-	/** Surfaces the client intends to be subscribed to; null = subscribed but no spec yet. */
-	readonly subscriptions: ReadonlyMap<string, SurfaceSpec | null>;
+	/** Surfaces the client intends to be subscribed to, keyed by surfaceId. */
+	readonly subscriptions: ReadonlyMap<string, Subscription>;
 	/** The last error received from the server, if any. */
 	readonly lastError: SurfaceErrorMessage | null;
 }
diff --git a/src/features/cache-warming/index.ts b/src/features/cache-warming/index.ts
new file mode 100644
index 0000000..c432de6
--- /dev/null
+++ b/src/features/cache-warming/index.ts
@@ -0,0 +1,8 @@
+export type { WarmFeedback, WarmNow } from "./logic/view-model";
+export { default as CacheWarmingView } from "./ui/CacheWarmingView.svelte";
+
+/** Public module manifest — aggregated by the shell's "Loaded Modules" view. */
+export const manifest = {
+	name: "cache-warming",
+	description: "Prompt-cache warming controls, history, and countdown",
+} as const;
diff --git a/src/features/cache-warming/logic/view-model.test.ts b/src/features/cache-warming/logic/view-model.test.ts
new file mode 100644
index 0000000..3d6f6d0
--- /dev/null
+++ b/src/features/cache-warming/logic/view-model.test.ts
@@ -0,0 +1,220 @@
+import type { SurfaceSpec } from "@dispatch/ui-contract";
+import { describe, expect, it } from "vitest";
+import {
+	clampMinutes,
+	clampSeconds,
+	colorClass,
+	formatCountdown,
+	formatWarmLabel,
+	fromMinSec,
+	initialWarmingState,
+	observeWarm,
+	parseControls,
+	parsePct,
+	secondsUntilNext,
+	statusForPct,
+	toMinSec,
+} from "./view-model";
+
+const spec = (fields: SurfaceSpec["fields"]): SurfaceSpec => ({
+	id: "cache-warming",
+	region: "side",
+	title: "Cache Warming",
+	fields,
+});
+
+describe("parsePct", () => {
+	it("parses a percentage string", () => {
+		expect(parsePct("100%")).toBe(100);
+		expect(parsePct("93 %")).toBe(93);
+		expect(parsePct("0%")).toBe(0);
+	});
+	it("returns null for a dash / non-numeric", () => {
+		expect(parsePct("—")).toBeNull();
+		expect(parsePct("n/a")).toBeNull();
+	});
+});
+
+describe("parseControls", () => {
+	it("returns empty defaults for a null spec", () => {
+		const c = parseControls(null);
+		expect(c).toEqual({
+			enabled: false,
+			toggleActionId: null,
+			intervalSeconds: 0,
+			setIntervalActionId: null,
+			lastPct: null,
+			retentionPct: null,
+			nextWarmAt: null,
+			lastWarmAt: null,
+		});
+	});
+
+	it("extracts toggle / number / both stats / timer by kind", () => {
+		const c = parseControls(
+			spec([
+				{
+					kind: "toggle",
+					label: "Enabled",
+					value: true,
+					action: { actionId: "cache-warming/toggle" },
+				},
+				{
+					kind: "number",
+					label: "Interval",
+					value: 240,
+					unit: "s",
+					action: { actionId: "cache-warming/set-interval" },
+				},
+				{ kind: "stat", label: "Last cache rate", value: "61%" },
+				{ kind: "stat", label: "Cache retention", value: "100%" },
+				{
+					kind: "custom",
+					rendererId: "cache-warming-timer",
+					payload: { nextWarmAt: 1_700_000_240_000, lastWarmAt: 1_700_000_000_000 },
+				},
+			]),
+		);
+		expect(c).toEqual({
+			enabled: true,
+			toggleActionId: "cache-warming/toggle",
+			intervalSeconds: 240,
+			setIntervalActionId: "cache-warming/set-interval",
+			lastPct: 61,
+			retentionPct: 100,
+			nextWarmAt: 1_700_000_240_000,
+			lastWarmAt: 1_700_000_000_000,
+		});
+	});
+
+	it("tells the retention stat apart from the rate stat by label", () => {
+		const c = parseControls(
+			spec([
+				{ kind: "stat", label: "Cache retention", value: "100%" },
+				{ kind: "stat", label: "Last cache rate", value: "61%" },
+			]),
+		);
+		expect(c.retentionPct).toBe(100);
+		expect(c.lastPct).toBe(61);
+	});
+
+	it("treats a '—' stat as no pct", () => {
+		const c = parseControls(spec([{ kind: "stat", label: "Last cache rate", value: "—" }]));
+		expect(c.lastPct).toBeNull();
+	});
+
+	it("ignores an unknown custom renderer and a malformed timer payload", () => {
+		const c = parseControls(
+			spec([
+				{ kind: "custom", rendererId: "something-else", payload: { nextWarmAt: 5 } },
+				{ kind: "custom", rendererId: "cache-warming-timer", payload: "nope" },
+			]),
+		);
+		expect(c.nextWarmAt).toBeNull();
+		expect(c.lastWarmAt).toBeNull();
+	});
+});
+
+describe("interval ↔ min/sec", () => {
+	it("clampSeconds caps at 0..59", () => {
+		expect(clampSeconds(75)).toBe(59);
+		expect(clampSeconds(-3)).toBe(0);
+		expect(clampSeconds(30)).toBe(30);
+		expect(clampSeconds(Number.NaN)).toBe(0);
+	});
+	it("clampMinutes floors at 0", () => {
+		expect(clampMinutes(-1)).toBe(0);
+		expect(clampMinutes(4)).toBe(4);
+	});
+	it("toMinSec splits total seconds", () => {
+		expect(toMinSec(240)).toEqual({ minutes: 4, seconds: 0 });
+		expect(toMinSec(125)).toEqual({ minutes: 2, seconds: 5 });
+		expect(toMinSec(45)).toEqual({ minutes: 0, seconds: 45 });
+	});
+	it("fromMinSec combines (clamping seconds to 59)", () => {
+		expect(fromMinSec(4, 0)).toBe(240);
+		expect(fromMinSec(2, 5)).toBe(125);
+		expect(fromMinSec(1, 75)).toBe(119); // 75s clamped to 59
+	});
+});
+
+describe("status + formatting", () => {
+	it("statusForPct buckets high/mid/low", () => {
+		expect(statusForPct(100)).toBe("success");
+		expect(statusForPct(80)).toBe("success");
+		expect(statusForPct(60)).toBe("warning");
+		expect(statusForPct(40)).toBe("warning");
+		expect(statusForPct(10)).toBe("error");
+	});
+	it("colorClass maps to literal DaisyUI classes", () => {
+		expect(colorClass("success")).toBe("text-success");
+		expect(colorClass("warning")).toBe("text-warning");
+		expect(colorClass("error")).toBe("text-error");
+	});
+	it("formatWarmLabel matches the manual-warm phrasing", () => {
+		expect(formatWarmLabel(100)).toBe("Warmed — 100% cache hit");
+		expect(formatWarmLabel(92.6)).toBe("Warmed — 93% cache hit");
+	});
+	it("formatCountdown renders s and m:ss", () => {
+		expect(formatCountdown(9)).toBe("9s");
+		expect(formatCountdown(59)).toBe("59s");
+		expect(formatCountdown(60)).toBe("1:00");
+		expect(formatCountdown(185)).toBe("3:05");
+		expect(formatCountdown(-5)).toBe("0s");
+	});
+});
+
+describe("warming history reducer (observeWarm)", () => {
+	it("starts empty", () => {
+		const s = initialWarmingState();
+		expect(s.history).toEqual([]);
+		expect(s.lastWarmAt).toBeNull();
+	});
+
+	it("records a new entry on each new authoritative lastWarmAt", () => {
+		let s = initialWarmingState();
+		s = observeWarm(s, 1000, 100);
+		s = observeWarm(s, 2000, 90);
+		expect(s.history).toEqual([
+			{ pct: 90, at: 2000 },
+			{ pct: 100, at: 1000 },
+		]);
+		expect(s.lastWarmAt).toBe(2000);
+	});
+
+	it("de-duplicates on the timestamp, not the pct (a re-pushed surface → no dup)", () => {
+		let s = initialWarmingState();
+		s = observeWarm(s, 1000, 100); // warm
+		s = observeWarm(s, 1000, 100); // toggle/interval re-push, same lastWarmAt → skip
+		expect(s.history).toHaveLength(1);
+	});
+
+	it("records two warms with the SAME pct (distinct timestamps both count)", () => {
+		let s = initialWarmingState();
+		s = observeWarm(s, 1000, 100);
+		s = observeWarm(s, 2000, 100);
+		expect(s.history.map((e) => e.at)).toEqual([2000, 1000]);
+	});
+
+	it("ignores a null lastWarmAt; a null pct advances the key without an entry", () => {
+		let s = initialWarmingState();
+		s = observeWarm(s, null, 100);
+		expect(s.history).toEqual([]);
+		s = observeWarm(s, 1000, null);
+		expect(s.history).toEqual([]);
+		expect(s.lastWarmAt).toBe(1000);
+	});
+});
+
+describe("secondsUntilNext (authoritative, from nextWarmAt)", () => {
+	it("is null when nothing is scheduled (nextWarmAt null)", () => {
+		expect(secondsUntilNext(null, 5000)).toBeNull();
+	});
+
+	it("counts down to nextWarmAt, floored at 0", () => {
+		expect(secondsUntilNext(10_000, 10_000)).toBe(0);
+		expect(secondsUntilNext(250_000, 10_000)).toBe(240);
+		expect(secondsUntilNext(70_000, 10_000)).toBe(60);
+		expect(secondsUntilNext(5_000, 999_999)).toBe(0); // already past
+	});
+});
diff --git a/src/features/cache-warming/logic/view-model.ts b/src/features/cache-warming/logic/view-model.ts
new file mode 100644
index 0000000..f7740d7
--- /dev/null
+++ b/src/features/cache-warming/logic/view-model.ts
@@ -0,0 +1,242 @@
+import type { SurfaceSpec } from "@dispatch/ui-contract";
+
+/**
+ * Pure core for the cache-warming view — zero DOM, zero effects, zero Svelte.
+ *
+ * The backend's `cache-warming` surface carries a toggle, a number interval (in
+ * seconds), two `stat`s ("last cache rate" + "cache retention"), and a `custom`
+ * `cache-warming-timer` field bearing the AUTHORITATIVE `nextWarmAt`/`lastWarmAt`
+ * epoch-ms timestamps. This module turns those inputs into the view-model the
+ * (thin) Svelte component renders: parsed controls, a warming-history reducer
+ * keyed off the authoritative `lastWarmAt`, an authoritative countdown, and the
+ * status/format helpers.
+ */
+
+// ── Manual-warm port (consumer-defines-port; the composition root adapts the
+//    store's `POST /chat/warm` result to this shape). ──────────────────────────
+export type WarmFeedback =
+	| { readonly ok: true; readonly cachePct: number; readonly expectedCacheRate: number }
+	| { readonly ok: false; readonly error: string };
+
+export type WarmNow = () => Promise<WarmFeedback | null>;
+
+// ── Parsed surface controls ───────────────────────────────────────────────────
+
+export interface ParsedControls {
+	readonly enabled: boolean;
+	readonly toggleActionId: string | null;
+	readonly intervalSeconds: number;
+	readonly setIntervalActionId: string | null;
+	/** Most recent warm's cache-hit %, from the "last cache rate" stat (`null` when "—"/absent). */
+	readonly lastPct: number | null;
+	/** Cross-turn retention %, from the "cache retention" stat (`null` when "—"/absent). */
+	readonly retentionPct: number | null;
+	/** Authoritative epoch-ms the next AUTOMATIC warm fires, or `null` when not scheduled. */
+	readonly nextWarmAt: number | null;
+	/** Authoritative epoch-ms of the most recent completed warm, or `null` if none. */
+	readonly lastWarmAt: number | null;
+}
+
+const EMPTY_CONTROLS: ParsedControls = {
+	enabled: false,
+	toggleActionId: null,
+	intervalSeconds: 0,
+	setIntervalActionId: null,
+	lastPct: null,
+	retentionPct: null,
+	nextWarmAt: null,
+	lastWarmAt: null,
+};
+
+/** The `cache-warming-timer` custom field's renderer id (this feature owns it). */
+const TIMER_RENDERER_ID = "cache-warming-timer";
+
+/** First number found in a stat's display string ("100%" → 100, "93 %" → 93); null when none ("—"). */
+export function parsePct(value: string): number | null {
+	const numeral = value.match(/-?\d+(?:\.\d+)?/)?.[0];
+	if (numeral === undefined) return null;
+	const parsed = Number(numeral);
+	return Number.isFinite(parsed) ? parsed : null;
+}
+
+/** Pass a finite number through unchanged; everything else (non-numbers, NaN, ±Infinity) becomes null. */
+function numOrNull(v: unknown): number | null {
+	return typeof v !== "number" || !Number.isFinite(v) ? null : v;
+}
+
+/** Read the timer payload's `nextWarmAt`/`lastWarmAt`; a non-object payload or non-finite value gives null. */
+function parseTimer(payload: unknown): { nextWarmAt: number | null; lastWarmAt: number | null } {
+	// Anything that is not a non-null object is treated as an empty bag of fields.
+	const isObject = typeof payload === "object" && payload !== null;
+	const fields = (isObject ? payload : {}) as Record<string, unknown>;
+	const { nextWarmAt, lastWarmAt } = fields;
+	return { nextWarmAt: numOrNull(nextWarmAt), lastWarmAt: numOrNull(lastWarmAt) };
+}
+
+/**
+ * Extract the cache-warming controls from the surface spec by FIELD KIND.
+ * First-wins: the toggle, the number, and the first non-retention stat (later
+ * ones are skipped). Last-wins: a stat whose label matches /retention/i, and the
+ * `cache-warming-timer` custom field. Returns EMPTY_CONTROLS for a null spec.
+ */
+export function parseControls(spec: SurfaceSpec | null): ParsedControls {
+	if (spec === null) return EMPTY_CONTROLS;
+	let enabled = false;
+	let toggleActionId: string | null = null;
+	let intervalSeconds = 0;
+	let setIntervalActionId: string | null = null;
+	let lastPct: number | null = null;
+	let retentionPct: number | null = null;
+	let nextWarmAt: number | null = null;
+	let lastWarmAt: number | null = null;
+	let seenToggle = false;
+	let seenNumber = false;
+	let seenRateStat = false;
+	for (const field of spec.fields) {
+		if (field.kind === "toggle" && !seenToggle) {
+			enabled = field.value;
+			toggleActionId = field.action.actionId;
+			seenToggle = true;
+		} else if (field.kind === "number" && !seenNumber) {
+			intervalSeconds = field.value;
+			setIntervalActionId = field.action.actionId;
+			seenNumber = true;
+		} else if (field.kind === "stat") {
+			// A label containing "retention" (any case) marks the retention stat; any
+			// other stat is the cache rate — only the first such stat is read.
+			if (/retention/i.test(field.label)) {
+				retentionPct = parsePct(field.value);
+			} else if (!seenRateStat) {
+				lastPct = parsePct(field.value);
+				seenRateStat = true;
+			}
+		} else if (field.kind === "custom" && field.rendererId === TIMER_RENDERER_ID) {
+			const timer = parseTimer(field.payload);
+			nextWarmAt = timer.nextWarmAt;
+			lastWarmAt = timer.lastWarmAt;
+		}
+	}
+	return {
+		enabled,
+		toggleActionId,
+		intervalSeconds,
+		setIntervalActionId,
+		lastPct,
+		retentionPct,
+		nextWarmAt,
+		lastWarmAt,
+	};
+}
+
+// ── Interval ↔ minutes/seconds (seconds capped at 59) ─────────────────────────
+
+export interface MinSec {
+	readonly minutes: number;
+	readonly seconds: number;
+}
+
+/** Floor `n` and clamp it into 0..59; non-finite input (NaN/±Infinity) yields 0. */
+export function clampSeconds(n: number): number {
+	return Number.isFinite(n) ? Math.min(59, Math.max(0, Math.floor(n))) : 0;
+}
+
+/** Floor `n` and raise negatives to 0; non-finite input (NaN/±Infinity) yields 0. */
+export function clampMinutes(n: number): number {
+	return Number.isFinite(n) ? Math.max(0, Math.floor(n)) : 0;
+}
+/** Split total seconds into minutes + seconds; negative or non-finite input (NaN/±Infinity) → 0:00. */
+export function toMinSec(totalSeconds: number): MinSec {
+	const total = Number.isFinite(totalSeconds) ? Math.max(0, Math.floor(totalSeconds)) : 0;
+	return { minutes: Math.floor(total / 60), seconds: total % 60 };
+}
+
+/** Total seconds for a minutes/seconds pair; each part is clamped first (seconds to 0..59). */
+export function fromMinSec(minutes: number, seconds: number): number {
+	return clampSeconds(seconds) + 60 * clampMinutes(minutes);
+}
+
+// ── Status + formatting ───────────────────────────────────────────────────────
+
+export type WarmStatus = "success" | "warning" | "error";
+
+/** Bucket a cache-hit % into a status: 80 and up is success, 40 up to 80 is warning, the rest error. */
+export function statusForPct(pct: number): WarmStatus {
+	// Both thresholds are inclusive lower bounds.
+	if (pct >= 80) return "success";
+	return pct >= 40 ? "warning" : "error";
+}
+
+/**
+ * DaisyUI text-colour class for a status (full literals so Tailwind keeps them).
+ */
+export function colorClass(status: WarmStatus): string {
+	const classByStatus: Record<WarmStatus, string> = {
+		success: "text-success",
+		warning: "text-warning",
+		error: "text-error",
+	};
+	return classByStatus[status];
+}
+
+/** Status line for a warm, e.g. "Warmed — 93% cache hit"; `pct` is rounded to the nearest whole number. */
+export function formatWarmLabel(pct: number): string {
+	return `Warmed — ${Math.round(pct)}% cache hit`;
+}
+
+/** Seconds → a short countdown: "9s" under a minute, otherwise m:ss such as "3:05"; negatives show "0s". */
+export function formatCountdown(seconds: number): string {
+	const whole = Math.max(0, Math.floor(seconds));
+	const mins = Math.floor(whole / 60);
+	// Zero whole minutes means the value is below 60, so show bare seconds.
+	if (mins === 0) return `${whole}s`;
+	return `${mins}:${(whole % 60).toString().padStart(2, "0")}`;
+}
+
+// ── Warming history reducer (keyed off the authoritative `lastWarmAt`) ─────────
+
+export interface WarmEntry {
+	readonly pct: number;
+	/** Authoritative epoch-ms of this warm (the surface's `lastWarmAt`). */
+	readonly at: number;
+}
+
+export interface WarmingViewState {
+	/** Warmings, MOST RECENT FIRST. */
+	readonly history: readonly WarmEntry[];
+	/** The last authoritative `lastWarmAt` recorded, for change-detection (de-dup key). */
+	readonly lastWarmAt: number | null;
+}
+
+const MAX_HISTORY = 50;
+
+export function initialWarmingState(): WarmingViewState {
+	return { history: [], lastWarmAt: null };
+}
+
+/**
+ * History reducer step. `lastWarmAt` is the de-dup key: a null key, or one equal
+ * to `state.lastWarmAt`, returns `state` untouched (so a re-pushed surface adds
+ * nothing, while two warms with the same % but different timestamps both land).
+ * A new key with a null `pct` only advances the key; otherwise the entry is
+ * prepended and the list is capped at MAX_HISTORY (oldest dropped).
+ */
+export function observeWarm(
+	state: WarmingViewState,
+	lastWarmAt: number | null,
+	pct: number | null,
+): WarmingViewState {
+	if (lastWarmAt === null) return state;
+	if (lastWarmAt === state.lastWarmAt) return state;
+	if (pct === null) return { ...state, lastWarmAt };
+	return { history: [{ pct, at: lastWarmAt }, ...state.history].slice(0, MAX_HISTORY), lastWarmAt };
+}
+
+/**
+ * Whole seconds from `now` until the backend's `nextWarmAt` (both epoch-ms),
+ * AUTHORITATIVE — never FE-anchored/guessed. `null` when nothing is scheduled
+ * (disabled, or a turn is generating so the timer is cancelled).
+ */
+export function secondsUntilNext(nextWarmAt: number | null, now: number): number | null {
+	// Math.ceil keeps a partial second visible; Math.max stops a past-due time going negative.
+	return nextWarmAt === null ? null : Math.max(0, Math.ceil((nextWarmAt - now) / 1000));
+}
diff --git a/src/features/cache-warming/ui/CacheWarmingView.svelte b/src/features/cache-warming/ui/CacheWarmingView.svelte
new file mode 100644
index 0000000..ced5e99
--- /dev/null
+++ b/src/features/cache-warming/ui/CacheWarmingView.svelte
@@ -0,0 +1,234 @@
+<script lang="ts">
+	import type { InvokeMessage, SurfaceSpec } from "@dispatch/ui-contract";
+	import { onMount, untrack } from "svelte";
+	import {
+		clampMinutes,
+		clampSeconds,
+		colorClass,
+		formatCountdown,
+		formatWarmLabel,
+		fromMinSec,
+		initialWarmingState,
+		observeWarm,
+		parseControls,
+		secondsUntilNext,
+		statusForPct,
+		toMinSec,
+		type WarmingViewState,
+		type WarmNow,
+	} from "../logic/view-model";
+
+	let {
+		spec,
+		canWarm,
+		onInvoke,
+		warmNow,
+	}: {
+		/** The cache-warming surface spec for the focused conversation, or null. */
+		spec: SurfaceSpec | null;
+		/** Whether a real conversation is focused (a draft has nothing to warm). */
+		canWarm: boolean;
+		onInvoke: (msg: InvokeMessage) => void;
+		warmNow: WarmNow;
+	} = $props();
+
+	const controls = $derived(parseControls(spec));
+
+	// View-model state (pure reducer) + the injected clock — owned here, not ambient.
+	let vm = $state<WarmingViewState>(initialWarmingState());
+	let now = $state(Date.now());
+	let warming = $state(false);
+	let errorText = $state<string | null>(null);
+	// Transient result of the most recent manual warm (immediate feedback; history
+	// itself is driven authoritatively by the surface's `lastWarmAt`).
+	let manualResult = $state<{ cachePct: number; expectedCacheRate: number } | null>(null);
+
+	// Local interval inputs, seeded from the surface and re-seeded only when the
+	// surface's interval differs from what's shown (so a stray update mid-edit
+	// doesn't clobber typing).
+	let minutes = $state(0);
+	let seconds = $state(0);
+
+	onMount(() => {
+		const id = setInterval(() => {
+			now = Date.now();
+		}, 1000);
+		return () => clearInterval(id);
+	});
+
+	// Fold each authoritative warm (new `lastWarmAt`) into history.
+	$effect(() => {
+		const at = controls.lastWarmAt;
+		const pct = controls.lastPct;
+		untrack(() => {
+			vm = observeWarm(vm, at, pct);
+		});
+	});
+
+	// Keep the min/sec inputs in sync with the surface's interval.
+	$effect(() => {
+		const target = controls.intervalSeconds;
+		untrack(() => {
+			if (fromMinSec(minutes, seconds) !== target) {
+				const ms = toMinSec(target);
+				minutes = ms.minutes;
+				seconds = ms.seconds;
+			}
+		});
+	});
+
+	const remaining = $derived(secondsUntilNext(controls.nextWarmAt, now));
+	const history = $derived(vm.history);
+	const latest = $derived(history[0] ?? null);
+	const earlier = $derived(history.slice(1));
+
+	function commitInterval() {
+		const actionId = controls.setIntervalActionId;
+		if (actionId === null || spec === null) return;
+		onInvoke({ type: "invoke", surfaceId: spec.id, actionId, payload: fromMinSec(minutes, seconds) });
+	}
+
+	function onMinutes(event: Event) {
+		const next = (event.target as HTMLInputElement).valueAsNumber;
+		if (Number.isNaN(next)) return; // empty input — ignore, don't clobber to 0
+		minutes = clampMinutes(next);
+		commitInterval();
+	}
+
+	function onSeconds(event: Event) {
+		const next = (event.target as HTMLInputElement).valueAsNumber;
+		if (Number.isNaN(next)) return; // empty input — ignore, don't clobber to 0
+		seconds = clampSeconds(next);
+		commitInterval();
+	}
+
+	function onToggle() {
+		const actionId = controls.toggleActionId;
+		if (actionId === null || spec === null) return;
+		// The toggle action FLIPS server-side; no payload.
+		onInvoke({ type: "invoke", surfaceId: spec.id, actionId });
+	}
+
+	async function handleWarm() {
+		if (warming) return;
+		warming = true;
+		errorText = null;
+		// A rejected warmNow becomes an `{ ok: false }` result so `warming` is always reset below.
+		const result = await warmNow().catch((e: unknown) => ({ ok: false as const, error: String(e) }));
+		warming = false;
+		if (result === null) return;
+		if (result.ok) {
+			// Immediate feedback only — history follows the surface's `lastWarmAt` via `observeWarm`.
+			manualResult = { cachePct: result.cachePct, expectedCacheRate: result.expectedCacheRate };
+		} else {
+			manualResult = null;
+			errorText = result.error;
+		}
+	}
+</script>
+
+<div class="flex flex-col gap-3">
+	<!-- Enabled -->
+	<label class="flex items-center justify-between gap-2 text-sm">
+		<span>Enabled</span>
+		<input
+			type="checkbox"
+			class="toggle toggle-sm toggle-success"
+			checked={controls.enabled}
+			disabled={spec === null}
+			onchange={onToggle}
+		/>
+	</label>
+
+	<!-- Refresh interval: minutes + seconds (seconds capped at 59) -->
+	<div class="flex items-center justify-between gap-2 text-sm">
+		<span>Refresh interval</span>
+		<span class="flex items-center gap-1">
+			<input
+				type="number"
+				class="input input-bordered input-sm w-16"
+				min="0"
+				value={minutes}
+				disabled={spec === null}
+				onchange={onMinutes}
+				aria-label="Interval minutes"
+			/>
+			<span class="opacity-60">m</span>
+			<input
+				type="number"
+				class="input input-bordered input-sm w-16"
+				min="0"
+				max="59"
+				value={seconds}
+				disabled={spec === null}
+				onchange={onSeconds}
+				aria-label="Interval seconds"
+			/>
+			<span class="opacity-60">s</span>
+		</span>
+	</div>
+
+	<!-- Countdown to the next automatic warm (authoritative: driven by nextWarmAt) -->
+	{#if !controls.enabled}
+		<p class="text-xs opacity-50">Warming paused.</p>
+	{:else if remaining !== null}
+		<p class="text-xs opacity-70">Next warm in {formatCountdown(remaining)}</p>
+	{:else}
+		<p class="text-xs opacity-50">Next warm: waiting…</p>
+	{/if}
+
+	<!-- Cross-turn retention (the "is warming working?" health signal) -->
+	{#if controls.retentionPct !== null}
+		<p class="text-xs {colorClass(statusForPct(controls.retentionPct))}">
+			Cache retention: {controls.retentionPct}%
+		</p>
+	{/if}
+
+	<!-- Manual trigger -->
+	<button
+		type="button"
+		class="btn btn-sm btn-outline"
+		disabled={!canWarm || warming}
+		onclick={handleWarm}
+	>
+		{#if warming}
+			<span class="loading loading-spinner loading-xs"></span>
+			Warming…
+		{:else}
+			Warm now
+		{/if}
+	</button>
+
+	{#if !canWarm}
+		<p class="text-xs opacity-60">Open or start a conversation to control its cache warming.</p>
+	{:else if errorText}
+		<p class="text-xs text-error">{errorText}</p>
+	{:else if manualResult}
+		<!-- Headline the retention (cache health) over the raw hit %. -->
+		<p class="text-xs {colorClass(statusForPct(manualResult.expectedCacheRate))}">
+			Warmed — {manualResult.expectedCacheRate}% retained ({manualResult.cachePct}% of prompt cached)
+		</p>
+	{/if}
+
+	<!-- Warming history: collapse whose title is the most recent warm, coloured by
+	     hit %, with the earlier warmings inside. -->
+	{#if latest}
+		<div class="collapse collapse-arrow bg-base-200">
+			<input type="checkbox" aria-label="Toggle warming history" />
+			<div class="collapse-title min-h-0 py-2 font-normal text-sm {colorClass(statusForPct(latest.pct))}">
+				{formatWarmLabel(latest.pct)}
+			</div>
+			<div class="collapse-content flex flex-col gap-1 text-sm">
+				{#if earlier.length > 0}
+					{#each earlier as entry, i (i)}
+						<p class={colorClass(statusForPct(entry.pct))}>{formatWarmLabel(entry.pct)}</p>
+					{/each}
+				{:else}
+					<p class="text-xs opacity-60">No earlier warmings.</p>
+				{/if}
+			</div>
+		</div>
+	{:else}
+		<p class="text-xs opacity-60">No warming yet.</p>
+	{/if}
+</div>
diff --git a/src/features/chat/ui/ChatView.svelte b/src/features/chat/ui/ChatView.svelte
index 3d1421d..00691aa 100644
--- a/src/features/chat/ui/ChatView.svelte
+++ b/src/features/chat/ui/ChatView.svelte
@@ -3,10 +3,12 @@
 	import {
 		interleaveTurnMetrics,
 		viewCacheRate,
+		viewExpectedCache,
 		viewStepMetrics,
 		viewTurnMetrics,
 		type TurnMetricsEntry,
 	} from "../../../core/metrics";
+	import { Markdown } from "../../markdown";
 
 	const badgeClass = {
 		success: "badge-success",
@@ -113,7 +115,7 @@
 		<div class="chat chat-start [&>.chat-bubble]:max-w-5xl">
 			<div class="chat-bubble w-full bg-transparent">
 				{#if rendered.chunk.type === "text"}
-					<p>{rendered.chunk.text}</p>
+					<Markdown text={rendered.chunk.text} streaming={rendered.streaming ?? false} />
 				{:else if rendered.chunk.type === "error"}
 					<div class="text-error" role="alert">
 						{rendered.chunk.message}
@@ -146,6 +148,7 @@
 			{@const turnView = viewTurnMetrics(row.turn)}
 			{@const lastCache = viewCacheRate(row.turn.usage)}
 			{@const chatCache = viewCacheRate(row.cumulativeUsage)}
+			{@const retention = viewExpectedCache(row.turn.usage, row.prevTurnUsage)}
 			<div class="chat chat-start">
 				<div class="chat-bubble w-full max-w-5xl bg-transparent p-0">
 					<div class="flex flex-col gap-1 text-xs">
@@ -163,6 +166,12 @@
 								<span class="opacity-70">Chat Total:</span>
 								<span class="badge badge-sm {badgeClass[chatCache.level]}">{chatCache.pct}%</span>
 							</span>
+							{#if retention}
+								<span class="flex items-center gap-1">
+									<span class="opacity-70">Retention:</span>
+									<span class="badge badge-sm {badgeClass[retention.level]}">{retention.pct}%</span>
+								</span>
+							{/if}
 						</div>
 					</div>
 				</div>
diff --git a/src/features/markdown/index.ts b/src/features/markdown/index.ts
new file mode 100644
index 0000000..f5406b2
--- /dev/null
+++ b/src/features/markdown/index.ts
@@ -0,0 +1,8 @@
+export { renderMarkdown } from "./logic/markdown";
+export { default as Markdown } from "./ui/Markdown.svelte";
+
+/** Public module manifest — aggregated by the shell's "Loaded Modules" view. */
+export const manifest = {
+	name: "markdown",
+	description: "Renders assistant messages as sanitized Markdown (GFM + syntax highlighting)",
+} as const;
diff --git a/src/features/markdown/logic/markdown.test.ts b/src/features/markdown/logic/markdown.test.ts
new file mode 100644
index 0000000..7dbb878
--- /dev/null
+++ b/src/features/markdown/logic/markdown.test.ts
@@ -0,0 +1,58 @@
+import { describe, expect, it } from "vitest";
+import { renderMarkdown } from "./markdown";
+
+describe("renderMarkdown", () => {
+	it("renders GFM markdown (headings, emphasis)", () => {
+		const html = renderMarkdown("# Title\n\nSome **bold** text.");
+		expect(html).toContain("<h1");
+		expect(html).toContain("Title");
+		expect(html).toContain("<strong>bold</strong>");
+	});
+
+	it("highlights fenced code for a known language", () => {
+		const html = renderMarkdown("```javascript\nconst x = 1;\n```");
+		expect(html).toContain("language-javascript");
+		expect(html).toContain("hljs-keyword"); // `const` got highlighted
+	});
+
+	it("resolves language aliases (js -> javascript)", () => {
+		const html = renderMarkdown("```js\nconst x = 1;\n```");
+		expect(html).toContain("hljs-keyword");
+	});
+
+	it("escapes code for an unknown language without throwing", () => {
+		const html = renderMarkdown("```nope\n<b>x</b>\n```");
+		expect(html).toContain("&lt;b&gt;");
+	});
+
+	it("sanitizes dangerous HTML", () => {
+		const html = renderMarkdown("Hi <script>alert(1)</script> there");
+		expect(html).not.toContain("<script>");
+		expect(html).toContain("Hi");
+	});
+
+	it("balances dangling bold emphasis while streaming", () => {
+		expect(renderMarkdown("a **bold", { streaming: true })).toContain("<strong>bold</strong>");
+	});
+
+	it("does not balance delimiters when not streaming", () => {
+		expect(renderMarkdown("a **bold")).not.toContain("<strong>");
+	});
+
+	it("wraps fenced code blocks with a copy button", () => {
+		const html = renderMarkdown("```js\nconst x = 1;\n```");
+		expect(html).toContain("code-block");
+		expect(html).toContain("data-copy");
+		expect(html).toContain("<pre>");
+	});
+
+	it("does not add a copy button to inline code", () => {
+		const html = renderMarkdown("use `npm run dev` please");
+		expect(html).not.toContain("data-copy");
+		expect(html).toContain("<code>npm run dev</code>");
+	});
+
+	it("returns an empty string for empty input", () => {
+		expect(renderMarkdown("")).toBe("");
+	});
+});
diff --git a/src/features/markdown/logic/markdown.ts b/src/features/markdown/logic/markdown.ts
new file mode 100644
index 0000000..3a6e5a6
--- /dev/null
+++ b/src/features/markdown/logic/markdown.ts
@@ -0,0 +1,165 @@
+/**
+ * Pure Markdown → sanitized-HTML renderer for assistant messages.
+ *
+ * Mirrors old Dispatch's stack (marked + marked-highlight + highlight.js +
+ * DOMPurify; GFM + line breaks; streaming delimiter-closing), but kept fully
+ * SYNCHRONOUS and pure: `input → output`, no effects, no `$effect`. Languages
+ * are a fixed "hot set" registered at module load (no lazy dynamic import), so a
+ * single `renderMarkdown(text)` call is deterministic and unit-testable.
+ *
+ * The only ambient dependency is DOMPurify, which sanitizes against the DOM —
+ * present in the browser and in the jsdom test env.
+ */
+
+import DOMPurify from "dompurify";
+import type { LanguageFn } from "highlight.js";
+import hljs from "highlight.js/lib/core";
+import bash from "highlight.js/lib/languages/bash";
+import c from "highlight.js/lib/languages/c";
+import cpp from "highlight.js/lib/languages/cpp";
+import csharp from "highlight.js/lib/languages/csharp";
+import css from "highlight.js/lib/languages/css";
+import go from "highlight.js/lib/languages/go";
+import java from "highlight.js/lib/languages/java";
+import javascript from "highlight.js/lib/languages/javascript";
+import json from "highlight.js/lib/languages/json";
+import markdownLang from "highlight.js/lib/languages/markdown";
+import php from "highlight.js/lib/languages/php";
+import plaintext from "highlight.js/lib/languages/plaintext";
+import python from "highlight.js/lib/languages/python";
+import ruby from "highlight.js/lib/languages/ruby";
+import rust from "highlight.js/lib/languages/rust";
+import shell from "highlight.js/lib/languages/shell";
+import sql from "highlight.js/lib/languages/sql";
+import typescript from "highlight.js/lib/languages/typescript";
+import xml from "highlight.js/lib/languages/xml";
+import yaml from "highlight.js/lib/languages/yaml";
+import { Marked } from "marked";
+import { markedHighlight } from "marked-highlight";
+
+// Hot set: registered eagerly so common code blocks highlight on first paint.
+const HOT_LANGUAGES: Record<string, LanguageFn> = {
+	bash,
+	c,
+	cpp,
+	csharp,
+	css,
+	go,
+	java,
+	javascript,
+	json,
+	markdown: markdownLang,
+	php,
+	plaintext,
+	python,
+	ruby,
+	rust,
+	shell,
+	sql,
+	typescript,
+	xml,
+	yaml,
+};
+for (const [name, lang] of Object.entries(HOT_LANGUAGES)) {
+	hljs.registerLanguage(name, lang);
+}
+
+// Normalize common fence aliases to canonical highlight.js names (Map: own entries only).
+const ALIASES = new Map<string, string>([
+	["js", "javascript"],
+	["jsx", "javascript"],
+	["mjs", "javascript"],
+	["cjs", "javascript"],
+	["ts", "typescript"],
+	["tsx", "typescript"],
+	["py", "python"],
+	["py3", "python"],
+	["rb", "ruby"],
+	["sh", "bash"],
+	["zsh", "bash"],
+	["yml", "yaml"],
+	["c++", "cpp"],
+	["cxx", "cpp"],
+	["c#", "csharp"],
+	["cs", "csharp"],
+	["htm", "xml"],
+	["html", "xml"],
+	["svg", "xml"],
+	["md", "markdown"],
+	["mdx", "markdown"],
+	["golang", "go"],
+	["rs", "rust"],
+]);
+
+function normalizeLang(lang: string): string {
+	const lower = lang.toLowerCase().trim();
+	return ALIASES.get(lower) ?? lower; // Map: a `constructor` tag can't hit Object.prototype
+}
+
+// Escape the five HTML-significant characters in a single pass. `entities` is
+// index-aligned with `chars`, and replacement text is never rescanned, so `&`
+// cannot be double-escaped.
+function escapeHtml(s: string): string {
+	const chars = "&<>\"'";
+	const entities = ["&amp;", "&lt;", "&gt;", "&quot;", "&#39;"];
+	return s.replace(/[&<>"']/g, (ch) => entities[chars.indexOf(ch)] ?? ch);
+}
+
+const md = new Marked(
+	markedHighlight({
+		emptyLangClass: "hljs", // presumably the class for fences with no language tag — confirm in marked-highlight docs
+		langPrefix: "hljs language-", // prefix placed before the fence's language tag in the class
+		highlight(code: string, lang: string): string {
+			if (!lang) return escapeHtml(code); // no language tag: escape only
+			const name = normalizeLang(lang);
+			if (!hljs.getLanguage(name)) return escapeHtml(code); // language not registered: escape only
+			try {
+				return hljs.highlight(code, { language: name, ignoreIllegals: true }).value;
+			} catch {
+				return escapeHtml(code); // highlighter threw: fall back to the escaped source
+			}
+		},
+	}),
+	{ gfm: true, breaks: true }, // GFM + line breaks, as described in the file header
+);
+
+/**
+ * While a message is still streaming, balance dangling fences / emphasis so the
+ * partial text renders cleanly. Markers inside fenced code (e.g. `**kwargs`) are
+ * code, not emphasis: an open fence is closed and nothing else is appended.
+ */
+function closeOpenDelimiters(src: string): string {
+	// A suffix after the closing fence ("```**") would stop it from being a fence.
+	if ((src.match(/^```/gm) ?? []).length % 2 !== 0) return `${src}\n\`\`\``;
+	const prose = src.replace(/^```[\s\S]*?^```/gm, ""); // drop closed fenced blocks
+	let out = src;
+	if ((prose.match(/\*\*/g) ?? []).length % 2 !== 0) out += "**";
+	if ((prose.match(/(?<!`)`(?!`)/g) ?? []).length % 2 !== 0) out += "`";
+	return out;
+}
+
+// Wrap each fenced code block (`<pre>…</pre>`) in a positioned container with a
+// copy button. marked emits exactly one `<pre>`/`</pre>` pair per block and
+// escapes `<`/`>` inside code, so these literal tags only ever delimit blocks.
+// `data-copy` is the delegation hook the component listens for; DOMPurify keeps
+// `<button>` + `data-*` by default. Inline `<code>` has no `<pre>`, so it's untouched.
+const COPY_BUTTON =
+	'<button type="button" data-copy aria-label="Copy code"' +
+	' class="copy-btn btn btn-xs absolute right-2 top-2 opacity-0 transition-opacity group-hover:opacity-100">Copy</button>';
+
+function addCopyButtons(html: string): string {
+	const opener = `<div class="code-block group relative">${COPY_BUTTON}<pre>`;
+	const withOpeners = html.split("<pre>").join(opener); // every literal `<pre>`
+	return withOpeners.split("</pre>").join("</pre></div>"); // every literal `</pre>`
+}
+
+/** Markdown → sanitized HTML; `streaming` balances delimiters first. Gives `""` if parsing throws. */
+export function renderMarkdown(text: string, opts?: { streaming?: boolean }): string {
+	const balance = opts?.streaming === true;
+	const src = balance ? closeOpenDelimiters(text) : text;
+	try {
+		return DOMPurify.sanitize(addCopyButtons(md.parse(src) as string));
+	} catch {
+		return "";
+	}
+}
diff --git a/src/features/markdown/ui/Markdown.svelte b/src/features/markdown/ui/Markdown.svelte
new file mode 100644
index 0000000..b828ab9
--- /dev/null
+++ b/src/features/markdown/ui/Markdown.svelte
@@ -0,0 +1,58 @@
+<script lang="ts">
+	import { renderMarkdown } from "../logic/markdown";
+
+	let {
+		text,
+		streaming = false,
+	}: {
+		text: string;
+		/** Balance dangling delimiters while the message is still generating. */
+		streaming?: boolean;
+	} = $props();
+
+	// Pure transform; the HTML is already DOMPurify-sanitized in renderMarkdown.
+	const html = $derived(renderMarkdown(text, { streaming }));
+
+	let container: HTMLElement;
+
+	// One delegated listener on the stable container serves every code block's copy
+	// button, including blocks re-created when `html` changes (streaming). No clipboard
+	// (insecure context) → no-op. The idle label is stored on the button once, so a
+	// re-click while it reads "Copied" restores that label instead of sticking on "Copied".
+	$effect(() => {
+		const el = container;
+		if (el === undefined) return;
+
+		const onClick = (event: Event): void => {
+			const target = event.target;
+			if (!(target instanceof Element)) return;
+			const button = target.closest<HTMLButtonElement>("[data-copy]");
+			if (button === null) return;
+
+			const code = button.closest(".code-block")?.querySelector("code")?.textContent ?? "";
+			const clipboard = navigator.clipboard;
+			if (clipboard === undefined) return;
+
+			void clipboard
+				.writeText(code)
+				.then(() => {
+					button.dataset.idleLabel ??= button.textContent ?? "";
+					button.textContent = "Copied";
+					setTimeout(() => {
+						button.textContent = button.dataset.idleLabel ?? "";
+					}, 1200);
+				})
+				.catch(() => {
+					// Clipboard denied — leave the button as-is.
+				});
+		};
+
+		el.addEventListener("click", onClick);
+		return () => el.removeEventListener("click", onClick);
+	});
+</script>
+
+<div class="markdown-body" bind:this={container}>
+	<!-- {@html} is safe here: `html` is DOMPurify-sanitized inside renderMarkdown. -->
+	{@html html}
+</div>
diff --git a/src/features/markdown/ui/markdown.test.ts b/src/features/markdown/ui/markdown.test.ts
new file mode 100644
index 0000000..e34a4af
--- /dev/null
+++ b/src/features/markdown/ui/markdown.test.ts
@@ -0,0 +1,40 @@
+import { fireEvent, render, screen } from "@testing-library/svelte";
+import { describe, expect, it, vi } from "vitest";
+import Markdown from "./Markdown.svelte";
+
+describe("Markdown", () => {
+	it("renders markdown into a .markdown-body container", () => {
+		const { container } = render(Markdown, { props: { text: "# Hello\n\n**hi**" } });
+
+		expect(container.querySelector(".markdown-body")).not.toBeNull();
+		expect(screen.getByRole("heading", { level: 1, name: "Hello" })).toBeInTheDocument();
+		expect(container.querySelector("strong")?.textContent).toBe("hi");
+	});
+
+	it("strips dangerous markup", () => {
+		const { container } = render(Markdown, {
+			props: { text: "before <script>alert(1)</script> after" },
+		});
+
+		expect(container.querySelector("script")).toBeNull();
+		expect(container.textContent).toContain("before");
+	});
+
+	it("renders a copy button on a code block that copies the code to the clipboard", async () => {
+		const writeText = vi.fn().mockResolvedValue(undefined);
+		Object.defineProperty(navigator, "clipboard", { value: { writeText }, configurable: true });
+
+		const { container } = render(Markdown, {
+			props: { text: "```js\nconst x = 1;\n```" },
+		});
+
+		const button = container.querySelector<HTMLElement>("[data-copy]");
+		expect(button).not.toBeNull();
+		if (button === null) throw new Error("expected a copy button");
+
+		await fireEvent.click(button);
+
+		expect(writeText).toHaveBeenCalledTimes(1);
+		expect(writeText.mock.calls[0]?.[0]).toContain("const x = 1;");
+	});
+});
diff --git a/src/features/surface-host/logic/plan.test.ts b/src/features/surface-host/logic/plan.test.ts
index a5727b4..be296a7 100644
--- a/src/features/surface-host/logic/plan.test.ts
+++ b/src/features/surface-host/logic/plan.test.ts
@@ -57,6 +57,47 @@ describe("planSurface", () => {
 		expect(plan.fields).toEqual([{ kind: "stat", label: "Tokens", value: "1,234" }]);
 	});
 
+	it("maps a number field to a NumberFieldView, carrying optional hints", () => {
+		const plan = planSurface(
+			makeSpec({
+				kind: "number",
+				label: "Interval",
+				value: 240,
+				min: 1,
+				step: 1,
+				unit: "s",
+				action: { actionId: "cache-warming/set-interval" },
+			}),
+		);
+		expect(plan.fields).toEqual([
+			{
+				kind: "number",
+				label: "Interval",
+				value: 240,
+				min: 1,
+				step: 1,
+				unit: "s",
+				action: { actionId: "cache-warming/set-interval" },
+			},
+		]);
+	});
+
+	it("omits absent number hints (no max key when undefined)", () => {
+		const plan = planSurface(
+			makeSpec({
+				kind: "number",
+				label: "Interval",
+				value: 240,
+				min: 1,
+				action: { actionId: "set" },
+			}),
+		);
+		const field = plan.fields[0];
+		expect(field).not.toHaveProperty("max");
+		expect(field).not.toHaveProperty("step");
+		expect(field).not.toHaveProperty("unit");
+	});
+
 	it("maps a button field to a ButtonFieldView", () => {
 		const plan = planSurface(
 			makeSpec({ kind: "button", label: "Retry", action: { actionId: "retry" } }),
diff --git a/src/features/surface-host/logic/plan.ts b/src/features/surface-host/logic/plan.ts
index 769f9f9..89088c3 100644
--- a/src/features/surface-host/logic/plan.ts
+++ b/src/features/surface-host/logic/plan.ts
@@ -1,7 +1,21 @@
 import type { InvokeMessage, SurfaceSpec } from "@dispatch/ui-contract";
-import type { FieldView, RenderGroup, StatFieldView, SurfaceRenderPlan } from "./types";
+import type {
+	FieldView,
+	NumberFieldView,
+	RenderGroup,
+	StatFieldView,
+	SurfaceRenderPlan,
+} from "./types";
 
-const KNOWN_KINDS = new Set(["toggle", "progress", "selector", "stat", "button", "custom"]);
+const KNOWN_KINDS = new Set([
+	"toggle",
+	"progress",
+	"selector",
+	"stat",
+	"number",
+	"button",
+	"custom",
+]);
 
 /**
  * Validate and normalise a SurfaceSpec into a renderable plan.
@@ -46,6 +60,21 @@ export function planSurface(spec: SurfaceSpec): SurfaceRenderPlan {
 					value: field.value,
 				});
 				break;
+			case "number": {
+				// Carry optional hints only when present (exactOptionalPropertyTypes).
+				const view: NumberFieldView = {
+					kind: "number",
+					label: field.label,
+					value: field.value,
+					action: field.action,
+					...(field.min !== undefined ? { min: field.min } : {}),
+					...(field.max !== undefined ? { max: field.max } : {}),
+					...(field.step !== undefined ? { step: field.step } : {}),
+					...(field.unit !== undefined ? { unit: field.unit } : {}),
+				};
+				fields.push(view);
+				break;
+			}
 			case "button":
 				fields.push({
 					kind: "button",
diff --git a/src/features/surface-host/logic/types.ts b/src/features/surface-host/logic/types.ts
index d1888a2..23f8757 100644
--- a/src/features/surface-host/logic/types.ts
+++ b/src/features/surface-host/logic/types.ts
@@ -31,6 +31,22 @@ export interface StatFieldView {
 	readonly value: string;
 }
 
+/**
+ * Normalised view-model for a number field — the free-value counterpart to
+ * selector. `min`/`max`/`step`/`unit` are optional semantic hints (absent when
+ * the spec omits them). The renderer posts the new number as the action payload.
+ */
+export interface NumberFieldView {
+	readonly kind: "number"; // discriminant within the FieldView union
+	readonly label: string; // text rendered next to the input
+	readonly value: number; // current value shown in the input
+	readonly min?: number; // forwarded to the input's `min` attribute
+	readonly max?: number; // forwarded to the input's `max` attribute
+	readonly step?: number; // forwarded to the input's `step` attribute
+	readonly unit?: string; // suffix rendered after the input when present
+	readonly action: ActionRef; // its actionId is invoked with the committed number
+}
+
 /** Normalised view-model for a button field. */
 export interface ButtonFieldView {
 	readonly kind: "button";
@@ -55,6 +71,7 @@ export type FieldView =
 	| ProgressFieldView
 	| SelectorFieldView
 	| StatFieldView
+	| NumberFieldView
 	| ButtonFieldView
 	| CustomFieldView;
 
diff --git a/src/features/surface-host/ui/Number.svelte b/src/features/surface-host/ui/Number.svelte
new file mode 100644
index 0000000..0f3323d
--- /dev/null
+++ b/src/features/surface-host/ui/Number.svelte
@@ -0,0 +1,43 @@
+<script lang="ts">
+	import type { InvokeMessage } from "@dispatch/ui-contract";
+	import type { NumberFieldView } from "../logic/types";
+
+	let {
+		field,
+		surfaceId,
+		onInvoke,
+	}: { field: NumberFieldView; surfaceId: string; onInvoke: (msg: InvokeMessage) => void } =
+		$props();
+
+	// Commit on change/Enter rather than every keystroke. Empty/non-numeric input is
+	// never sent (the backend also floors/validates); the box is reset to the current
+	// value instead, since the `value` binding only rewrites it when `field.value` changes.
+	function commit(event: Event) {
+		const target = event.target as HTMLInputElement;
+		const next = target.valueAsNumber;
+		if (Number.isNaN(next)) {
+			target.value = String(field.value);
+			return;
+		}
+		// Send the new number as the action payload.
+		onInvoke({ type: "invoke", surfaceId, actionId: field.action.actionId, payload: next });
+	}
+</script>
+
+<label class="flex items-center justify-between gap-2 text-sm">
+	<span>{field.label}</span>
+	<span class="flex items-center gap-1">
+		<input
+			type="number"
+			class="input input-bordered input-sm w-24"
+			value={field.value}
+			min={field.min}
+			max={field.max}
+			step={field.step}
+			onchange={commit}
+		/>
+		{#if field.unit}
+			<span class="opacity-60">{field.unit}</span>
+		{/if}
+	</span>
+</label>
diff --git a/src/features/surface-host/ui/SurfaceView.svelte b/src/features/surface-host/ui/SurfaceView.svelte
index 5210e8c..24be8b8 100644
--- a/src/features/surface-host/ui/SurfaceView.svelte
+++ b/src/features/surface-host/ui/SurfaceView.svelte
@@ -2,6 +2,7 @@
 	import type { InvokeMessage, SurfaceSpec } from "@dispatch/ui-contract";
 	import { groupRenderFields, planSurface } from "../logic/plan";
 	import Button from "./Button.svelte";
+	import Number from "./Number.svelte";
 	import Progress from "./Progress.svelte";
 	import Selector from "./Selector.svelte";
 	import StatTable from "./StatTable.svelte";
@@ -30,6 +31,8 @@
 			<Progress field={group.field} />
 		{:else if group.field.kind === "selector"}
 			<Selector field={group.field} surfaceId={spec.id} {onInvoke} />
+		{:else if group.field.kind === "number"}
+			<Number field={group.field} surfaceId={spec.id} {onInvoke} />
 		{:else if group.field.kind === "button"}
 			<Button field={group.field} surfaceId={spec.id} {onInvoke} />
 		{:else if group.field.kind === "custom"}
-- 
cgit v1.2.3