2 files changed, 270 insertions, 2 deletions
diff --git a/packages/api/tests/agent-manager.test.ts b/packages/api/tests/agent-manager.test.ts
index b9b4510..95fb558 100644
--- a/packages/api/tests/agent-manager.test.ts
+++ b/packages/api/tests/agent-manager.test.ts
@@ -91,6 +91,19 @@ function setFakeSetting(key: string, value: string): void {
 	fakeSettings.set(key, value);
 }
 
+// Capture every appendChunks(tabId, drafts) call so tests can assert what got
+// persisted (e.g. usage side-channel rows). The real explodeTurn is mocked to
+// return [], so content drafts are empty here; usage rows are pushed directly
+// by processMessage's flushAssistant, making them the visible drafts.
+interface AppendChunksCall {
+	tabId: string;
+	drafts: Array<{ turnId: string; step: number; role: string; type: string; data: unknown }>;
+}
+const appendChunksCalls: AppendChunksCall[] = [];
+function resetAppendChunksCalls(): void {
+	appendChunksCalls.length = 0;
+}
+
 // Allow tests to swap in a custom `run` generator (e.g. to simulate
 // a fallback failure mid-stream). Returning to undefined restores
 // the default.
@@ -345,7 +358,8 @@ vi.mock("@dispatch/core", () => ({
 	getSetting(key: string) {
 		return fakeSettings.get(key) ?? null;
 	},
-	appendChunks() {
+	appendChunks(tabId: string, drafts: AppendChunksCall["drafts"]) {
+		appendChunksCalls.push({ tabId, drafts: [...drafts] });
 		return [];
 	},
 	explodeUserText() {
@@ -406,6 +420,7 @@ describe("AgentManager", () => {
 		resetFakeSettings();
 		setRunImpl(null);
 		appendEventToChunksSpy.mockClear();
+		resetAppendChunksCalls();
 	});
 
 	it("initial status is idle", () => {
@@ -1331,4 +1346,177 @@ describe("AgentManager", () => {
 			expect(tools).not.toContain("read_tab");
 		});
 	});
+
+	// ─── Usage side-channel persistence ──────────────────────────────
+	//
+	// `usage` AgentEvents (one per LLM round-trip) are persisted as invisible
+	// `type:"usage"` chunk rows so per-tab token/cache telemetry survives a
+	// reload. They ride the SAME atomic appendChunks call as the turn's content
+	// rows (one fsync, contiguous seqs). A superseded fallback attempt's usage is
+	// discarded with its `chunks` (per-attempt accumulator).
+	describe("usage persistence", () => {
+		it("writes one usage row per usage event emitted during a turn", async () => {
+			const manager = new AgentManager();
+			setRunImpl(async function* () {
+				yield { type: "status", status: "running" } as const;
+				yield {
+					type: "usage",
+					usage: { inputTokens: 1000, outputTokens: 40, cacheReadTokens: 0, cacheWriteTokens: 900 },
+				} as const;
+				yield { type: "text-delta", delta: "step two" } as const;
+				yield {
+					type: "usage",
+					usage: {
+						inputTokens: 1200,
+						outputTokens: 60,
+						cacheReadTokens: 1000,
+						cacheWriteTokens: 100,
+					},
+				} as const;
+				yield {
+					type: "done",
+					message: { role: "assistant", chunks: [{ type: "text", text: "step two" }] },
+				} as const;
+				yield { type: "status", status: "idle" } as const;
+			});
+
+			await manager.processMessage("tab-usage-rows", "go");
+
+			const usageDrafts = appendChunksCalls
+				.flatMap((c) => c.drafts)
+				.filter((d) => d.type === "usage");
+			expect(usageDrafts).toHaveLength(2);
+			// One row per event, role=assistant, step cosmetic (0).
+			expect(usageDrafts.every((d) => d.role === "assistant" && d.step === 0)).toBe(true);
+			expect(usageDrafts[0]?.data).toEqual({
+				inputTokens: 1000,
+				outputTokens: 40,
+				cacheReadTokens: 0,
+				cacheWriteTokens: 900,
+			});
+			expect(usageDrafts[1]?.data).toEqual({
+				inputTokens: 1200,
+				outputTokens: 60,
+				cacheReadTokens: 1000,
+				cacheWriteTokens: 100,
+			});
+		});
+
+		it("emits usage rows in the SAME appendChunks call as the turn's content (one atomic write)", async () => {
+			const manager = new AgentManager();
+			setRunImpl(async function* () {
+				yield { type: "status", status: "running" } as const;
+				yield { type: "text-delta", delta: "hi" } as const;
+				yield {
+					type: "usage",
+					usage: { inputTokens: 5, outputTokens: 1, cacheReadTokens: 2, cacheWriteTokens: 3 },
+				} as const;
+				yield {
+					type: "done",
+					message: { role: "assistant", chunks: [{ type: "text", text: "hi" }] },
+				} as const;
+				yield { type: "status", status: "idle" } as const;
+			});
+
+			await manager.processMessage("tab-usage-atomic", "go");
+
+			// Exactly one appendChunks call carries the usage draft (the flush). The
+			// user-message append and any system-row appends carry no usage rows.
+			const callsWithUsage = appendChunksCalls.filter((c) =>
+				c.drafts.some((d) => d.type === "usage"),
+			);
+			expect(callsWithUsage).toHaveLength(1);
+			expect(callsWithUsage[0]?.tabId).toBe("tab-usage-atomic");
+		});
+
+		it("discards a superseded (rate-limited) attempt's usage on fallback", async () => {
+			const manager = new AgentManager();
+			// Inject a minimal model registry so the rate-limit fallback path is
+			// taken (real `processMessage` requires modelRegistry + a resolved
+			// keyId + a next fallback entry to retry).
+			const markKeyExhausted = vi.fn();
+			(
+				manager as unknown as {
+					modelRegistry: {
+						getKeys(): Array<{ definition: Record<string, unknown> }>;
+						markKeyExhausted(): void;
+					};
+				}
+			).modelRegistry = {
+				getKeys: () => [
+					{
+						definition: {
+							id: "k1",
+							provider: "openai-compatible",
+							env: "ENV1",
+							base_url: "http://x",
+						},
+					},
+					{
+						definition: {
+							id: "k2",
+							provider: "openai-compatible",
+							env: "ENV2",
+							base_url: "http://y",
+						},
+					},
+				],
+				markKeyExhausted,
+			};
+
+			let attempt = 0;
+			setRunImpl(async function* () {
+				attempt++;
+				yield { type: "status", status: "running" } as const;
+				if (attempt === 1) {
+					// Attempt 1 emits usage then rate-limits — its usage must be dropped.
+					yield {
+						type: "usage",
+						usage: { inputTokens: 999, outputTokens: 9, cacheReadTokens: 0, cacheWriteTokens: 0 },
+					} as const;
+					yield { type: "error", error: "rate limit exceeded (status=429)" } as const;
+					return;
+				}
+				// Attempt 2 succeeds — only its usage should persist.
+				yield {
+					type: "usage",
+					usage: { inputTokens: 222, outputTokens: 22, cacheReadTokens: 100, cacheWriteTokens: 5 },
+				} as const;
+				yield {
+					type: "done",
+					message: { role: "assistant", chunks: [{ type: "text", text: "recovered" }] },
+				} as const;
+				yield { type: "status", status: "idle" } as const;
+			});
+
+			const agentModels = [
+				{ key_id: "k1", model_id: "m1" },
+				{ key_id: "k2", model_id: "m2" },
+			];
+			await manager.processMessage(
+				"tab-usage-fallback",
+				"go",
+				undefined,
+				undefined,
+				undefined,
+				undefined,
+				agentModels,
+			);
+
+			expect(attempt).toBe(2); // confirm the fallback retry actually happened
+			expect(markKeyExhausted).toHaveBeenCalled();
+
+			const usageDrafts = appendChunksCalls
+				.flatMap((c) => c.drafts)
+				.filter((d) => d.type === "usage");
+			// Only attempt 2's usage survives.
+			expect(usageDrafts).toHaveLength(1);
+			expect(usageDrafts[0]?.data).toEqual({
+				inputTokens: 222,
+				outputTokens: 22,
+				cacheReadTokens: 100,
+				cacheWriteTokens: 5,
+			});
+		});
+	});
 });
diff --git a/packages/api/tests/routes.test.ts b/packages/api/tests/routes.test.ts
index c768cee..ad6d5b1 100644
--- a/packages/api/tests/routes.test.ts
+++ b/packages/api/tests/routes.test.ts
@@ -1,6 +1,24 @@
 import type { ToolDefinition } from "@dispatch/core";
 import { describe, expect, it, vi } from "vitest";
 
+// Seedable backing stores for the tabs route (GET /tabs enrichment). Declared
+// before vi.mock so the hoisted factory closure can reference them; populated
+// per-test.
+interface FakeOpenTab {
+	id: string;
+	title: string;
+	keyId: string | null;
+	modelId: string | null;
+	parentTabId: string | null;
+	status: string;
+	isOpen: boolean;
+	position: number;
+	createdAt: number;
+	updatedAt: number;
+}
+const fakeOpenTabs: FakeOpenTab[] = [];
+const fakeUsageStats = new Map<string, unknown>();
+
 // Mock @dispatch/core's Agent to avoid real LLM calls
 vi.mock("@dispatch/core", () => ({
 	Agent: class MockAgent {
@@ -170,7 +188,7 @@ vi.mock("@dispatch/core", () => ({
 		return null;
 	},
 	listOpenTabs() {
-		return [];
+		return [...fakeOpenTabs];
 	},
 	resolveTabPrefix() {
 		return { status: "none" };
@@ -230,6 +248,9 @@ vi.mock("@dispatch/core", () => ({
 	getTotalChunkCount() {
 		return 0;
 	},
+	getUsageStatsForTab(tabId: string) {
+		return fakeUsageStats.get(tabId) ?? null;
+	},
 	appendEventToChunks(_chunks: unknown[], _event: unknown) {
 		// no-op stub
 	},
@@ -413,6 +434,65 @@ describe("POST /chat", () => {
 	});
 });
 
+describe("GET /tabs", () => {
+	it("enriches each open tab with its persisted usageStats aggregate", async () => {
+		fakeOpenTabs.length = 0;
+		fakeUsageStats.clear();
+		fakeOpenTabs.push({
+			id: "tab-u",
+			title: "Has usage",
+			keyId: null,
+			modelId: null,
+			parentTabId: null,
+			status: "idle",
+			isOpen: true,
+			position: 0,
+			createdAt: 0,
+			updatedAt: 0,
+		});
+		fakeOpenTabs.push({
+			id: "tab-none",
+			title: "No usage",
+			keyId: null,
+			modelId: null,
+			parentTabId: null,
+			status: "idle",
+			isOpen: true,
+			position: 1,
+			createdAt: 0,
+			updatedAt: 0,
+		});
+		fakeUsageStats.set("tab-u", {
+			inputTokens: 2200,
+			outputTokens: 100,
+			cacheReadTokens: 1000,
+			cacheWriteTokens: 1000,
+			requests: 2,
+			last: { inputTokens: 1200, outputTokens: 60, cacheReadTokens: 1000, cacheWriteTokens: 100 },
+		});
+
+		const res = await app.request("/tabs");
+		expect(res.status).toBe(200);
+		const body = await res.json();
+		expect(Array.isArray(body.tabs)).toBe(true);
+		const tabU = body.tabs.find((t: { id: string }) => t.id === "tab-u");
+		const tabNone = body.tabs.find((t: { id: string }) => t.id === "tab-none");
+		expect(tabU.usageStats).toEqual({
+			inputTokens: 2200,
+			outputTokens: 100,
+			cacheReadTokens: 1000,
+			cacheWriteTokens: 1000,
+			requests: 2,
+			last: { inputTokens: 1200, outputTokens: 60, cacheReadTokens: 1000, cacheWriteTokens: 100 },
+		});
+		// A tab with no usage rows surfaces null (not undefined/missing).
+		expect(tabNone.usageStats).toBeNull();
+
+		fakeOpenTabs.length = 0;
+		fakeUsageStats.clear();
+	});
+});
+
 describe("GET /tabs/:id/chunks", () => {
 	it("returns the raw chunk window shape { chunks, total, oldestSeq }", async () => {
 		const res = await app.request("/tabs/tab-x/chunks?limit=50");