diff options
Diffstat (limited to 'packages/api/tests')
| -rw-r--r-- | packages/api/tests/agent-manager.test.ts | 190 | ||||
| -rw-r--r-- | packages/api/tests/routes.test.ts | 82 |
2 files changed, 270 insertions, 2 deletions
diff --git a/packages/api/tests/agent-manager.test.ts b/packages/api/tests/agent-manager.test.ts index b9b4510..95fb558 100644 --- a/packages/api/tests/agent-manager.test.ts +++ b/packages/api/tests/agent-manager.test.ts @@ -91,6 +91,19 @@ function setFakeSetting(key: string, value: string): void { fakeSettings.set(key, value); } +// Capture every appendChunks(tabId, drafts) call so tests can assert what got +// persisted (e.g. usage side-channel rows). The real explodeTurn is mocked to +// return [], so content drafts are empty here; usage rows are pushed directly +// by processMessage's flushAssistant, making them the visible drafts. +interface AppendChunksCall { + tabId: string; + drafts: Array<{ turnId: string; step: number; role: string; type: string; data: unknown }>; +} +const appendChunksCalls: AppendChunksCall[] = []; +function resetAppendChunksCalls(): void { + appendChunksCalls.length = 0; +} + // Allow tests to swap in a custom `run` generator (e.g. to simulate // a fallback failure mid-stream). Returning to undefined restores // the default. @@ -345,7 +358,8 @@ vi.mock("@dispatch/core", () => ({ getSetting(key: string) { return fakeSettings.get(key) ?? null; }, - appendChunks() { + appendChunks(tabId: string, drafts: AppendChunksCall["drafts"]) { + appendChunksCalls.push({ tabId, drafts: [...drafts] }); return []; }, explodeUserText() { @@ -406,6 +420,7 @@ describe("AgentManager", () => { resetFakeSettings(); setRunImpl(null); appendEventToChunksSpy.mockClear(); + resetAppendChunksCalls(); }); it("initial status is idle", () => { @@ -1331,4 +1346,177 @@ describe("AgentManager", () => { expect(tools).not.toContain("read_tab"); }); }); + + // ─── Usage side-channel persistence ────────────────────────────── + // + // `usage` AgentEvents (one per LLM round-trip) are persisted as invisible + // `type:"usage"` chunk rows so per-tab token/cache telemetry survives a + // reload. They ride the SAME atomic appendChunks call as the turn's content + // rows (one fsync, contiguous seqs). A superseded fallback attempt's usage is + // discarded with its `chunks` (per-attempt accumulator). + describe("usage persistence", () => { + it("writes one usage row per usage event emitted during a turn", async () => { + const manager = new AgentManager(); + setRunImpl(async function* () { + yield { type: "status", status: "running" } as const; + yield { + type: "usage", + usage: { inputTokens: 1000, outputTokens: 40, cacheReadTokens: 0, cacheWriteTokens: 900 }, + } as const; + yield { type: "text-delta", delta: "step two" } as const; + yield { + type: "usage", + usage: { + inputTokens: 1200, + outputTokens: 60, + cacheReadTokens: 1000, + cacheWriteTokens: 100, + }, + } as const; + yield { + type: "done", + message: { role: "assistant", chunks: [{ type: "text", text: "step two" }] }, + } as const; + yield { type: "status", status: "idle" } as const; + }); + + await manager.processMessage("tab-usage-rows", "go"); + + const usageDrafts = appendChunksCalls + .flatMap((c) => c.drafts) + .filter((d) => d.type === "usage"); + expect(usageDrafts).toHaveLength(2); + // One row per event, role=assistant, step cosmetic (0). + expect(usageDrafts.every((d) => d.role === "assistant" && d.step === 0)).toBe(true); + expect(usageDrafts[0]?.data).toEqual({ + inputTokens: 1000, + outputTokens: 40, + cacheReadTokens: 0, + cacheWriteTokens: 900, + }); + expect(usageDrafts[1]?.data).toEqual({ + inputTokens: 1200, + outputTokens: 60, + cacheReadTokens: 1000, + cacheWriteTokens: 100, + }); + }); + + it("emits usage rows in the SAME appendChunks call as the turn's content (one atomic write)", async () => { + const manager = new AgentManager(); + setRunImpl(async function* () { + yield { type: "status", status: "running" } as const; + yield { type: "text-delta", delta: "hi" } as const; + yield { + type: "usage", + usage: { inputTokens: 5, outputTokens: 1, cacheReadTokens: 2, cacheWriteTokens: 3 }, + } as const; + yield { + type: "done", + message: { role: "assistant", chunks: [{ type: "text", text: "hi" }] }, + } as const; + yield { type: "status", status: "idle" } as const; + }); + + await manager.processMessage("tab-usage-atomic", "go"); + + // Exactly one appendChunks call carries the usage draft (the flush). The + // user-message append and any system-row appends carry no usage rows. + const callsWithUsage = appendChunksCalls.filter((c) => + c.drafts.some((d) => d.type === "usage"), + ); + expect(callsWithUsage).toHaveLength(1); + expect(callsWithUsage[0]?.tabId).toBe("tab-usage-atomic"); + }); + + it("discards a superseded (rate-limited) attempt's usage on fallback", async () => { + const manager = new AgentManager(); + // Inject a minimal model registry so the rate-limit fallback path is + // taken (real `processMessage` requires modelRegistry + a resolved + // keyId + a next fallback entry to retry). + const markKeyExhausted = vi.fn(); + ( + manager as unknown as { + modelRegistry: { + getKeys(): Array<{ definition: Record<string, unknown> }>; + markKeyExhausted(): void; + }; + } + ).modelRegistry = { + getKeys: () => [ + { + definition: { + id: "k1", + provider: "openai-compatible", + env: "ENV1", + base_url: "http://x", + }, + }, + { + definition: { + id: "k2", + provider: "openai-compatible", + env: "ENV2", + base_url: "http://y", + }, + }, + ], + markKeyExhausted, + }; + + let attempt = 0; + setRunImpl(async function* () { + attempt++; + yield { type: "status", status: "running" } as const; + if (attempt === 1) { + // Attempt 1 emits usage then rate-limits — its usage must be dropped. + yield { + type: "usage", + usage: { inputTokens: 999, outputTokens: 9, cacheReadTokens: 0, cacheWriteTokens: 0 }, + } as const; + yield { type: "error", error: "rate limit exceeded (status=429)" } as const; + return; + } + // Attempt 2 succeeds — only its usage should persist. + yield { + type: "usage", + usage: { inputTokens: 222, outputTokens: 22, cacheReadTokens: 100, cacheWriteTokens: 5 }, + } as const; + yield { + type: "done", + message: { role: "assistant", chunks: [{ type: "text", text: "recovered" }] }, + } as const; + yield { type: "status", status: "idle" } as const; + }); + + const agentModels = [ + { key_id: "k1", model_id: "m1" }, + { key_id: "k2", model_id: "m2" }, + ]; + await manager.processMessage( + "tab-usage-fallback", + "go", + undefined, + undefined, + undefined, + undefined, + agentModels, + ); + + expect(attempt).toBe(2); // confirm the fallback retry actually happened + expect(markKeyExhausted).toHaveBeenCalled(); + + const usageDrafts = appendChunksCalls + .flatMap((c) => c.drafts) + .filter((d) => d.type === "usage"); + // Only attempt 2's usage survives. + expect(usageDrafts).toHaveLength(1); + expect(usageDrafts[0]?.data).toEqual({ + inputTokens: 222, + outputTokens: 22, + cacheReadTokens: 100, + cacheWriteTokens: 5, + }); + }); + }); }); diff --git a/packages/api/tests/routes.test.ts b/packages/api/tests/routes.test.ts index c768cee..ad6d5b1 100644 --- a/packages/api/tests/routes.test.ts +++ b/packages/api/tests/routes.test.ts @@ -1,6 +1,24 @@ import type { ToolDefinition } from "@dispatch/core"; import { describe, expect, it, vi } from "vitest"; +// Seedable backing stores for the tabs route (GET /tabs enrichment). Declared +// before vi.mock so the hoisted factory closure can reference them; populated +// per-test. +interface FakeOpenTab { + id: string; + title: string; + keyId: string | null; + modelId: string | null; + parentTabId: string | null; + status: string; + isOpen: boolean; + position: number; + createdAt: number; + updatedAt: number; +} +const fakeOpenTabs: FakeOpenTab[] = []; +const fakeUsageStats = new Map<string, unknown>(); + // Mock @dispatch/core's Agent to avoid real LLM calls vi.mock("@dispatch/core", () => ({ Agent: class MockAgent { @@ -170,7 +188,7 @@ vi.mock("@dispatch/core", () => ({ return null; }, listOpenTabs() { - return []; + return [...fakeOpenTabs]; }, resolveTabPrefix() { return { status: "none" }; @@ -230,6 +248,9 @@ vi.mock("@dispatch/core", () => ({ getTotalChunkCount() { return 0; }, + getUsageStatsForTab(tabId: string) { + return fakeUsageStats.get(tabId) ?? null; + }, appendEventToChunks(_chunks: unknown[], _event: unknown) { // no-op stub }, @@ -413,6 +434,65 @@ describe("POST /chat", () => { }); }); +describe("GET /tabs", () => { + it("enriches each open tab with its persisted usageStats aggregate", async () => { + fakeOpenTabs.length = 0; + fakeUsageStats.clear(); + fakeOpenTabs.push({ + id: "tab-u", + title: "Has usage", + keyId: null, + modelId: null, + parentTabId: null, + status: "idle", + isOpen: true, + position: 0, + createdAt: 0, + updatedAt: 0, + }); + fakeOpenTabs.push({ + id: "tab-none", + title: "No usage", + keyId: null, + modelId: null, + parentTabId: null, + status: "idle", + isOpen: true, + position: 1, + createdAt: 0, + updatedAt: 0, + }); + fakeUsageStats.set("tab-u", { + inputTokens: 2200, + outputTokens: 100, + cacheReadTokens: 1000, + cacheWriteTokens: 1000, + requests: 2, + last: { inputTokens: 1200, outputTokens: 60, cacheReadTokens: 1000, cacheWriteTokens: 100 }, + }); + + const res = await app.request("/tabs"); + expect(res.status).toBe(200); + const body = await res.json(); + expect(Array.isArray(body.tabs)).toBe(true); + const tabU = body.tabs.find((t: { id: string }) => t.id === "tab-u"); + const tabNone = body.tabs.find((t: { id: string }) => t.id === "tab-none"); + expect(tabU.usageStats).toEqual({ + inputTokens: 2200, + outputTokens: 100, + cacheReadTokens: 1000, + cacheWriteTokens: 1000, + requests: 2, + last: { inputTokens: 1200, outputTokens: 60, cacheReadTokens: 1000, cacheWriteTokens: 100 }, + }); + // A tab with no usage rows surfaces null (not undefined/missing). + expect(tabNone.usageStats).toBeNull(); + + fakeOpenTabs.length = 0; + fakeUsageStats.clear(); + }); +}); + describe("GET /tabs/:id/chunks", () => { it("returns the raw chunk window shape { chunks, total, oldestSeq }", async () => { const res = await app.request("/tabs/tab-x/chunks?limit=50"); |
