feat(context-window): show current/max context usage per tab/model

Add a 'Context Window' sidebar view showing the live context occupancy (the latest request's input + output tokens) against the model's maximum context window, resolved dynamically from the models.dev catalog.

- core: models.dev catalog module (resolveContextLimit) with disk cache, TTL, stale fallback, and an offline-penalty memo; resolves to null for unknown models.
- api: GET /models/context-limit?provider=&modelId=.
- frontend: ContextWindowPanel + computeContextUsage helper; App resolves and caches the active model's max (anthropic/opencode-anthropic only); percentage shown to 2 decimals; degrades to a bare token count when the max is unknown.
- tests: core catalog (13), api route (3), frontend helper (6).
author: Adam Malczewski <[email protected]> 2026-06-02 13:25:23 +0900
committer: Adam Malczewski <[email protected]> 2026-06-02 13:25:23 +0900
commit: 6433cc42de1ceca7210e2b64ad3b98b3a5ce7d02 (patch)
tree: 78b30dedd471ab76177b3631a956ab160615e303
parent: 3f629a8469fe483243671e1ca15582a111e96541 (diff)
download: dispatch-6433cc42de1ceca7210e2b64ad3b98b3a5ce7d02.tar.gz
dispatch-6433cc42de1ceca7210e2b64ad3b98b3a5ce7d02.zip
11 files changed, 668 insertions, 1 deletions
diff --git a/packages/api/src/routes/models.ts b/packages/api/src/routes/models.ts
index 03c079a..6a0f5dc 100644
--- a/packages/api/src/routes/models.ts
+++ b/packages/api/src/routes/models.ts
@@ -17,6 +17,7 @@ import {
 	listStoredCredentials,
 	refreshAccountCredentialsAsync,
 	resolveApiKey,
+	resolveContextLimit,
 	setApiKey,
 	validateAccountCredentials,
 } from "@dispatch/core";
@@ -161,6 +162,21 @@ modelsRoutes.get("/available", async (c) => {
 	return c.json({ models });
 });
 
+// Resolve a model's MAXIMUM context window (in tokens) from the models.dev
+// catalog. Returns `{ contextLimit: number | null }`; `null` means the model's
+// limit is unknown (unsupported provider, unknown model, or catalog offline),
+// which the frontend renders without a denominator/percentage.
+modelsRoutes.get("/context-limit", async (c) => {
+	const provider = c.req.query("provider");
+	const modelId = c.req.query("modelId");
+	if (!provider || !modelId) {
+		return c.json({ error: "provider and modelId query parameters are required" }, 400);
+	}
+
+	const contextLimit = await resolveContextLimit(provider, modelId);
+	return c.json({ contextLimit });
+});
+
 // List available Claude accounts with validated credentials
 modelsRoutes.get("/claude-accounts", async (c) => {
 	const candidates = resolveClaudeAccounts();
diff --git a/packages/api/tests/routes.test.ts b/packages/api/tests/routes.test.ts
index c768cee..e4b8f0f 100644
--- a/packages/api/tests/routes.test.ts
+++ b/packages/api/tests/routes.test.ts
@@ -268,6 +268,13 @@ vi.mock("@dispatch/core", () => ({
 			execute: async () => "mock",
 		};
 	},
+	// ── models.dev context-limit stub ─────────────────────────────
+	resolveContextLimit(provider: string, modelId: string) {
+		if (provider === "anthropic" && modelId === "claude-sonnet-4-5") {
+			return Promise.resolve(200000);
+		}
+		return Promise.resolve(null);
+	},
 	// ── ntfy notifications stubs ──────────────────────────────────
 	NotificationDispatcher: class MockNotificationDispatcher {
 		attachToAgentManager() {
@@ -751,3 +758,28 @@ describe("Wake schedule routes", () => {
 		expect(body.schedule["13"]).toBeUndefined();
 	});
 });
+
+describe("GET /models/context-limit", () => {
+	it("returns the resolved context limit for a known model", async () => {
+		const res = await app.request(
+			"/models/context-limit?provider=anthropic&modelId=claude-sonnet-4-5",
+		);
+		expect(res.status).toBe(200);
+		const body = (await res.json()) as { contextLimit: number | null };
+		expect(body.contextLimit).toBe(200000);
+	});
+
+	it("returns null contextLimit for an unknown model", async () => {
+		const res = await app.request("/models/context-limit?provider=anthropic&modelId=mystery");
+		expect(res.status).toBe(200);
+		const body = (await res.json()) as { contextLimit: number | null };
+		expect(body.contextLimit).toBeNull();
+	});
+
+	it("400s when provider or modelId is missing", async () => {
+		const res1 = await app.request("/models/context-limit?provider=anthropic");
+		expect(res1.status).toBe(400);
+		const res2 = await app.request("/models/context-limit?modelId=claude-sonnet-4-5");
+		expect(res2.status).toBe(400);
+	});
+});
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 327b0a5..9d7133f 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -67,7 +67,11 @@ export {
 } from "./llm/debug-logger.js";
 export { createProvider } from "./llm/provider.js";
 // Models
-export { ModelRegistry } from "./models/index.js";
+export {
+	getModelsCatalog,
+	ModelRegistry,
+	resolveContextLimit,
+} from "./models/index.js";
 // Notifications (ntfy.sh)
 export * from "./notifications/index.js";
 export * from "./permission/index.js";
diff --git a/packages/core/src/models/catalog.ts b/packages/core/src/models/catalog.ts
new file mode 100644
index 0000000..dea4647
--- /dev/null
+++ b/packages/core/src/models/catalog.ts
@@ -0,0 +1,179 @@
+import { mkdirSync, readFileSync, renameSync, statSync, writeFileSync } from "node:fs";
+import { dirname } from "node:path";
+
+/**
+ * models.dev-backed model catalog. Resolves a model's MAXIMUM context window
+ * (`limit.context`) dynamically from the public models.dev API, mirroring how
+ * opencode determines per-model context limits — no hardcoded table.
+ *
+ * The catalog is fetched once, cached on disk with a short TTL, and reused. On
+ * fetch failure we fall back to a stale-but-present cache so the lookup keeps
+ * working offline. Lookups never throw: an unknown/unreachable model resolves
+ * to `null`, which the UI renders as "max unknown".
+ */
+
+/** Shape of the slice of models.dev's `/api.json` we consume. */
+interface ModelsDevModel {
+	limit?: {
+		context?: number;
+		output?: number;
+	};
+}
+
+interface ModelsDevProvider {
+	id: string;
+	models: Record<string, ModelsDevModel | undefined>;
+}
+
+type ModelsDevCatalog = Record<string, ModelsDevProvider | undefined>;
+
+/** Where models.dev's API lives. Overridable for tests / private mirrors. */
+const MODELS_URL = process.env.DISPATCH_MODELS_URL || "https://models.dev";
+
+/** Disk cache path (reuses the repo's `/tmp/dispatch` convention). */
+const CACHE_PATH = "/tmp/dispatch/models-dev.json";
+
+/** How long a cached catalog stays fresh before we re-fetch. */
+const CACHE_TTL_MS = 5 * 60 * 1000;
+
+/** Network timeout for the catalog fetch. */
+const FETCH_TIMEOUT_MS = 10_000;
+
+/**
+ * After a failed fetch we memoize the fallback for this long before retrying,
+ * so a sustained outage doesn't make every lookup hang on a fresh timeout.
+ */
+const FETCH_PENALTY_MS = 60_000;
+
+/**
+ * Dispatch provider id → models.dev provider ids to search, in priority order.
+ * We only support Claude-backed providers (per product scope). `anthropic` and
+ * `opencode-anthropic` are both Claude; we try the first-party `anthropic`
+ * catalog first, then the `opencode` gateway catalog as a fallback.
+ */
+const PROVIDER_MAP: Record<string, string[]> = {
+	anthropic: ["anthropic", "opencode"],
+	"opencode-anthropic": ["anthropic", "opencode"],
+};
+
+/** In-process memoized catalog promise (one fetch/parse per TTL window). */
+let cached: { catalog: ModelsDevCatalog; fetchedAt: number } | null = null;
+let inflight: Promise<ModelsDevCatalog> | null = null;
+
+function readDiskCache(): { catalog: ModelsDevCatalog; mtimeMs: number } | null {
+	try {
+		const stat = statSync(CACHE_PATH);
+		const text = readFileSync(CACHE_PATH, "utf-8");
+		return { catalog: JSON.parse(text) as ModelsDevCatalog, mtimeMs: stat.mtimeMs };
+	} catch {
+		return null;
+	}
+}
+
+function writeDiskCache(text: string): void {
+	try {
+		mkdirSync(dirname(CACHE_PATH), { recursive: true });
+		// Write-then-rename so a concurrent reader never sees a half-written
+		// file (rename is atomic on the same filesystem). The temp name is
+		// process-scoped to avoid two writers clobbering each other's temp.
+		const tmp = `${CACHE_PATH}.${process.pid}.tmp`;
+		writeFileSync(tmp, text, "utf-8");
+		renameSync(tmp, CACHE_PATH);
+	} catch {
+		// Best-effort: a read-only /tmp shouldn't break lookups.
+	}
+}
+
+async function fetchCatalog(): Promise<ModelsDevCatalog> {
+	const controller = new AbortController();
+	const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
+	try {
+		const res = await fetch(`${MODELS_URL}/api.json`, { signal: controller.signal });
+		if (!res.ok) throw new Error(`models.dev returned HTTP ${res.status}`);
+		const text = await res.text();
+		const catalog = JSON.parse(text) as ModelsDevCatalog;
+		writeDiskCache(text);
+		return catalog;
+	} finally {
+		clearTimeout(timer);
+	}
+}
+
+/**
+ * Load the models.dev catalog, preferring in-process memo, then a fresh disk
+ * cache, then a network fetch. On network failure, falls back to a stale disk
+ * cache or the expired in-process memo; failing both, returns an empty catalog.
+ */
+export async function getModelsCatalog(): Promise<ModelsDevCatalog> {
+	if (process.env.DISPATCH_DISABLE_MODELS_FETCH) {
+		const disk = readDiskCache();
+		return disk?.catalog ?? {};
+	}
+
+	const now = Date.now();
+	if (cached && now - cached.fetchedAt < CACHE_TTL_MS) return cached.catalog;
+
+	// Fresh disk cache satisfies the request without a network round-trip.
+	const disk = readDiskCache();
+	if (disk && now - disk.mtimeMs < CACHE_TTL_MS) {
+		// Inherit the file's mtime as `fetchedAt` so loading a disk cache into
+		// a fresh process doesn't reset its TTL (which would otherwise double
+		// the worst-case staleness across process boundaries).
+		cached = { catalog: disk.catalog, fetchedAt: disk.mtimeMs };
+		return disk.catalog;
+	}
+
+	if (!inflight) {
+		inflight = fetchCatalog()
+			.then((catalog) => {
+				cached = { catalog, fetchedAt: Date.now() };
+				return catalog;
+			})
+			.catch((err) => {
+				// Network failed — serve stale data: disk first, else the expired memo.
+				console.warn(
+					`dispatch: failed to fetch models.dev catalog: ${err instanceof Error ? err.message : String(err)}`,
+				);
+				const fallback = disk?.catalog ?? cached?.catalog ?? ({} as ModelsDevCatalog);
+				// Memoize the fallback with a short "penalty" TTL so a sustained
+				// outage doesn't make every lookup hang on a fresh 10s timeout.
+				// `fetchedAt` is backdated so the memo expires after FETCH_PENALTY_MS.
+				cached = {
+					catalog: fallback,
+					fetchedAt: Date.now() - CACHE_TTL_MS + FETCH_PENALTY_MS,
+				};
+				return fallback;
+			})
+			.finally(() => {
+				inflight = null;
+			});
+	}
+	return inflight;
+}
+
+/**
+ * Resolve a model's maximum context window (in tokens) for the given Dispatch
+ * provider + model id. Never throws on bad input: resolves to `null` when the
+ * provider is unsupported, the model is unknown, or the catalog is unavailable
+ * or malformed — callers render that as "max unknown" (no denominator / %).
+ */
+export async function resolveContextLimit(
+	provider: string,
+	modelId: string,
+): Promise<number | null> {
+	const candidates = PROVIDER_MAP[provider];
+	// `provider` is caller-supplied; inherited keys ("constructor") are truthy non-arrays.
+	if (!Array.isArray(candidates) || !modelId) return null;
+	const catalog = await getModelsCatalog();
+	for (const providerId of candidates) {
+		const ctx = catalog?.[providerId]?.models?.[modelId]?.limit?.context;
+		if (typeof ctx === "number" && ctx > 0) return ctx;
+	}
+	return null;
+}
+
+/** Test-only: reset the in-process memo so a test can re-exercise loading. */
+export function __resetCatalogCacheForTests(): void {
+	cached = null;
+	inflight = null;
+}
diff --git a/packages/core/src/models/index.ts b/packages/core/src/models/index.ts
index cf59749..2fcd657 100644
--- a/packages/core/src/models/index.ts
+++ b/packages/core/src/models/index.ts
@@ -1 +1,5 @@
+export {
+	getModelsCatalog,
+	resolveContextLimit,
+} from "./catalog.js";
 export { ModelRegistry } from "./registry.js";
diff --git a/packages/core/tests/models/catalog.test.ts b/packages/core/tests/models/catalog.test.ts
new file mode 100644
index 0000000..51043e6
--- /dev/null
+++ b/packages/core/tests/models/catalog.test.ts
@@ -0,0 +1,158 @@
+import { existsSync, rmSync, utimesSync, writeFileSync } from "node:fs";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import {
+	__resetCatalogCacheForTests,
+	getModelsCatalog,
+	resolveContextLimit,
+} from "../../src/models/catalog.js";
+
+const CACHE_PATH = "/tmp/dispatch/models-dev.json";
+
+// A trimmed models.dev-shaped catalog covering the providers we support.
+const CATALOG = {
+	anthropic: {
+		id: "anthropic",
+		models: {
+			"claude-sonnet-4-5": { limit: { context: 200000, output: 64000 } },
+			"claude-sonnet-4-6": { limit: { context: 1000000, output: 64000 } },
+		},
+	},
+	opencode: {
+		id: "opencode",
+		models: {
+			"glm-4-6": { limit: { context: 131072, output: 8192 } },
+		},
+	},
+};
+
+function mockFetchOnce(catalog: unknown, ok = true, status = 200) {
+	const fn = vi.fn(() =>
+		Promise.resolve({
+			ok,
+			status,
+			text: () => Promise.resolve(JSON.stringify(catalog)),
+		} as Response),
+	);
+	vi.stubGlobal("fetch", fn);
+	return fn;
+}
+
+beforeEach(() => {
+	__resetCatalogCacheForTests();
+	if (existsSync(CACHE_PATH)) rmSync(CACHE_PATH);
+	delete process.env.DISPATCH_DISABLE_MODELS_FETCH;
+});
+
+afterEach(() => {
+	vi.unstubAllGlobals();
+	if (existsSync(CACHE_PATH)) rmSync(CACHE_PATH);
+});
+
+describe("resolveContextLimit", () => {
+	it("resolves a known anthropic model to its context window", async () => {
+		mockFetchOnce(CATALOG);
+		expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBe(200000);
+		expect(await resolveContextLimit("anthropic", "claude-sonnet-4-6")).toBe(1000000);
+	});
+
+	it("maps opencode-anthropic to the anthropic catalog, then opencode fallback", async () => {
+		mockFetchOnce(CATALOG);
+		// Present in the anthropic catalog.
+		expect(await resolveContextLimit("opencode-anthropic", "claude-sonnet-4-5")).toBe(200000);
+		// Absent in anthropic, found in the opencode gateway catalog.
+		expect(await resolveContextLimit("opencode-anthropic", "glm-4-6")).toBe(131072);
+	});
+
+	it("returns null for an unknown model id", async () => {
+		mockFetchOnce(CATALOG);
+		expect(await resolveContextLimit("anthropic", "no-such-model")).toBeNull();
+	});
+
+	it("returns null for an unsupported provider (no network needed)", async () => {
+		const fetchFn = mockFetchOnce(CATALOG);
+		expect(await resolveContextLimit("google", "gemini-2.5-pro")).toBeNull();
+		expect(await resolveContextLimit("anthropic", "")).toBeNull();
+		expect(fetchFn).not.toHaveBeenCalled();
+	});
+
+	it("returns null when the model has no positive context limit", async () => {
+		mockFetchOnce({
+			anthropic: { id: "anthropic", models: { broken: { limit: { context: 0 } } } },
+		});
+		expect(await resolveContextLimit("anthropic", "broken")).toBeNull();
+	});
+
+	it("does not throw on a malformed provider entry missing `models`", async () => {
+		// A provider object without a `models` map must degrade to null, not crash.
+		mockFetchOnce({ anthropic: { id: "anthropic" } });
+		expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBeNull();
+	});
+
+	it("does not throw when limit/context fields are absent", async () => {
+		mockFetchOnce({ anthropic: { id: "anthropic", models: { m: {} } } });
+		expect(await resolveContextLimit("anthropic", "m")).toBeNull();
+	});
+});
+
+describe("getModelsCatalog caching", () => {
+	it("fetches once and serves the in-process memo on subsequent calls", async () => {
+		const fetchFn = mockFetchOnce(CATALOG);
+		await resolveContextLimit("anthropic", "claude-sonnet-4-5");
+		await resolveContextLimit("anthropic", "claude-sonnet-4-6");
+		await getModelsCatalog();
+		expect(fetchFn).toHaveBeenCalledTimes(1);
+	});
+
+	it("reuses a fresh disk cache without re-fetching across processes", async () => {
+		// Simulate another process having written a fresh cache.
+		writeFileSync(CACHE_PATH, JSON.stringify(CATALOG), "utf-8");
+		const fetchFn = vi.fn(() => Promise.reject(new Error("network should not be hit")));
+		vi.stubGlobal("fetch", fetchFn);
+		expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBe(200000);
+		expect(fetchFn).not.toHaveBeenCalled();
+	});
+
+	it("falls back to a STALE disk cache when the network fails", async () => {
+		writeFileSync(CACHE_PATH, JSON.stringify(CATALOG), "utf-8");
+		// Age the cache well past the TTL so the fetch path is taken.
+		const old = Date.now() / 1000 - 3600;
+		utimesSync(CACHE_PATH, old, old);
+		const fetchFn = vi.fn(() => Promise.reject(new Error("offline")));
+		vi.stubGlobal("fetch", fetchFn);
+		const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+
+		expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBe(200000);
+		expect(fetchFn).toHaveBeenCalledTimes(1);
+		warn.mockRestore();
+	});
+
+	it("returns null when fetch fails and no cache exists", async () => {
+		const fetchFn = vi.fn(() => Promise.reject(new Error("offline")));
+		vi.stubGlobal("fetch", fetchFn);
+		const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+		expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBeNull();
+		warn.mockRestore();
+	});
+
+	it("does not hit the network when DISPATCH_DISABLE_MODELS_FETCH is set", async () => {
+		process.env.DISPATCH_DISABLE_MODELS_FETCH = "1";
+		const fetchFn = vi.fn(() => Promise.reject(new Error("should not fetch")));
+		vi.stubGlobal("fetch", fetchFn);
+		expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBeNull();
+		expect(fetchFn).not.toHaveBeenCalled();
+	});
+
+	it("memoizes the fallback after a failed fetch so it does not re-hit the network", async () => {
+		const fetchFn = vi.fn(() => Promise.reject(new Error("offline")));
+		vi.stubGlobal("fetch", fetchFn);
+		const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+
+		// First lookup triggers the (failing) fetch.
+		expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBeNull();
+		// Subsequent lookups within the penalty window must NOT re-fetch.
+		expect(await resolveContextLimit("anthropic", "claude-sonnet-4-6")).toBeNull();
+		await getModelsCatalog();
+		expect(fetchFn).toHaveBeenCalledTimes(1);
+		warn.mockRestore();
+	});
+});
diff --git a/packages/frontend/src/App.svelte b/packages/frontend/src/App.svelte
index eaa28e8..0344af4 100644
--- a/packages/frontend/src/App.svelte
+++ b/packages/frontend/src/App.svelte
@@ -74,6 +74,62 @@ $effect(() => {
 	}
 });
 
+// ─── Context-window max lookup ─────────────────────────────────
+// Resolve the active model's MAXIMUM context window from models.dev (via the
+// API), so the Context Window sidebar view can show `current / max`. Cached
+// per provider+model; `null` when unknown (the view then hides the
+// denominator/percentage). Only Claude-backed providers are resolvable.
+let contextLimit = $state<number | null>(null);
+const contextLimitCache = new Map<string, number | null>();
+
+$effect(() => {
+	const tab = tabStore.activeTab;
+	const keyId = tab?.keyId ?? null;
+	const modelId = tab?.modelId ?? null;
+	const provider = keyId ? (modelsData.keys.find((k) => k.id === keyId)?.provider ?? null) : null;
+
+	if (!provider || !modelId) {
+		contextLimit = null;
+		return;
+	}
+
+	const cacheKey = `${provider}/${modelId}`;
+	if (contextLimitCache.has(cacheKey)) {
+		contextLimit = contextLimitCache.get(cacheKey) ?? null;
+		return;
+	}
+
+	// Clear immediately so a slow/failed fetch can't leave the PREVIOUS
+	// model's max on screen (which would render this model's tokens against
+	// the wrong denominator). The view degrades to a bare token count until
+	// the fetch resolves.
+	contextLimit = null;
+
+	// Fetch is async; guard against a stale response overwriting a newer
+	// selection by re-checking the active tab's key/model on resolve.
+	void (async () => {
+		try {
+			const res = await fetch(
+				`${config.apiBase}/models/context-limit?provider=${encodeURIComponent(provider)}&modelId=${encodeURIComponent(modelId)}`,
+			);
+			if (!res.ok) return;
+			const data = (await res.json()) as { contextLimit?: number | null };
+			const limit = data.contextLimit ?? null;
+			if (limit !== null) contextLimitCache.set(cacheKey, limit); // null may be transient
+			const current = tabStore.activeTab;
+			const currentProvider = current?.keyId
+				? (modelsData.keys.find((k) => k.id === current.keyId)?.provider ?? null)
+				: null;
+			if (currentProvider === provider && current?.modelId === modelId) {
+				contextLimit = limit;
+			}
+		} catch {
+			// Leave contextLimit as-is on network error; view falls back to
+			// showing the bare token count.
+		}
+	})();
+});
+
 onMount(() => {
 	// Apply persisted theme (or the shared DEFAULT_THEME if nothing is
 	// stored) so the first paint matches what the Settings panel will
@@ -137,6 +193,7 @@ onMount(() => {
 				tasks={tabStore.activeTab?.tasks ?? []}
 				cacheStats={tabStore.activeTab?.cacheStats ?? null}
 				cacheTabTitle={tabStore.activeTab?.title ?? null}
+				{contextLimit}
 				permissionLog={tabStore.permissionLog}
 				apiBase={config.apiBase}
 				activeKeyId={tabStore.activeTab?.keyId ?? null}
diff --git a/packages/frontend/src/lib/components/ContextWindowPanel.svelte b/packages/frontend/src/lib/components/ContextWindowPanel.svelte
new file mode 100644
index 0000000..6c7de05
--- /dev/null
+++ b/packages/frontend/src/lib/components/ContextWindowPanel.svelte
@@ -0,0 +1,85 @@
+<script lang="ts">
+import { computeContextUsage } from "../context-window.js";
+import type { CacheStats } from "../types.js";
+
+const {
+	cacheStats = null,
+	contextLimit = null,
+	tabTitle = null,
+	modelId = null,
+}: {
+	cacheStats?: CacheStats | null;
+	contextLimit?: number | null;
+	tabTitle?: string | null;
+	modelId?: string | null;
+} = $props();
+
+const usage = $derived(computeContextUsage(cacheStats, contextLimit));
+
+// As the window fills, escalate color: calm → warning → danger.
+function fillClass(pct: number): string {
+	if (pct >= 90) return "progress-error";
+	if (pct >= 70) return "progress-warning";
+	return "progress-success";
+}
+
+function fmt(n: number): string {
+	return n.toLocaleString();
+}
+
+const hasUsage = $derived((cacheStats?.last ?? null) !== null);
+</script>
+
+<div class="flex flex-col gap-3 flex-1 min-h-0 overflow-y-auto">
+	{#if !hasUsage}
+		<p class="text-xs text-base-content/50">
+			No context data yet. Send a message — the current context size appears
+			here after the first response.
+		</p>
+	{:else}
+		<div class="bg-base-200 rounded-lg p-2">
+			<div class="flex items-center gap-1.5 mb-2">
+				<span class="text-xs font-semibold">Context Window</span>
+				{#if tabTitle}
+					<span class="badge badge-xs badge-ghost">{tabTitle}</span>
+				{/if}
+				{#if usage.percent !== null}
+					<span class="badge badge-xs ml-auto">{usage.percent.toFixed(2)}%</span>
+				{/if}
+			</div>
+
+			<!-- Headline: current / max (or just current when max is unknown) -->
+			<div class="flex items-baseline gap-1.5">
+				<span class="text-lg font-mono font-semibold">{fmt(usage.current)}</span>
+				{#if usage.max !== null}
+					<span class="text-xs text-base-content/50 font-mono">/ {fmt(usage.max)}</span>
+				{/if}
+				<span class="text-xs text-base-content/40 ml-1">tokens</span>
+			</div>
+
+			{#if usage.percent !== null}
+				<progress
+					class="progress w-full h-2 mt-1.5 {fillClass(usage.percent)}"
+					value={usage.percent}
+					max="100"
+				></progress>
+			{:else}
+				<p class="text-xs text-base-content/40 mt-1.5">
+					Max context size unknown for this model.
+				</p>
+			{/if}
+
+			{#if modelId}
+				<div class="text-xs text-base-content/40 mt-1.5 truncate" title={modelId}>
+					{modelId}
+				</div>
+			{/if}
+		</div>
+
+		<p class="text-xs text-base-content/40">
+			Current context = the most recent request's prompt + output (what the
+			model actually held in its window that turn). Grows as the conversation
+			gets longer. Resets on reload.
+		</p>
+	{/if}
+</div>
diff --git a/packages/frontend/src/lib/components/SidebarPanel.svelte b/packages/frontend/src/lib/components/SidebarPanel.svelte
index 491b1bd..573a6fc 100644
--- a/packages/frontend/src/lib/components/SidebarPanel.svelte
+++ b/packages/frontend/src/lib/components/SidebarPanel.svelte
@@ -4,6 +4,7 @@ import type { CacheStats, KeyInfo, LogEntry, TaskItem } from "../types.js";
 import CacheRatePanel from "./CacheRatePanel.svelte";
 import ClaudeReset from "./ClaudeReset.svelte";
 import ConfigPanel from "./ConfigPanel.svelte";
+import ContextWindowPanel from "./ContextWindowPanel.svelte";
 import DebugPanel from "./DebugPanel.svelte";
 import KeyUsage from "./KeyUsage.svelte";
 import ModelSelector from "./ModelSelector.svelte";
@@ -27,6 +28,7 @@ const {
 	tasks = [],
 	cacheStats = null,
 	cacheTabTitle = null,
+	contextLimit = null,
 	permissionLog = [],
 	apiBase = "",
 	activeKeyId = null,
@@ -47,6 +49,7 @@ const {
 	tasks?: TaskItem[];
 	cacheStats?: CacheStats | null;
 	cacheTabTitle?: string | null;
+	contextLimit?: number | null;
 	permissionLog?: LogEntry[];
 	apiBase?: string;
 	activeKeyId?: string | null;
@@ -89,6 +92,7 @@ const viewOptions = [
 	"Chat Settings",
 	"Key Usage",
 	"Cache Rate",
+	"Context Window",
 	"Claude Reset",
 	"Model Status",
 	"Tasks",
@@ -170,6 +174,13 @@ function contentClass(_selected: string): string {
 					<KeyUsage {keys} {apiBase} />
 				{:else if panel.selected === "Cache Rate"}
 					<CacheRatePanel {cacheStats} tabTitle={cacheTabTitle} />
+				{:else if panel.selected === "Context Window"}
+					<ContextWindowPanel
+						{cacheStats}
+						{contextLimit}
+						tabTitle={cacheTabTitle}
+						modelId={activeModelId}
+					/>
 				{:else if panel.selected === "Claude Reset"}
 					<ClaudeReset {apiBase} />
 				{:else if panel.selected === "Model Status"}
diff --git a/packages/frontend/src/lib/context-window.ts b/packages/frontend/src/lib/context-window.ts
new file mode 100644
index 0000000..c4321f8
--- /dev/null
+++ b/packages/frontend/src/lib/context-window.ts
@@ -0,0 +1,37 @@
+import type { CacheStats } from "./types.js";
+
+/**
+ * Context-window occupancy for the current tab/model.
+ *
+ * `current` is the size of the model's context on the MOST RECENT request —
+ * the last turn's full prompt (`inputTokens`, which already includes cached
+ * tokens for Anthropic) plus what the model generated that turn
+ * (`outputTokens`). This mirrors how opencode derives context fullness from
+ * the last assistant message, and reflects what actually occupies the model's
+ * window — NOT the session-cumulative totals shown by the Cache Rate view.
+ *
+ * `max` is the model's maximum context window from models.dev (or `null` when
+ * unknown). `percent` is `current / max * 100` clamped to [0, 100] (unrounded;
+ * the UI decides the displayed precision), or `null` when
+ * `max` is unknown — in which case the UI shows the bare token count with no
+ * denominator or progress bar.
+ */
+export interface ContextUsage {
+	current: number;
+	max: number | null;
+	percent: number | null;
+}
+
+export function computeContextUsage(
+	cacheStats: CacheStats | null | undefined,
+	contextLimit: number | null | undefined,
+): ContextUsage {
+	const last = cacheStats?.last ?? null;
+	const current = last ? last.inputTokens + last.outputTokens : 0;
+	const max = typeof contextLimit === "number" && contextLimit > 0 ? contextLimit : null;
+	// Precise (unrounded) percentage clamped to [0, 100]; the UI formats the
+	// decimal places. Kept unrounded so small contexts against huge windows
+	// (e.g. a few thousand tokens vs. 1,000,000) still read non-zero.
+	const percent = max ? Math.max(0, Math.min(100, (current / max) * 100)) : null;
+	return { current, max, percent };
+}
diff --git a/packages/frontend/tests/context-window.test.ts b/packages/frontend/tests/context-window.test.ts
new file mode 100644
index 0000000..bb64ed5
--- /dev/null
+++ b/packages/frontend/tests/context-window.test.ts
@@ -0,0 +1,84 @@
+import { describe, expect, it } from "vitest";
+import { computeContextUsage } from "../src/lib/context-window.js";
+import type { CacheStats } from "../src/lib/types.js";
+
+function stats(last: CacheStats["last"]): CacheStats {
+	return {
+		inputTokens: 0,
+		outputTokens: 0,
+		cacheReadTokens: 0,
+		cacheWriteTokens: 0,
+		requests: last ? 1 : 0,
+		last,
+	};
+}
+
+describe("computeContextUsage", () => {
+	it("derives current context from the LAST request's input + output", () => {
+		const usage = computeContextUsage(
+			stats({
+				inputTokens: 47000,
+				outputTokens: 1200,
+				cacheReadTokens: 40000,
+				cacheWriteTokens: 0,
+			}),
+			200000,
+		);
+		// 47000 + 1200 — NOT the cumulative totals, and cache tokens are already
+		// inside inputTokens (not re-added).
+		expect(usage.current).toBe(48200);
+		expect(usage.max).toBe(200000);
+		expect(usage.percent).toBeCloseTo(24.1, 5); // 48200 / 200000 * 100, unrounded
+	});
+
+	it("returns max=null and percent=null when the limit is unknown", () => {
+		const usage = computeContextUsage(
+			stats({ inputTokens: 100, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 }),
+			null,
+		);
+		expect(usage.current).toBe(100);
+		expect(usage.max).toBeNull();
+		expect(usage.percent).toBeNull();
+	});
+
+	it("treats a non-positive limit as unknown", () => {
+		const usage = computeContextUsage(
+			stats({ inputTokens: 100, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 }),
+			0,
+		);
+		expect(usage.max).toBeNull();
+		expect(usage.percent).toBeNull();
+	});
+
+	it("reports zero usage when no request has completed yet", () => {
+		expect(computeContextUsage(null, 200000)).toEqual({
+			current: 0,
+			max: 200000,
+			percent: 0,
+		});
+		expect(computeContextUsage(stats(null), 200000)).toEqual({
+			current: 0,
+			max: 200000,
+			percent: 0,
+		});
+	});
+
+	it("clamps percent to 100 when context overflows the window", () => {
+		const usage = computeContextUsage(
+			stats({ inputTokens: 250000, outputTokens: 5000, cacheReadTokens: 0, cacheWriteTokens: 0 }),
+			200000,
+		);
+		expect(usage.current).toBe(255000);
+		expect(usage.percent).toBe(100);
+	});
+
+	it("keeps an unrounded percent so the UI can show 2 decimals", () => {
+		const usage = computeContextUsage(
+			stats({ inputTokens: 3690, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 }),
+			1000000,
+		);
+		// 3690 / 1,000,000 * 100 = 0.369 → displayed as "0.37%" (toFixed(2)).
+		expect(usage.percent).toBeCloseTo(0.369, 6);
+		expect((usage.percent as number).toFixed(2)).toBe("0.37");
+	});
+});
author	Adam Malczewski <[email protected]>	2026-06-02 13:25:23 +0900
committer	Adam Malczewski <[email protected]>	2026-06-02 13:25:23 +0900
commit	6433cc42de1ceca7210e2b64ad3b98b3a5ce7d02 (patch)
tree	78b30dedd471ab76177b3631a956ab160615e303
parent	3f629a8469fe483243671e1ca15582a111e96541 (diff)
download	dispatch-6433cc42de1ceca7210e2b64ad3b98b3a5ce7d02.tar.gz dispatch-6433cc42de1ceca7210e2b64ad3b98b3a5ce7d02.zip