summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAdam Malczewski <[email protected]>2026-06-02 13:25:23 +0900
committerAdam Malczewski <[email protected]>2026-06-02 13:25:23 +0900
commit6433cc42de1ceca7210e2b64ad3b98b3a5ce7d02 (patch)
tree78b30dedd471ab76177b3631a956ab160615e303
parent3f629a8469fe483243671e1ca15582a111e96541 (diff)
downloaddispatch-6433cc42de1ceca7210e2b64ad3b98b3a5ce7d02.tar.gz
dispatch-6433cc42de1ceca7210e2b64ad3b98b3a5ce7d02.zip
feat(context-window): show current/max context usage per tab/model
Add a 'Context Window' sidebar view showing the live context occupancy (latest request's input+output) against the model's maximum context window, resolved dynamically from the models.dev catalog.
- core: models.dev catalog module (resolveContextLimit) with disk cache, TTL, stale-fallback + offline penalty memo; null for unknown models.
- api: GET /models/context-limit?provider=&modelId=.
- frontend: ContextWindowPanel + computeContextUsage helper; App resolves + caches the active model's max (anthropic/opencode-anthropic only); percent shown to 2 decimals; degrades to bare token count when max unknown.
- tests: core catalog (13), api route (3), frontend helper (6).
-rw-r--r--packages/api/src/routes/models.ts16
-rw-r--r--packages/api/tests/routes.test.ts32
-rw-r--r--packages/core/src/index.ts6
-rw-r--r--packages/core/src/models/catalog.ts179
-rw-r--r--packages/core/src/models/index.ts4
-rw-r--r--packages/core/tests/models/catalog.test.ts158
-rw-r--r--packages/frontend/src/App.svelte57
-rw-r--r--packages/frontend/src/lib/components/ContextWindowPanel.svelte85
-rw-r--r--packages/frontend/src/lib/components/SidebarPanel.svelte11
-rw-r--r--packages/frontend/src/lib/context-window.ts37
-rw-r--r--packages/frontend/tests/context-window.test.ts84
11 files changed, 668 insertions, 1 deletions
diff --git a/packages/api/src/routes/models.ts b/packages/api/src/routes/models.ts
index 03c079a..6a0f5dc 100644
--- a/packages/api/src/routes/models.ts
+++ b/packages/api/src/routes/models.ts
@@ -17,6 +17,7 @@ import {
listStoredCredentials,
refreshAccountCredentialsAsync,
resolveApiKey,
+ resolveContextLimit,
setApiKey,
validateAccountCredentials,
} from "@dispatch/core";
@@ -161,6 +162,21 @@ modelsRoutes.get("/available", async (c) => {
return c.json({ models });
});
+// GET /context-limit?provider=…&modelId=…
+// Looks up a model's MAXIMUM context window (in tokens) in the models.dev
+// catalog and answers `{ contextLimit: number | null }`. `null` means the limit
+// is unknown (unsupported provider, unknown model, or catalog offline); the
+// frontend then renders no denominator/percentage. Both query parameters are
+// mandatory — a missing or empty one yields a 400.
+modelsRoutes.get("/context-limit", async (c) => {
+	const provider = c.req.query("provider");
+	const modelId = c.req.query("modelId");
+	if (provider && modelId) {
+		return c.json({ contextLimit: await resolveContextLimit(provider, modelId) });
+	}
+	return c.json({ error: "provider and modelId query parameters are required" }, 400);
+});
+
+
// List available Claude accounts with validated credentials
modelsRoutes.get("/claude-accounts", async (c) => {
const candidates = resolveClaudeAccounts();
diff --git a/packages/api/tests/routes.test.ts b/packages/api/tests/routes.test.ts
index c768cee..e4b8f0f 100644
--- a/packages/api/tests/routes.test.ts
+++ b/packages/api/tests/routes.test.ts
@@ -268,6 +268,13 @@ vi.mock("@dispatch/core", () => ({
execute: async () => "mock",
};
},
+ // ── models.dev context-limit stub ─────────────────────────────
+	resolveContextLimit(provider: string, modelId: string) {
+		// Exactly one provider/model pair is "known" to the stub; everything
+		// else resolves to null, like an unknown model would.
+		const isKnownModel = provider === "anthropic" && modelId === "claude-sonnet-4-5";
+		return Promise.resolve(isKnownModel ? 200000 : null);
+	},
// ── ntfy notifications stubs ──────────────────────────────────
NotificationDispatcher: class MockNotificationDispatcher {
attachToAgentManager() {
@@ -751,3 +758,28 @@ describe("Wake schedule routes", () => {
expect(body.schedule["13"]).toBeUndefined();
});
});
+
+describe("GET /models/context-limit", () => {
+	type LimitBody = { contextLimit: number | null };
+	const ROUTE = "/models/context-limit";
+
+	it("returns the resolved context limit for a known model", async () => {
+		const response = await app.request(`${ROUTE}?provider=anthropic&modelId=claude-sonnet-4-5`);
+		expect(response.status).toBe(200);
+		const payload = (await response.json()) as LimitBody;
+		expect(payload.contextLimit).toBe(200000);
+	});
+
+	it("returns null contextLimit for an unknown model", async () => {
+		const response = await app.request(`${ROUTE}?provider=anthropic&modelId=mystery`);
+		expect(response.status).toBe(200);
+		const payload = (await response.json()) as LimitBody;
+		expect(payload.contextLimit).toBeNull();
+	});
+
+	it("400s when provider or modelId is missing", async () => {
+		// Each query string omits one of the two required parameters.
+		const incompleteQueries = ["?provider=anthropic", "?modelId=claude-sonnet-4-5"];
+		for (const query of incompleteQueries) {
+			const response = await app.request(`${ROUTE}${query}`);
+			expect(response.status).toBe(400);
+		}
+	});
+});
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 327b0a5..9d7133f 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -67,7 +67,11 @@ export {
} from "./llm/debug-logger.js";
export { createProvider } from "./llm/provider.js";
// Models
-export { ModelRegistry } from "./models/index.js";
+export {
+ getModelsCatalog,
+ ModelRegistry,
+ resolveContextLimit,
+} from "./models/index.js";
// Notifications (ntfy.sh)
export * from "./notifications/index.js";
export * from "./permission/index.js";
diff --git a/packages/core/src/models/catalog.ts b/packages/core/src/models/catalog.ts
new file mode 100644
index 0000000..dea4647
--- /dev/null
+++ b/packages/core/src/models/catalog.ts
@@ -0,0 +1,179 @@
+import { mkdirSync, readFileSync, renameSync, statSync, writeFileSync } from "node:fs";
+import { dirname } from "node:path";
+
+/**
+ * models.dev-backed model catalog. Resolves a model's MAXIMUM context window
+ * (`limit.context`) dynamically from the public models.dev API, mirroring how
+ * opencode determines per-model context limits — no hardcoded table.
+ *
+ * The catalog is fetched once, cached on disk with a short TTL, and reused. On
+ * fetch failure we fall back to a stale-but-present cache so the lookup keeps
+ * working offline. Lookups never throw: an unknown/unreachable model resolves
+ * to `null`, which the UI renders as "max unknown".
+ */
+
+/** Token limits models.dev reports for a model; either field may be missing. */
+interface ModelsDevLimit {
+	/** Maximum context window — the value this module resolves. */
+	context?: number;
+	/** Not read by this module (presumably the maximum output size — unverified). */
+	output?: number;
+}
+
+/** The part of a models.dev model entry (from `/api.json`) that we consume. */
+interface ModelsDevModel {
+	limit?: ModelsDevLimit;
+}
+
+/** A models.dev provider entry: its id plus its models keyed by model id. */
+interface ModelsDevProvider {
+	id: string;
+	models: Record<string, ModelsDevModel | undefined>;
+}
+
+/** The whole catalog, keyed by models.dev provider id. */
+type ModelsDevCatalog = Record<string, ModelsDevProvider | undefined>;
+
+/**
+ * Base URL of the models.dev API (`/api.json` is appended when fetching). Read
+ * once at module load from `DISPATCH_MODELS_URL`; an unset or empty value falls
+ * back to the public site. Overridable for tests / private mirrors.
+ */
+const MODELS_URL = process.env.DISPATCH_MODELS_URL || "https://models.dev";
+
+/** Disk cache path (reuses the repo's `/tmp/dispatch` convention). */
+const CACHE_PATH = "/tmp/dispatch/models-dev.json";
+
+/** How long a cached catalog stays fresh before we re-fetch (5 minutes). */
+const CACHE_TTL_MS = 5 * 60 * 1000;
+
+/** Network timeout for the catalog fetch (10 seconds). */
+const FETCH_TIMEOUT_MS = 10_000;
+
+/**
+ * After a failed fetch we memoize the fallback for this long before retrying,
+ * so a sustained outage doesn't make every lookup hang on a fresh timeout.
+ */
+const FETCH_PENALTY_MS = 60_000;
+
+/**
+ * Dispatch provider id → models.dev provider ids to search, in priority order.
+ * We only support Claude-backed providers (per product scope). `anthropic` and
+ * `opencode-anthropic` are both Claude; we try the first-party `anthropic`
+ * catalog first, then the `opencode` gateway catalog as a fallback.
+ */
+const PROVIDER_MAP: Record<string, string[]> = {
+ anthropic: ["anthropic", "opencode"],
+ "opencode-anthropic": ["anthropic", "opencode"],
+};
+
+/**
+ * In-process memo state. `cached` holds the most recently loaded catalog
+ * together with the timestamp its TTL is measured from (not a promise);
+ * `inflight` is the pending fetch, shared so that concurrent callers trigger
+ * only one request.
+ */
+let cached: { catalog: ModelsDevCatalog; fetchedAt: number } | null = null;
+let inflight: Promise<ModelsDevCatalog> | null = null;
+
+/**
+ * Read the on-disk catalog cache.
+ *
+ * @returns The parsed catalog plus the cache file's mtime (used as its age),
+ *   or `null` when the file is missing, unreadable, not valid JSON, or valid
+ *   JSON that is not a plain object.
+ */
+function readDiskCache(): { catalog: ModelsDevCatalog; mtimeMs: number } | null {
+	try {
+		const { mtimeMs } = statSync(CACHE_PATH);
+		const parsed: unknown = JSON.parse(readFileSync(CACHE_PATH, "utf-8"));
+		// `null`, arrays and scalars are valid JSON but not a catalog. Treat
+		// them like a corrupt file: a `null` catalog would make every later
+		// `catalog[providerId]` lookup throw instead of resolving to null.
+		if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
+			return null;
+		}
+		return { catalog: parsed as ModelsDevCatalog, mtimeMs };
+	} catch {
+		// Missing/unreadable/unparseable cache is simply "no cache".
+		return null;
+	}
+}
+
+/**
+ * Persist the raw catalog text to the disk cache, best-effort: any filesystem
+ * error is swallowed so a failed write never affects a lookup.
+ */
+function writeDiskCache(text: string): void {
+	// The temp name carries the pid so two processes writing at once do not
+	// clobber each other's temp file.
+	const tempPath = `${CACHE_PATH}.${process.pid}.tmp`;
+	try {
+		mkdirSync(dirname(CACHE_PATH), { recursive: true });
+		// Write the temp file first, then rename it over the cache path, so a
+		// concurrent reader never observes a half-written file (rename is
+		// atomic on the same filesystem).
+		writeFileSync(tempPath, text, "utf-8");
+		renameSync(tempPath, CACHE_PATH);
+	} catch {
+		// Best-effort: a read-only /tmp shouldn't break lookups.
+	}
+}
+
+/**
+ * Fetch `${MODELS_URL}/api.json`, validate it, and persist the raw text to the
+ * disk cache.
+ *
+ * The abort timer stays armed until the body has been read, so
+ * `FETCH_TIMEOUT_MS` bounds the whole request, not just the response headers.
+ *
+ * @returns The parsed catalog.
+ * @throws On network failure or timeout, a non-2xx status, a body that is not
+ *   valid JSON, or JSON that is not a plain object.
+ */
+async function fetchCatalog(): Promise<ModelsDevCatalog> {
+	const controller = new AbortController();
+	const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
+	try {
+		const res = await fetch(`${MODELS_URL}/api.json`, { signal: controller.signal });
+		if (!res.ok) throw new Error(`models.dev returned HTTP ${res.status}`);
+		const text = await res.text();
+		const parsed: unknown = JSON.parse(text);
+		// Reject `null`/arrays/scalars BEFORE writing the cache: persisting
+		// such a payload would poison the disk cache for every later reader.
+		if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
+			throw new Error("models.dev returned a non-object catalog");
+		}
+		writeDiskCache(text);
+		return parsed as ModelsDevCatalog;
+	} finally {
+		clearTimeout(timer);
+	}
+}
+
+/**
+ * Load the models.dev catalog. A failed fetch never rejects the returned
+ * promise; it degrades to a fallback catalog instead.
+ *
+ * Resolution order:
+ * 1. If `DISPATCH_DISABLE_MODELS_FETCH` is set to any non-empty value, return
+ *    whatever is on disk (regardless of its age) or an empty catalog. Neither
+ *    the network nor the in-process memo is touched.
+ * 2. The in-process memo, while it is younger than `CACHE_TTL_MS`.
+ * 3. The disk cache, while its mtime is younger than `CACHE_TTL_MS`; it is
+ *    adopted as the memo with the file's mtime as its timestamp.
+ * 4. A network fetch, shared between concurrent callers through `inflight`.
+ *
+ * When the fetch fails, a warning is logged and the disk cache read in step 3
+ * (however stale) is returned, or an empty catalog if there was none. That
+ * fallback is memoized for `FETCH_PENALTY_MS` before the next fetch attempt.
+ *
+ * @returns The catalog keyed by models.dev provider id; possibly empty.
+ */
+export async function getModelsCatalog(): Promise<ModelsDevCatalog> {
+ if (process.env.DISPATCH_DISABLE_MODELS_FETCH) {
+ const disk = readDiskCache();
+ return disk?.catalog ?? {};
+ }
+
+ const now = Date.now();
+ if (cached && now - cached.fetchedAt < CACHE_TTL_MS) return cached.catalog;
+
+ // Fresh disk cache satisfies the request without a network round-trip.
+ const disk = readDiskCache();
+ if (disk && now - disk.mtimeMs < CACHE_TTL_MS) {
+ // Inherit the file's mtime as `fetchedAt` so loading a disk cache into
+ // a fresh process doesn't reset its TTL (which would otherwise double
+ // the worst-case staleness across process boundaries).
+ cached = { catalog: disk.catalog, fetchedAt: disk.mtimeMs };
+ return disk.catalog;
+ }
+
+ if (!inflight) {
+ inflight = fetchCatalog()
+ .then((catalog) => {
+ cached = { catalog, fetchedAt: Date.now() };
+ return catalog;
+ })
+ .catch((err) => {
+ // Network failed — serve a stale cache if we have one.
+ console.warn(
+ `dispatch: failed to fetch models.dev catalog: ${err instanceof Error ? err.message : String(err)}`,
+ );
+ const fallback = disk?.catalog ?? ({} as ModelsDevCatalog);
+ // Memoize the fallback with a short "penalty" TTL so a sustained
+ // outage doesn't make every lookup hang on a fresh 10s timeout.
+ // `fetchedAt` is backdated so the memo expires after FETCH_PENALTY_MS.
+ cached = {
+ catalog: fallback,
+ fetchedAt: Date.now() - CACHE_TTL_MS + FETCH_PENALTY_MS,
+ };
+ return fallback;
+ })
+ .finally(() => {
+ inflight = null;
+ });
+ }
+ return inflight;
+}
+
+/**
+ * Resolve a model's maximum context window (in tokens) for the given Dispatch
+ * provider + model id.
+ *
+ * @param provider Dispatch provider id. It may come straight from an HTTP
+ *   query string, so it is treated as untrusted input.
+ * @param modelId Model id as listed in the models.dev catalog.
+ * @returns The first positive `limit.context` found among the mapped
+ *   models.dev providers (searched in priority order), or `null` when the
+ *   provider is unsupported, the model id is empty or unknown, or the catalog
+ *   has no usable entry — callers should render that as "max unknown" (no
+ *   denominator / percentage).
+ */
+export async function resolveContextLimit(
+	provider: string,
+	modelId: string,
+): Promise<number | null> {
+	// Own-property check: PROVIDER_MAP is a plain object, so a bare
+	// `PROVIDER_MAP[provider]` would return inherited members for keys such as
+	// "constructor" or "__proto__". Those are truthy but not iterable, and the
+	// loop below would throw instead of resolving to null.
+	if (!modelId || !Object.prototype.hasOwnProperty.call(PROVIDER_MAP, provider)) {
+		return null;
+	}
+	const candidates = PROVIDER_MAP[provider] ?? [];
+
+	const catalog = await getModelsCatalog();
+	for (const providerId of candidates) {
+		const ctx = catalog[providerId]?.models?.[modelId]?.limit?.context;
+		if (typeof ctx === "number" && ctx > 0) return ctx;
+	}
+	return null;
+}
+
+/**
+ * Test-only hook: drop both the memoized catalog and any pending fetch so the
+ * next lookup goes through the full loading path again.
+ */
+export function __resetCatalogCacheForTests(): void {
+	inflight = null;
+	cached = null;
+}
diff --git a/packages/core/src/models/index.ts b/packages/core/src/models/index.ts
index cf59749..2fcd657 100644
--- a/packages/core/src/models/index.ts
+++ b/packages/core/src/models/index.ts
@@ -1 +1,5 @@
+export {
+ getModelsCatalog,
+ resolveContextLimit,
+} from "./catalog.js";
export { ModelRegistry } from "./registry.js";
diff --git a/packages/core/tests/models/catalog.test.ts b/packages/core/tests/models/catalog.test.ts
new file mode 100644
index 0000000..51043e6
--- /dev/null
+++ b/packages/core/tests/models/catalog.test.ts
@@ -0,0 +1,158 @@
+import { existsSync, rmSync, utimesSync, writeFileSync } from "node:fs";
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import {
+ __resetCatalogCacheForTests,
+ getModelsCatalog,
+ resolveContextLimit,
+} from "../../src/models/catalog.js";
+
+const CACHE_PATH = "/tmp/dispatch/models-dev.json";
+
+// A trimmed models.dev-shaped catalog covering the providers we support.
+const CATALOG = {
+ anthropic: {
+ id: "anthropic",
+ models: {
+ "claude-sonnet-4-5": { limit: { context: 200000, output: 64000 } },
+ "claude-sonnet-4-6": { limit: { context: 1000000, output: 64000 } },
+ },
+ },
+ opencode: {
+ id: "opencode",
+ models: {
+ "glm-4-6": { limit: { context: 131072, output: 8192 } },
+ },
+ },
+};
+
+/**
+ * Replace the global `fetch` with a stub that answers every call with a
+ * Response-like object whose body is `catalog` serialized as JSON, and return
+ * the stub so the test can inspect its calls.
+ */
+function mockFetchOnce(catalog: unknown, ok = true, status = 200) {
+	const fetchStub = vi.fn(() => {
+		const fakeResponse = {
+			ok,
+			status,
+			text: () => Promise.resolve(JSON.stringify(catalog)),
+		} as Response;
+		return Promise.resolve(fakeResponse);
+	});
+	vi.stubGlobal("fetch", fetchStub);
+	return fetchStub;
+}
+
+// Start every test with no in-process memo, no cache file on disk, and the
+// network-disable flag cleared.
+beforeEach(() => {
+	__resetCatalogCacheForTests();
+	if (existsSync(CACHE_PATH)) {
+		rmSync(CACHE_PATH);
+	}
+	delete process.env.DISPATCH_DISABLE_MODELS_FETCH;
+});
+
+// Undo the `fetch` stub and remove any cache file the test left behind.
+afterEach(() => {
+	vi.unstubAllGlobals();
+	if (existsSync(CACHE_PATH)) {
+		rmSync(CACHE_PATH);
+	}
+});
+
+describe("resolveContextLimit", () => {
+ it("resolves a known anthropic model to its context window", async () => {
+ mockFetchOnce(CATALOG);
+ expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBe(200000);
+ expect(await resolveContextLimit("anthropic", "claude-sonnet-4-6")).toBe(1000000);
+ });
+
+ it("maps opencode-anthropic to the anthropic catalog, then opencode fallback", async () => {
+ mockFetchOnce(CATALOG);
+ // Listed under the first-priority `anthropic` provider, so that entry wins.
+ expect(await resolveContextLimit("opencode-anthropic", "claude-sonnet-4-5")).toBe(200000);
+ // Not listed under `anthropic`; resolved from the `opencode` gateway catalog.
+ expect(await resolveContextLimit("opencode-anthropic", "glm-4-6")).toBe(131072);
+ });
+
+ it("returns null for an unknown model id", async () => {
+ mockFetchOnce(CATALOG);
+ expect(await resolveContextLimit("anthropic", "no-such-model")).toBeNull();
+ });
+
+ it("returns null for an unsupported provider (no network needed)", async () => {
+ const fetchFn = mockFetchOnce(CATALOG);
+ expect(await resolveContextLimit("google", "gemini-2.5-pro")).toBeNull();
+ expect(await resolveContextLimit("anthropic", "")).toBeNull();
+ expect(fetchFn).not.toHaveBeenCalled();
+ });
+
+ it("returns null when the model has no positive context limit", async () => {
+ mockFetchOnce({
+ anthropic: { id: "anthropic", models: { broken: { limit: { context: 0 } } } },
+ });
+ expect(await resolveContextLimit("anthropic", "broken")).toBeNull();
+ });
+
+ it("does not throw on a malformed provider entry missing `models`", async () => {
+ // A provider entry with no `models` map must resolve to null rather than throw.
+ mockFetchOnce({ anthropic: { id: "anthropic" } });
+ expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBeNull();
+ });
+
+ it("does not throw when limit/context fields are absent", async () => {
+ mockFetchOnce({ anthropic: { id: "anthropic", models: { m: {} } } });
+ expect(await resolveContextLimit("anthropic", "m")).toBeNull();
+ });
+});
+
+describe("getModelsCatalog caching", () => {
+ it("fetches once and serves the in-process memo on subsequent calls", async () => {
+ const fetchFn = mockFetchOnce(CATALOG);
+ await resolveContextLimit("anthropic", "claude-sonnet-4-5");
+ await resolveContextLimit("anthropic", "claude-sonnet-4-6");
+ await getModelsCatalog();
+ expect(fetchFn).toHaveBeenCalledTimes(1);
+ });
+
+ it("reuses a fresh disk cache without re-fetching across processes", async () => {
+ // Simulate another process having written a fresh cache. NOTE(review): nothing here creates /tmp/dispatch — assumes it already exists; confirm.
+ writeFileSync(CACHE_PATH, JSON.stringify(CATALOG), "utf-8");
+ const fetchFn = vi.fn(() => Promise.reject(new Error("network should not be hit")));
+ vi.stubGlobal("fetch", fetchFn);
+ expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBe(200000);
+ expect(fetchFn).not.toHaveBeenCalled();
+ });
+
+ it("falls back to a STALE disk cache when the network fails", async () => {
+ writeFileSync(CACHE_PATH, JSON.stringify(CATALOG), "utf-8");
+ // Backdate the file's mtime by an hour (far beyond the TTL) so the fetch path is taken.
+ const old = Date.now() / 1000 - 3600;
+ utimesSync(CACHE_PATH, old, old);
+ const fetchFn = vi.fn(() => Promise.reject(new Error("offline")));
+ vi.stubGlobal("fetch", fetchFn);
+ const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+
+ expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBe(200000);
+ expect(fetchFn).toHaveBeenCalledTimes(1);
+ warn.mockRestore();
+ });
+
+ it("returns null when fetch fails and no cache exists", async () => {
+ const fetchFn = vi.fn(() => Promise.reject(new Error("offline")));
+ vi.stubGlobal("fetch", fetchFn);
+ const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+ expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBeNull();
+ warn.mockRestore();
+ });
+
+ it("does not hit the network when DISPATCH_DISABLE_MODELS_FETCH is set", async () => {
+ process.env.DISPATCH_DISABLE_MODELS_FETCH = "1";
+ const fetchFn = vi.fn(() => Promise.reject(new Error("should not fetch")));
+ vi.stubGlobal("fetch", fetchFn);
+ expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBeNull();
+ expect(fetchFn).not.toHaveBeenCalled();
+ });
+
+ it("memoizes the fallback after a failed fetch so it does not re-hit the network", async () => {
+ const fetchFn = vi.fn(() => Promise.reject(new Error("offline")));
+ vi.stubGlobal("fetch", fetchFn);
+ const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+
+ // The first lookup is the only one allowed to reach the (failing) fetch.
+ expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBeNull();
+ // Later lookups fall inside the penalty window and must be served from the memo.
+ expect(await resolveContextLimit("anthropic", "claude-sonnet-4-6")).toBeNull();
+ await getModelsCatalog();
+ expect(fetchFn).toHaveBeenCalledTimes(1);
+ warn.mockRestore();
+ });
+});
diff --git a/packages/frontend/src/App.svelte b/packages/frontend/src/App.svelte
index eaa28e8..0344af4 100644
--- a/packages/frontend/src/App.svelte
+++ b/packages/frontend/src/App.svelte
@@ -74,6 +74,62 @@ $effect(() => {
}
});
+// ─── Context-window max lookup ─────────────────────────────────
+// Resolve the active model's MAXIMUM context window from models.dev (via the
+// API), so the Context Window sidebar view can show `current / max`. Results
+// are cached per provider+model; `null` means unknown (the view then hides the
+// denominator/percentage). Only Claude-backed providers are resolvable.
+//
+// The API also answers `null` while its catalog is unreachable, so a cached
+// `null` is only trusted for CONTEXT_LIMIT_NULL_RETRY_MS. Without that expiry a
+// transient outage would pin "max unknown" until the page is reloaded. The
+// retry happens the next time this effect runs for that provider+model.
+const CONTEXT_LIMIT_NULL_RETRY_MS = 60 * 1000;
+let contextLimit = $state<number | null>(null);
+const contextLimitCache = new Map<string, { limit: number | null; expiresAt: number }>();
+
+$effect(() => {
+	const tab = tabStore.activeTab;
+	const keyId = tab?.keyId ?? null;
+	const modelId = tab?.modelId ?? null;
+	const provider = keyId ? (modelsData.keys.find((k) => k.id === keyId)?.provider ?? null) : null;
+
+	if (!provider || !modelId) {
+		contextLimit = null;
+		return;
+	}
+
+	const cacheKey = `${provider}/${modelId}`;
+	const hit = contextLimitCache.get(cacheKey);
+	if (hit && Date.now() < hit.expiresAt) {
+		contextLimit = hit.limit;
+		return;
+	}
+
+	// Clear immediately so a slow/failed fetch can't leave the PREVIOUS
+	// model's max on screen (which would render this model's tokens against
+	// the wrong denominator). The view degrades to a bare token count until
+	// the fetch resolves.
+	contextLimit = null;
+
+	// Fetch is async; guard against a stale response overwriting a newer
+	// selection by re-checking the active tab's key/model on resolve.
+	void (async () => {
+		try {
+			const res = await fetch(
+				`${config.apiBase}/models/context-limit?provider=${encodeURIComponent(provider)}&modelId=${encodeURIComponent(modelId)}`,
+			);
+			if (!res.ok) return;
+			const data = (await res.json()) as { contextLimit?: number | null };
+			const limit = data.contextLimit ?? null;
+			// Resolved limits never expire; an unknown (`null`) one is
+			// looked up again after the retry window.
+			contextLimitCache.set(cacheKey, {
+				limit,
+				expiresAt:
+					limit === null
+						? Date.now() + CONTEXT_LIMIT_NULL_RETRY_MS
+						: Number.POSITIVE_INFINITY,
+			});
+			const current = tabStore.activeTab;
+			const currentProvider = current?.keyId
+				? (modelsData.keys.find((k) => k.id === current.keyId)?.provider ?? null)
+				: null;
+			if (currentProvider === provider && current?.modelId === modelId) {
+				contextLimit = limit;
+			}
+		} catch {
+			// Nothing is cached on a network error; contextLimit stays null
+			// (cleared above), so the view shows the bare token count.
+		}
+	})();
+});
+
+
onMount(() => {
// Apply persisted theme (or the shared DEFAULT_THEME if nothing is
// stored) so the first paint matches what the Settings panel will
@@ -137,6 +193,7 @@ onMount(() => {
tasks={tabStore.activeTab?.tasks ?? []}
cacheStats={tabStore.activeTab?.cacheStats ?? null}
cacheTabTitle={tabStore.activeTab?.title ?? null}
+ {contextLimit}
permissionLog={tabStore.permissionLog}
apiBase={config.apiBase}
activeKeyId={tabStore.activeTab?.keyId ?? null}
diff --git a/packages/frontend/src/lib/components/ContextWindowPanel.svelte b/packages/frontend/src/lib/components/ContextWindowPanel.svelte
new file mode 100644
index 0000000..6c7de05
--- /dev/null
+++ b/packages/frontend/src/lib/components/ContextWindowPanel.svelte
@@ -0,0 +1,85 @@
+<script lang="ts">
+import { computeContextUsage } from "../context-window.js";
+import type { CacheStats } from "../types.js";
+
+interface Props {
+	cacheStats?: CacheStats | null;
+	contextLimit?: number | null;
+	tabTitle?: string | null;
+	modelId?: string | null;
+}
+
+const { cacheStats = null, contextLimit = null, tabTitle = null, modelId = null }: Props = $props();
+
+// The panel has something to show only once a request has been recorded.
+const hasUsage = $derived((cacheStats?.last ?? null) !== null);
+
+// Current tokens, max tokens and fill percentage for the headline and bar.
+const usage = $derived(computeContextUsage(cacheStats, contextLimit));
+
+// Progress-bar color by fill level: success below 70%, warning from 70%,
+// error from 90%.
+function fillClass(pct: number): string {
+	return pct >= 90 ? "progress-error" : pct >= 70 ? "progress-warning" : "progress-success";
+}
+
+// Token counts are rendered with the runtime's default locale grouping.
+const fmt = (n: number): string => n.toLocaleString();
+</script>
+
+<div class="flex flex-col gap-3 flex-1 min-h-0 overflow-y-auto">
+ {#if !hasUsage}
+ <p class="text-xs text-base-content/50">
+ No context data yet. Send a message — the current context size appears
+ here after the first response.
+ </p>
+ {:else}
+ <div class="bg-base-200 rounded-lg p-2">
+ <div class="flex items-center gap-1.5 mb-2">
+ <span class="text-xs font-semibold">Context Window</span>
+ {#if tabTitle}
+ <span class="badge badge-xs badge-ghost">{tabTitle}</span>
+ {/if}
+ {#if usage.percent !== null}
+ <span class="badge badge-xs ml-auto">{usage.percent.toFixed(2)}%</span>
+ {/if}
+ </div>
+
+ <!-- Headline: current / max (or just current when max is unknown) -->
+ <div class="flex items-baseline gap-1.5">
+ <span class="text-lg font-mono font-semibold">{fmt(usage.current)}</span>
+ {#if usage.max !== null}
+ <span class="text-xs text-base-content/50 font-mono">/ {fmt(usage.max)}</span>
+ {/if}
+ <span class="text-xs text-base-content/40 ml-1">tokens</span>
+ </div>
+
+ {#if usage.percent !== null}
+ <progress
+ class="progress w-full h-2 mt-1.5 {fillClass(usage.percent)}"
+ value={usage.percent}
+ max="100"
+ ></progress>
+ {:else}
+ <p class="text-xs text-base-content/40 mt-1.5">
+ Max context size unknown for this model.
+ </p>
+ {/if}
+
+ {#if modelId}
+ <div class="text-xs text-base-content/40 mt-1.5 truncate" title={modelId}>
+ {modelId}
+ </div>
+ {/if}
+ </div>
+
+ <p class="text-xs text-base-content/40">
+ Current context = the most recent request's prompt + output (what the
+ model actually held in its window that turn). Grows as the conversation
+ gets longer. Resets on reload.
+ </p>
+ {/if}
+</div>
diff --git a/packages/frontend/src/lib/components/SidebarPanel.svelte b/packages/frontend/src/lib/components/SidebarPanel.svelte
index 491b1bd..573a6fc 100644
--- a/packages/frontend/src/lib/components/SidebarPanel.svelte
+++ b/packages/frontend/src/lib/components/SidebarPanel.svelte
@@ -4,6 +4,7 @@ import type { CacheStats, KeyInfo, LogEntry, TaskItem } from "../types.js";
import CacheRatePanel from "./CacheRatePanel.svelte";
import ClaudeReset from "./ClaudeReset.svelte";
import ConfigPanel from "./ConfigPanel.svelte";
+import ContextWindowPanel from "./ContextWindowPanel.svelte";
import DebugPanel from "./DebugPanel.svelte";
import KeyUsage from "./KeyUsage.svelte";
import ModelSelector from "./ModelSelector.svelte";
@@ -27,6 +28,7 @@ const {
tasks = [],
cacheStats = null,
cacheTabTitle = null,
+ contextLimit = null,
permissionLog = [],
apiBase = "",
activeKeyId = null,
@@ -47,6 +49,7 @@ const {
tasks?: TaskItem[];
cacheStats?: CacheStats | null;
cacheTabTitle?: string | null;
+ contextLimit?: number | null;
permissionLog?: LogEntry[];
apiBase?: string;
activeKeyId?: string | null;
@@ -89,6 +92,7 @@ const viewOptions = [
"Chat Settings",
"Key Usage",
"Cache Rate",
+ "Context Window",
"Claude Reset",
"Model Status",
"Tasks",
@@ -170,6 +174,13 @@ function contentClass(_selected: string): string {
<KeyUsage {keys} {apiBase} />
{:else if panel.selected === "Cache Rate"}
<CacheRatePanel {cacheStats} tabTitle={cacheTabTitle} />
+ {:else if panel.selected === "Context Window"}
+ <ContextWindowPanel
+ {cacheStats}
+ {contextLimit}
+ tabTitle={cacheTabTitle}
+ modelId={activeModelId}
+ />
{:else if panel.selected === "Claude Reset"}
<ClaudeReset {apiBase} />
{:else if panel.selected === "Model Status"}
diff --git a/packages/frontend/src/lib/context-window.ts b/packages/frontend/src/lib/context-window.ts
new file mode 100644
index 0000000..c4321f8
--- /dev/null
+++ b/packages/frontend/src/lib/context-window.ts
@@ -0,0 +1,37 @@
+import type { CacheStats } from "./types.js";
+
+/**
+ * How full the model's context window is for the current tab/model.
+ *
+ * - `current`: size of the context on the MOST RECENT request — that turn's
+ *   full prompt (`inputTokens`, which already includes cached tokens for
+ *   Anthropic) plus what the model generated (`outputTokens`). This mirrors
+ *   how opencode derives context fullness from the last assistant message and
+ *   is NOT the session-cumulative total shown by the Cache Rate view.
+ * - `max`: the model's maximum context window from models.dev, or `null` when
+ *   it is unknown.
+ * - `percent`: `current / max * 100`, clamped to [0, 100] and left unrounded
+ *   (the UI picks the displayed precision); `null` when `max` is unknown, in
+ *   which case the UI shows the bare token count with no denominator or
+ *   progress bar.
+ */
+export interface ContextUsage {
+	current: number;
+	max: number | null;
+	percent: number | null;
+}
+
+/**
+ * Derive the context usage from the tab's stats and the model's limit.
+ *
+ * @param cacheStats Stats for the tab; only its `last` request is read. With no
+ *   stats or no recorded request, `current` is 0.
+ * @param contextLimit Maximum context window in tokens; anything other than a
+ *   positive number is treated as unknown.
+ */
+export function computeContextUsage(
+	cacheStats: CacheStats | null | undefined,
+	contextLimit: number | null | undefined,
+): ContextUsage {
+	const latest = cacheStats?.last;
+	const current = latest ? latest.inputTokens + latest.outputTokens : 0;
+
+	// Unknown limit: report the token count alone.
+	if (typeof contextLimit !== "number" || !(contextLimit > 0)) {
+		return { current, max: null, percent: null };
+	}
+
+	// Kept unrounded so a small context against a huge window (a few thousand
+	// tokens vs. 1,000,000) still reads as non-zero.
+	const rawPercent = (current / contextLimit) * 100;
+	const percent = Math.max(0, Math.min(100, rawPercent));
+	return { current, max: contextLimit, percent };
+}
diff --git a/packages/frontend/tests/context-window.test.ts b/packages/frontend/tests/context-window.test.ts
new file mode 100644
index 0000000..bb64ed5
--- /dev/null
+++ b/packages/frontend/tests/context-window.test.ts
@@ -0,0 +1,84 @@
+import { describe, expect, it } from "vitest";
+import { computeContextUsage } from "../src/lib/context-window.js";
+import type { CacheStats } from "../src/lib/types.js";
+
+/**
+ * Build a CacheStats fixture whose cumulative counters are all zero, so only
+ * the given `last` request carries any token counts.
+ */
+function stats(last: CacheStats["last"]): CacheStats {
+	const requests = last ? 1 : 0;
+	const fixture: CacheStats = {
+		inputTokens: 0,
+		outputTokens: 0,
+		cacheReadTokens: 0,
+		cacheWriteTokens: 0,
+		requests,
+		last,
+	};
+	return fixture;
+}
+
+describe("computeContextUsage", () => {
+ it("derives current context from the LAST request's input + output", () => {
+ const usage = computeContextUsage(
+ stats({
+ inputTokens: 47000,
+ outputTokens: 1200,
+ cacheReadTokens: 40000,
+ cacheWriteTokens: 0,
+ }),
+ 200000,
+ );
+ // 47000 + 1200 from `last` only: the cumulative totals are ignored, and
+ // cacheReadTokens is not added on top of inputTokens.
+ expect(usage.current).toBe(48200);
+ expect(usage.max).toBe(200000);
+ expect(usage.percent).toBeCloseTo(24.1, 5); // 48200 / 200000 * 100, unrounded
+ });
+
+ it("returns max=null and percent=null when the limit is unknown", () => {
+ const usage = computeContextUsage(
+ stats({ inputTokens: 100, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 }),
+ null,
+ );
+ expect(usage.current).toBe(100);
+ expect(usage.max).toBeNull();
+ expect(usage.percent).toBeNull();
+ });
+
+ it("treats a non-positive limit as unknown", () => {
+ const usage = computeContextUsage(
+ stats({ inputTokens: 100, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 }),
+ 0,
+ );
+ expect(usage.max).toBeNull();
+ expect(usage.percent).toBeNull();
+ });
+
+ it("reports zero usage when no request has completed yet", () => {
+ expect(computeContextUsage(null, 200000)).toEqual({
+ current: 0,
+ max: 200000,
+ percent: 0,
+ });
+ expect(computeContextUsage(stats(null), 200000)).toEqual({
+ current: 0,
+ max: 200000,
+ percent: 0,
+ });
+ });
+
+ it("clamps percent to 100 when context overflows the window", () => {
+ const usage = computeContextUsage(
+ stats({ inputTokens: 250000, outputTokens: 5000, cacheReadTokens: 0, cacheWriteTokens: 0 }),
+ 200000,
+ );
+ expect(usage.current).toBe(255000);
+ expect(usage.percent).toBe(100);
+ });
+
+ it("keeps an unrounded percent so the UI can show 2 decimals", () => {
+ const usage = computeContextUsage(
+ stats({ inputTokens: 3690, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 }),
+ 1000000,
+ );
+ // 3690 / 1,000,000 * 100 = 0.369, which toFixed(2) renders as "0.37".
+ expect(usage.percent).toBeCloseTo(0.369, 6);
+ expect((usage.percent as number).toFixed(2)).toBe("0.37");
+ });
+});