diff options
| author | Adam Malczewski <[email protected]> | 2026-06-02 13:25:23 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-02 13:25:23 +0900 |
| commit | 6433cc42de1ceca7210e2b64ad3b98b3a5ce7d02 (patch) | |
| tree | 78b30dedd471ab76177b3631a956ab160615e303 | |
| parent | 3f629a8469fe483243671e1ca15582a111e96541 (diff) | |
| download | dispatch-6433cc42de1ceca7210e2b64ad3b98b3a5ce7d02.tar.gz dispatch-6433cc42de1ceca7210e2b64ad3b98b3a5ce7d02.zip | |
feat(context-window): show current/max context usage per tab/model
Add a 'Context Window' sidebar view showing the live context occupancy
(latest request's input+output) against the model's maximum context
window, resolved dynamically from the models.dev catalog.
- core: models.dev catalog module (resolveContextLimit) with a disk cache,
TTL, stale-cache fallback, and a failed-fetch penalty memo; resolves to null for unknown models.
- api: GET /models/context-limit?provider=&modelId=.
- frontend: ContextWindowPanel + computeContextUsage helper; App resolves
+ caches the active model's max (anthropic/opencode-anthropic only);
percent shown to 2 decimals; degrades to bare token count when max
unknown.
- tests: core catalog (13), api route (3), frontend helper (6).
| -rw-r--r-- | packages/api/src/routes/models.ts | 16 | ||||
| -rw-r--r-- | packages/api/tests/routes.test.ts | 32 | ||||
| -rw-r--r-- | packages/core/src/index.ts | 6 | ||||
| -rw-r--r-- | packages/core/src/models/catalog.ts | 179 | ||||
| -rw-r--r-- | packages/core/src/models/index.ts | 4 | ||||
| -rw-r--r-- | packages/core/tests/models/catalog.test.ts | 158 | ||||
| -rw-r--r-- | packages/frontend/src/App.svelte | 57 | ||||
| -rw-r--r-- | packages/frontend/src/lib/components/ContextWindowPanel.svelte | 85 | ||||
| -rw-r--r-- | packages/frontend/src/lib/components/SidebarPanel.svelte | 11 | ||||
| -rw-r--r-- | packages/frontend/src/lib/context-window.ts | 37 | ||||
| -rw-r--r-- | packages/frontend/tests/context-window.test.ts | 84 |
11 files changed, 668 insertions, 1 deletion
diff --git a/packages/api/src/routes/models.ts b/packages/api/src/routes/models.ts index 03c079a..6a0f5dc 100644 --- a/packages/api/src/routes/models.ts +++ b/packages/api/src/routes/models.ts @@ -17,6 +17,7 @@ import { listStoredCredentials, refreshAccountCredentialsAsync, resolveApiKey, + resolveContextLimit, setApiKey, validateAccountCredentials, } from "@dispatch/core"; @@ -161,6 +162,21 @@ modelsRoutes.get("/available", async (c) => { return c.json({ models }); }); +// Resolve a model's MAXIMUM context window (in tokens) from the models.dev +// catalog. Returns `{ contextLimit: number | null }`; `null` means the model's +// limit is unknown (unsupported provider, unknown model, or catalog offline), +// which the frontend renders without a denominator/percentage. +modelsRoutes.get("/context-limit", async (c) => { + const provider = c.req.query("provider"); + const modelId = c.req.query("modelId"); + if (!provider || !modelId) { + return c.json({ error: "provider and modelId query parameters are required" }, 400); + } + + const contextLimit = await resolveContextLimit(provider, modelId); + return c.json({ contextLimit }); +}); + // List available Claude accounts with validated credentials modelsRoutes.get("/claude-accounts", async (c) => { const candidates = resolveClaudeAccounts(); diff --git a/packages/api/tests/routes.test.ts b/packages/api/tests/routes.test.ts index c768cee..e4b8f0f 100644 --- a/packages/api/tests/routes.test.ts +++ b/packages/api/tests/routes.test.ts @@ -268,6 +268,13 @@ vi.mock("@dispatch/core", () => ({ execute: async () => "mock", }; }, + // ── models.dev context-limit stub ───────────────────────────── + resolveContextLimit(provider: string, modelId: string) { + if (provider === "anthropic" && modelId === "claude-sonnet-4-5") { + return Promise.resolve(200000); + } + return Promise.resolve(null); + }, // ── ntfy notifications stubs ────────────────────────────────── NotificationDispatcher: class MockNotificationDispatcher { 
attachToAgentManager() { @@ -751,3 +758,28 @@ describe("Wake schedule routes", () => { expect(body.schedule["13"]).toBeUndefined(); }); }); + +describe("GET /models/context-limit", () => { + it("returns the resolved context limit for a known model", async () => { + const res = await app.request( + "/models/context-limit?provider=anthropic&modelId=claude-sonnet-4-5", + ); + expect(res.status).toBe(200); + const body = (await res.json()) as { contextLimit: number | null }; + expect(body.contextLimit).toBe(200000); + }); + + it("returns null contextLimit for an unknown model", async () => { + const res = await app.request("/models/context-limit?provider=anthropic&modelId=mystery"); + expect(res.status).toBe(200); + const body = (await res.json()) as { contextLimit: number | null }; + expect(body.contextLimit).toBeNull(); + }); + + it("400s when provider or modelId is missing", async () => { + const res1 = await app.request("/models/context-limit?provider=anthropic"); + expect(res1.status).toBe(400); + const res2 = await app.request("/models/context-limit?modelId=claude-sonnet-4-5"); + expect(res2.status).toBe(400); + }); +}); diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 327b0a5..9d7133f 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -67,7 +67,11 @@ export { } from "./llm/debug-logger.js"; export { createProvider } from "./llm/provider.js"; // Models -export { ModelRegistry } from "./models/index.js"; +export { + getModelsCatalog, + ModelRegistry, + resolveContextLimit, +} from "./models/index.js"; // Notifications (ntfy.sh) export * from "./notifications/index.js"; export * from "./permission/index.js"; diff --git a/packages/core/src/models/catalog.ts b/packages/core/src/models/catalog.ts new file mode 100644 index 0000000..dea4647 --- /dev/null +++ b/packages/core/src/models/catalog.ts @@ -0,0 +1,179 @@ +import { mkdirSync, readFileSync, renameSync, statSync, writeFileSync } from "node:fs"; +import { dirname 
} from "node:path"; + +/** + * models.dev-backed model catalog. Resolves a model's MAXIMUM context window + * (`limit.context`) dynamically from the public models.dev API, mirroring how + * opencode determines per-model context limits — no hardcoded table. + * + * The catalog is fetched once, cached on disk with a short TTL, and reused. On + * fetch failure we fall back to a stale-but-present cache so the lookup keeps + * working offline. Lookups never throw: an unknown/unreachable model resolves + * to `null`, which the UI renders as "max unknown". + */ + +/** Shape of the slice of models.dev's `/api.json` we consume. */ +interface ModelsDevModel { + limit?: { + context?: number; + output?: number; + }; +} + +interface ModelsDevProvider { + id: string; + models: Record<string, ModelsDevModel | undefined>; +} + +type ModelsDevCatalog = Record<string, ModelsDevProvider | undefined>; + +/** Where models.dev's API lives. Overridable for tests / private mirrors. */ +const MODELS_URL = process.env.DISPATCH_MODELS_URL || "https://models.dev"; + +/** Disk cache path (reuses the repo's `/tmp/dispatch` convention). */ +const CACHE_PATH = "/tmp/dispatch/models-dev.json"; + +/** How long a cached catalog stays fresh before we re-fetch. */ +const CACHE_TTL_MS = 5 * 60 * 1000; + +/** Network timeout for the catalog fetch. */ +const FETCH_TIMEOUT_MS = 10_000; + +/** + * After a failed fetch we memoize the fallback for this long before retrying, + * so a sustained outage doesn't make every lookup hang on a fresh timeout. + */ +const FETCH_PENALTY_MS = 60_000; + +/** + * Dispatch provider id → models.dev provider ids to search, in priority order. + * We only support Claude-backed providers (per product scope). `anthropic` and + * `opencode-anthropic` are both Claude; we try the first-party `anthropic` + * catalog first, then the `opencode` gateway catalog as a fallback. 
+ */ +const PROVIDER_MAP: Record<string, string[]> = { + anthropic: ["anthropic", "opencode"], + "opencode-anthropic": ["anthropic", "opencode"], +}; + +/** In-process memoized catalog promise (one fetch/parse per TTL window). */ +let cached: { catalog: ModelsDevCatalog; fetchedAt: number } | null = null; +let inflight: Promise<ModelsDevCatalog> | null = null; + +function readDiskCache(): { catalog: ModelsDevCatalog; mtimeMs: number } | null { + try { + const stat = statSync(CACHE_PATH); + const text = readFileSync(CACHE_PATH, "utf-8"); + return { catalog: JSON.parse(text) as ModelsDevCatalog, mtimeMs: stat.mtimeMs }; + } catch { + return null; + } +} + +function writeDiskCache(text: string): void { + try { + mkdirSync(dirname(CACHE_PATH), { recursive: true }); + // Write-then-rename so a concurrent reader never sees a half-written + // file (rename is atomic on the same filesystem). The temp name is + // process-scoped to avoid two writers clobbering each other's temp. + const tmp = `${CACHE_PATH}.${process.pid}.tmp`; + writeFileSync(tmp, text, "utf-8"); + renameSync(tmp, CACHE_PATH); + } catch { + // Best-effort: a read-only /tmp shouldn't break lookups. + } +} + +async function fetchCatalog(): Promise<ModelsDevCatalog> { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); + try { + const res = await fetch(`${MODELS_URL}/api.json`, { signal: controller.signal }); + if (!res.ok) throw new Error(`models.dev returned HTTP ${res.status}`); + const text = await res.text(); + const catalog = JSON.parse(text) as ModelsDevCatalog; + writeDiskCache(text); + return catalog; + } finally { + clearTimeout(timer); + } +} + +/** + * Load the models.dev catalog, preferring in-process memo, then a fresh disk + * cache, then a network fetch. On network failure, falls back to any stale + * disk cache; if nothing is available, returns an empty catalog. 
+ */ +export async function getModelsCatalog(): Promise<ModelsDevCatalog> { + if (process.env.DISPATCH_DISABLE_MODELS_FETCH) { + const disk = readDiskCache(); + return disk?.catalog ?? {}; + } + + const now = Date.now(); + if (cached && now - cached.fetchedAt < CACHE_TTL_MS) return cached.catalog; + + // Fresh disk cache satisfies the request without a network round-trip. + const disk = readDiskCache(); + if (disk && now - disk.mtimeMs < CACHE_TTL_MS) { + // Inherit the file's mtime as `fetchedAt` so loading a disk cache into + // a fresh process doesn't reset its TTL (which would otherwise double + // the worst-case staleness across process boundaries). + cached = { catalog: disk.catalog, fetchedAt: disk.mtimeMs }; + return disk.catalog; + } + + if (!inflight) { + inflight = fetchCatalog() + .then((catalog) => { + cached = { catalog, fetchedAt: Date.now() }; + return catalog; + }) + .catch((err) => { + // Network failed — serve a stale cache if we have one. + console.warn( + `dispatch: failed to fetch models.dev catalog: ${err instanceof Error ? err.message : String(err)}`, + ); + const fallback = disk?.catalog ?? ({} as ModelsDevCatalog); + // Memoize the fallback with a short "penalty" TTL so a sustained + // outage doesn't make every lookup hang on a fresh 10s timeout. + // `fetchedAt` is backdated so the memo expires after FETCH_PENALTY_MS. + cached = { + catalog: fallback, + fetchedAt: Date.now() - CACHE_TTL_MS + FETCH_PENALTY_MS, + }; + return fallback; + }) + .finally(() => { + inflight = null; + }); + } + return inflight; +} + +/** + * Resolve a model's maximum context window (in tokens) for the given Dispatch + * provider + model id. Returns `null` when the provider is unsupported, the + * model is unknown, or the catalog is unavailable — callers should render that + * as "max unknown" (no denominator / percentage). 
+ */ +export async function resolveContextLimit( + provider: string, + modelId: string, +): Promise<number | null> { + const candidates = PROVIDER_MAP[provider]; + if (!candidates || !modelId) return null; + + const catalog = await getModelsCatalog(); + for (const providerId of candidates) { + const ctx = catalog[providerId]?.models?.[modelId]?.limit?.context; + if (typeof ctx === "number" && ctx > 0) return ctx; + } + return null; +} + +/** Test-only: reset the in-process memo so a test can re-exercise loading. */ +export function __resetCatalogCacheForTests(): void { + cached = null; + inflight = null; +} diff --git a/packages/core/src/models/index.ts b/packages/core/src/models/index.ts index cf59749..2fcd657 100644 --- a/packages/core/src/models/index.ts +++ b/packages/core/src/models/index.ts @@ -1 +1,5 @@ +export { + getModelsCatalog, + resolveContextLimit, +} from "./catalog.js"; export { ModelRegistry } from "./registry.js"; diff --git a/packages/core/tests/models/catalog.test.ts b/packages/core/tests/models/catalog.test.ts new file mode 100644 index 0000000..51043e6 --- /dev/null +++ b/packages/core/tests/models/catalog.test.ts @@ -0,0 +1,158 @@ +import { existsSync, rmSync, utimesSync, writeFileSync } from "node:fs"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { + __resetCatalogCacheForTests, + getModelsCatalog, + resolveContextLimit, +} from "../../src/models/catalog.js"; + +const CACHE_PATH = "/tmp/dispatch/models-dev.json"; + +// A trimmed models.dev-shaped catalog covering the providers we support. 
+const CATALOG = { + anthropic: { + id: "anthropic", + models: { + "claude-sonnet-4-5": { limit: { context: 200000, output: 64000 } }, + "claude-sonnet-4-6": { limit: { context: 1000000, output: 64000 } }, + }, + }, + opencode: { + id: "opencode", + models: { + "glm-4-6": { limit: { context: 131072, output: 8192 } }, + }, + }, +}; + +function mockFetchOnce(catalog: unknown, ok = true, status = 200) { + const fn = vi.fn(() => + Promise.resolve({ + ok, + status, + text: () => Promise.resolve(JSON.stringify(catalog)), + } as Response), + ); + vi.stubGlobal("fetch", fn); + return fn; +} + +beforeEach(() => { + __resetCatalogCacheForTests(); + if (existsSync(CACHE_PATH)) rmSync(CACHE_PATH); + delete process.env.DISPATCH_DISABLE_MODELS_FETCH; +}); + +afterEach(() => { + vi.unstubAllGlobals(); + if (existsSync(CACHE_PATH)) rmSync(CACHE_PATH); +}); + +describe("resolveContextLimit", () => { + it("resolves a known anthropic model to its context window", async () => { + mockFetchOnce(CATALOG); + expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBe(200000); + expect(await resolveContextLimit("anthropic", "claude-sonnet-4-6")).toBe(1000000); + }); + + it("maps opencode-anthropic to the anthropic catalog, then opencode fallback", async () => { + mockFetchOnce(CATALOG); + // Present in the anthropic catalog. + expect(await resolveContextLimit("opencode-anthropic", "claude-sonnet-4-5")).toBe(200000); + // Absent in anthropic, found in the opencode gateway catalog. 
+ expect(await resolveContextLimit("opencode-anthropic", "glm-4-6")).toBe(131072); + }); + + it("returns null for an unknown model id", async () => { + mockFetchOnce(CATALOG); + expect(await resolveContextLimit("anthropic", "no-such-model")).toBeNull(); + }); + + it("returns null for an unsupported provider (no network needed)", async () => { + const fetchFn = mockFetchOnce(CATALOG); + expect(await resolveContextLimit("google", "gemini-2.5-pro")).toBeNull(); + expect(await resolveContextLimit("anthropic", "")).toBeNull(); + expect(fetchFn).not.toHaveBeenCalled(); + }); + + it("returns null when the model has no positive context limit", async () => { + mockFetchOnce({ + anthropic: { id: "anthropic", models: { broken: { limit: { context: 0 } } } }, + }); + expect(await resolveContextLimit("anthropic", "broken")).toBeNull(); + }); + + it("does not throw on a malformed provider entry missing `models`", async () => { + // A provider object without a `models` map must degrade to null, not crash. + mockFetchOnce({ anthropic: { id: "anthropic" } }); + expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBeNull(); + }); + + it("does not throw when limit/context fields are absent", async () => { + mockFetchOnce({ anthropic: { id: "anthropic", models: { m: {} } } }); + expect(await resolveContextLimit("anthropic", "m")).toBeNull(); + }); +}); + +describe("getModelsCatalog caching", () => { + it("fetches once and serves the in-process memo on subsequent calls", async () => { + const fetchFn = mockFetchOnce(CATALOG); + await resolveContextLimit("anthropic", "claude-sonnet-4-5"); + await resolveContextLimit("anthropic", "claude-sonnet-4-6"); + await getModelsCatalog(); + expect(fetchFn).toHaveBeenCalledTimes(1); + }); + + it("reuses a fresh disk cache without re-fetching across processes", async () => { + // Simulate another process having written a fresh cache. 
+ writeFileSync(CACHE_PATH, JSON.stringify(CATALOG), "utf-8"); + const fetchFn = vi.fn(() => Promise.reject(new Error("network should not be hit"))); + vi.stubGlobal("fetch", fetchFn); + expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBe(200000); + expect(fetchFn).not.toHaveBeenCalled(); + }); + + it("falls back to a STALE disk cache when the network fails", async () => { + writeFileSync(CACHE_PATH, JSON.stringify(CATALOG), "utf-8"); + // Age the cache well past the TTL so the fetch path is taken. + const old = Date.now() / 1000 - 3600; + utimesSync(CACHE_PATH, old, old); + const fetchFn = vi.fn(() => Promise.reject(new Error("offline"))); + vi.stubGlobal("fetch", fetchFn); + const warn = vi.spyOn(console, "warn").mockImplementation(() => {}); + + expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBe(200000); + expect(fetchFn).toHaveBeenCalledTimes(1); + warn.mockRestore(); + }); + + it("returns null when fetch fails and no cache exists", async () => { + const fetchFn = vi.fn(() => Promise.reject(new Error("offline"))); + vi.stubGlobal("fetch", fetchFn); + const warn = vi.spyOn(console, "warn").mockImplementation(() => {}); + expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBeNull(); + warn.mockRestore(); + }); + + it("does not hit the network when DISPATCH_DISABLE_MODELS_FETCH is set", async () => { + process.env.DISPATCH_DISABLE_MODELS_FETCH = "1"; + const fetchFn = vi.fn(() => Promise.reject(new Error("should not fetch"))); + vi.stubGlobal("fetch", fetchFn); + expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBeNull(); + expect(fetchFn).not.toHaveBeenCalled(); + }); + + it("memoizes the fallback after a failed fetch so it does not re-hit the network", async () => { + const fetchFn = vi.fn(() => Promise.reject(new Error("offline"))); + vi.stubGlobal("fetch", fetchFn); + const warn = vi.spyOn(console, "warn").mockImplementation(() => {}); + + // First lookup triggers the (failing) 
fetch. + expect(await resolveContextLimit("anthropic", "claude-sonnet-4-5")).toBeNull(); + // Subsequent lookups within the penalty window must NOT re-fetch. + expect(await resolveContextLimit("anthropic", "claude-sonnet-4-6")).toBeNull(); + await getModelsCatalog(); + expect(fetchFn).toHaveBeenCalledTimes(1); + warn.mockRestore(); + }); +}); diff --git a/packages/frontend/src/App.svelte b/packages/frontend/src/App.svelte index eaa28e8..0344af4 100644 --- a/packages/frontend/src/App.svelte +++ b/packages/frontend/src/App.svelte @@ -74,6 +74,62 @@ $effect(() => { } }); +// ─── Context-window max lookup ───────────────────────────────── +// Resolve the active model's MAXIMUM context window from models.dev (via the +// API), so the Context Window sidebar view can show `current / max`. Cached +// per provider+model; `null` when unknown (the view then hides the +// denominator/percentage). Only Claude-backed providers are resolvable. +let contextLimit = $state<number | null>(null); +const contextLimitCache = new Map<string, number | null>(); + +$effect(() => { + const tab = tabStore.activeTab; + const keyId = tab?.keyId ?? null; + const modelId = tab?.modelId ?? null; + const provider = keyId ? (modelsData.keys.find((k) => k.id === keyId)?.provider ?? null) : null; + + if (!provider || !modelId) { + contextLimit = null; + return; + } + + const cacheKey = `${provider}/${modelId}`; + if (contextLimitCache.has(cacheKey)) { + contextLimit = contextLimitCache.get(cacheKey) ?? null; + return; + } + + // Clear immediately so a slow/failed fetch can't leave the PREVIOUS + // model's max on screen (which would render this model's tokens against + // the wrong denominator). The view degrades to a bare token count until + // the fetch resolves. + contextLimit = null; + + // Fetch is async; guard against a stale response overwriting a newer + // selection by re-checking the active tab's key/model on resolve. 
+ void (async () => { + try { + const res = await fetch( + `${config.apiBase}/models/context-limit?provider=${encodeURIComponent(provider)}&modelId=${encodeURIComponent(modelId)}`, + ); + if (!res.ok) return; + const data = (await res.json()) as { contextLimit?: number | null }; + const limit = data.contextLimit ?? null; + contextLimitCache.set(cacheKey, limit); + const current = tabStore.activeTab; + const currentProvider = current?.keyId + ? (modelsData.keys.find((k) => k.id === current.keyId)?.provider ?? null) + : null; + if (currentProvider === provider && current?.modelId === modelId) { + contextLimit = limit; + } + } catch { + // Leave contextLimit as-is on network error; view falls back to + // showing the bare token count. + } + })(); +}); + onMount(() => { // Apply persisted theme (or the shared DEFAULT_THEME if nothing is // stored) so the first paint matches what the Settings panel will @@ -137,6 +193,7 @@ onMount(() => { tasks={tabStore.activeTab?.tasks ?? []} cacheStats={tabStore.activeTab?.cacheStats ?? null} cacheTabTitle={tabStore.activeTab?.title ?? null} + {contextLimit} permissionLog={tabStore.permissionLog} apiBase={config.apiBase} activeKeyId={tabStore.activeTab?.keyId ?? 
null} diff --git a/packages/frontend/src/lib/components/ContextWindowPanel.svelte b/packages/frontend/src/lib/components/ContextWindowPanel.svelte new file mode 100644 index 0000000..6c7de05 --- /dev/null +++ b/packages/frontend/src/lib/components/ContextWindowPanel.svelte @@ -0,0 +1,85 @@ +<script lang="ts"> +import { computeContextUsage } from "../context-window.js"; +import type { CacheStats } from "../types.js"; + +const { + cacheStats = null, + contextLimit = null, + tabTitle = null, + modelId = null, +}: { + cacheStats?: CacheStats | null; + contextLimit?: number | null; + tabTitle?: string | null; + modelId?: string | null; +} = $props(); + +const usage = $derived(computeContextUsage(cacheStats, contextLimit)); + +// As the window fills, escalate color: calm → warning → danger. +function fillClass(pct: number): string { + if (pct >= 90) return "progress-error"; + if (pct >= 70) return "progress-warning"; + return "progress-success"; +} + +function fmt(n: number): string { + return n.toLocaleString(); +} + +const hasUsage = $derived((cacheStats?.last ?? null) !== null); +</script> + +<div class="flex flex-col gap-3 flex-1 min-h-0 overflow-y-auto"> + {#if !hasUsage} + <p class="text-xs text-base-content/50"> + No context data yet. Send a message — the current context size appears + here after the first response. 
+ </p> + {:else} + <div class="bg-base-200 rounded-lg p-2"> + <div class="flex items-center gap-1.5 mb-2"> + <span class="text-xs font-semibold">Context Window</span> + {#if tabTitle} + <span class="badge badge-xs badge-ghost">{tabTitle}</span> + {/if} + {#if usage.percent !== null} + <span class="badge badge-xs ml-auto">{usage.percent.toFixed(2)}%</span> + {/if} + </div> + + <!-- Headline: current / max (or just current when max is unknown) --> + <div class="flex items-baseline gap-1.5"> + <span class="text-lg font-mono font-semibold">{fmt(usage.current)}</span> + {#if usage.max !== null} + <span class="text-xs text-base-content/50 font-mono">/ {fmt(usage.max)}</span> + {/if} + <span class="text-xs text-base-content/40 ml-1">tokens</span> + </div> + + {#if usage.percent !== null} + <progress + class="progress w-full h-2 mt-1.5 {fillClass(usage.percent)}" + value={usage.percent} + max="100" + ></progress> + {:else} + <p class="text-xs text-base-content/40 mt-1.5"> + Max context size unknown for this model. + </p> + {/if} + + {#if modelId} + <div class="text-xs text-base-content/40 mt-1.5 truncate" title={modelId}> + {modelId} + </div> + {/if} + </div> + + <p class="text-xs text-base-content/40"> + Current context = the most recent request's prompt + output (what the + model actually held in its window that turn). Grows as the conversation + gets longer. Resets on reload. 
+ </p> + {/if} +</div> diff --git a/packages/frontend/src/lib/components/SidebarPanel.svelte b/packages/frontend/src/lib/components/SidebarPanel.svelte index 491b1bd..573a6fc 100644 --- a/packages/frontend/src/lib/components/SidebarPanel.svelte +++ b/packages/frontend/src/lib/components/SidebarPanel.svelte @@ -4,6 +4,7 @@ import type { CacheStats, KeyInfo, LogEntry, TaskItem } from "../types.js"; import CacheRatePanel from "./CacheRatePanel.svelte"; import ClaudeReset from "./ClaudeReset.svelte"; import ConfigPanel from "./ConfigPanel.svelte"; +import ContextWindowPanel from "./ContextWindowPanel.svelte"; import DebugPanel from "./DebugPanel.svelte"; import KeyUsage from "./KeyUsage.svelte"; import ModelSelector from "./ModelSelector.svelte"; @@ -27,6 +28,7 @@ const { tasks = [], cacheStats = null, cacheTabTitle = null, + contextLimit = null, permissionLog = [], apiBase = "", activeKeyId = null, @@ -47,6 +49,7 @@ const { tasks?: TaskItem[]; cacheStats?: CacheStats | null; cacheTabTitle?: string | null; + contextLimit?: number | null; permissionLog?: LogEntry[]; apiBase?: string; activeKeyId?: string | null; @@ -89,6 +92,7 @@ const viewOptions = [ "Chat Settings", "Key Usage", "Cache Rate", + "Context Window", "Claude Reset", "Model Status", "Tasks", @@ -170,6 +174,13 @@ function contentClass(_selected: string): string { <KeyUsage {keys} {apiBase} /> {:else if panel.selected === "Cache Rate"} <CacheRatePanel {cacheStats} tabTitle={cacheTabTitle} /> + {:else if panel.selected === "Context Window"} + <ContextWindowPanel + {cacheStats} + {contextLimit} + tabTitle={cacheTabTitle} + modelId={activeModelId} + /> {:else if panel.selected === "Claude Reset"} <ClaudeReset {apiBase} /> {:else if panel.selected === "Model Status"} diff --git a/packages/frontend/src/lib/context-window.ts b/packages/frontend/src/lib/context-window.ts new file mode 100644 index 0000000..c4321f8 --- /dev/null +++ b/packages/frontend/src/lib/context-window.ts @@ -0,0 +1,37 @@ +import type { 
CacheStats } from "./types.js"; + +/** + * Context-window occupancy for the current tab/model. + * + * `current` is the size of the model's context on the MOST RECENT request — + * the last turn's full prompt (`inputTokens`, which already includes cached + * tokens for Anthropic) plus what the model generated that turn + * (`outputTokens`). This mirrors how opencode derives context fullness from + * the last assistant message, and reflects what actually occupies the model's + * window — NOT the session-cumulative totals shown by the Cache Rate view. + * + * `max` is the model's maximum context window from models.dev (or `null` when + * unknown). `percent` is `current / max * 100` clamped to [0, 100] (unrounded; + * the UI decides the displayed precision), or `null` when + * `max` is unknown — in which case the UI shows the bare token count with no + * denominator or progress bar. + */ +export interface ContextUsage { + current: number; + max: number | null; + percent: number | null; +} + +export function computeContextUsage( + cacheStats: CacheStats | null | undefined, + contextLimit: number | null | undefined, +): ContextUsage { + const last = cacheStats?.last ?? null; + const current = last ? last.inputTokens + last.outputTokens : 0; + const max = typeof contextLimit === "number" && contextLimit > 0 ? contextLimit : null; + // Precise (unrounded) percentage clamped to [0, 100]; the UI formats the + // decimal places. Kept unrounded so small contexts against huge windows + // (e.g. a few thousand tokens vs. 1,000,000) still read non-zero. + const percent = max ? 
Math.max(0, Math.min(100, (current / max) * 100)) : null; + return { current, max, percent }; +} diff --git a/packages/frontend/tests/context-window.test.ts b/packages/frontend/tests/context-window.test.ts new file mode 100644 index 0000000..bb64ed5 --- /dev/null +++ b/packages/frontend/tests/context-window.test.ts @@ -0,0 +1,84 @@ +import { describe, expect, it } from "vitest"; +import { computeContextUsage } from "../src/lib/context-window.js"; +import type { CacheStats } from "../src/lib/types.js"; + +function stats(last: CacheStats["last"]): CacheStats { + return { + inputTokens: 0, + outputTokens: 0, + cacheReadTokens: 0, + cacheWriteTokens: 0, + requests: last ? 1 : 0, + last, + }; +} + +describe("computeContextUsage", () => { + it("derives current context from the LAST request's input + output", () => { + const usage = computeContextUsage( + stats({ + inputTokens: 47000, + outputTokens: 1200, + cacheReadTokens: 40000, + cacheWriteTokens: 0, + }), + 200000, + ); + // 47000 + 1200 — NOT the cumulative totals, and cache tokens are already + // inside inputTokens (not re-added). 
+ expect(usage.current).toBe(48200); + expect(usage.max).toBe(200000); + expect(usage.percent).toBeCloseTo(24.1, 5); // 48200 / 200000 * 100, unrounded + }); + + it("returns max=null and percent=null when the limit is unknown", () => { + const usage = computeContextUsage( + stats({ inputTokens: 100, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 }), + null, + ); + expect(usage.current).toBe(100); + expect(usage.max).toBeNull(); + expect(usage.percent).toBeNull(); + }); + + it("treats a non-positive limit as unknown", () => { + const usage = computeContextUsage( + stats({ inputTokens: 100, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 }), + 0, + ); + expect(usage.max).toBeNull(); + expect(usage.percent).toBeNull(); + }); + + it("reports zero usage when no request has completed yet", () => { + expect(computeContextUsage(null, 200000)).toEqual({ + current: 0, + max: 200000, + percent: 0, + }); + expect(computeContextUsage(stats(null), 200000)).toEqual({ + current: 0, + max: 200000, + percent: 0, + }); + }); + + it("clamps percent to 100 when context overflows the window", () => { + const usage = computeContextUsage( + stats({ inputTokens: 250000, outputTokens: 5000, cacheReadTokens: 0, cacheWriteTokens: 0 }), + 200000, + ); + expect(usage.current).toBe(255000); + expect(usage.percent).toBe(100); + }); + + it("keeps an unrounded percent so the UI can show 2 decimals", () => { + const usage = computeContextUsage( + stats({ inputTokens: 3690, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 }), + 1000000, + ); + // 3690 / 1,000,000 * 100 = 0.369 → displayed as "0.37%" (toFixed(2)). + expect(usage.percent).toBeCloseTo(0.369, 6); + expect((usage.percent as number).toFixed(2)).toBe("0.37"); + }); +}); |
