summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Adam Malczewski <[email protected]> 2026-06-03 08:24:40 +0900
committer Adam Malczewski <[email protected]> 2026-06-03 08:24:40 +0900
commit bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a (patch)
tree 17e84ebf8d83c51a7a50312c256372a86e38b92a
parent b26821ead97b986f886065b20d3dbde8283daa64 (diff)
parent ae672fd4f5542a2c217cf97657bf81eeebdaabbd (diff)
download dispatch-bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a.tar.gz
dispatch-bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a.zip
Merge branch 'dev' into cmp7/compaction-tool
# Conflicts:
#	packages/frontend/src/lib/components/ChatInput.svelte
-rw-r--r--packages/api/src/agent-manager.ts36
-rw-r--r--packages/api/src/app.ts63
-rw-r--r--packages/api/src/routes/models.ts18
-rw-r--r--packages/api/tests/agent-manager.test.ts30
-rw-r--r--packages/api/tests/routes.test.ts63
-rw-r--r--packages/core/src/agent/agent.tsbin57822 -> 60515 bytes
-rw-r--r--packages/core/src/credentials/claude.ts69
-rw-r--r--packages/core/src/credentials/index.ts2
-rw-r--r--packages/core/src/index.ts18
-rw-r--r--packages/core/src/models/attachments.ts151
-rw-r--r--packages/core/src/models/catalog.ts50
-rw-r--r--packages/core/src/models/index.ts19
-rw-r--r--packages/core/src/tools/key-usage.ts322
-rw-r--r--packages/core/src/tools/summon.ts1
-rw-r--r--packages/core/src/types/index.ts49
-rw-r--r--packages/core/tests/agent/agent.test.ts98
-rw-r--r--packages/core/tests/models/attachments.test.ts136
-rw-r--r--packages/core/tests/models/catalog.test.ts75
-rw-r--r--packages/core/tests/tools/key-usage.test.ts317
-rw-r--r--packages/frontend/src/App.svelte55
-rw-r--r--packages/frontend/src/lib/attachment-tokens.ts234
-rw-r--r--packages/frontend/src/lib/components/ChatInput.svelte226
-rw-r--r--packages/frontend/src/lib/components/TabBar.svelte34
-rw-r--r--packages/frontend/src/lib/components/ToolPermissions.svelte6
-rw-r--r--packages/frontend/src/lib/settings.svelte.ts2
-rw-r--r--packages/frontend/src/lib/tabs.svelte.ts68
-rw-r--r--packages/frontend/tests/attachment-tokens.test.ts130
-rw-r--r--packages/frontend/tests/chat-store.test.ts75
28 files changed, 2310 insertions, 37 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts
index 38dab49..e79e9a8 100644
--- a/packages/api/src/agent-manager.ts
+++ b/packages/api/src/agent-manager.ts
@@ -15,6 +15,7 @@ import {
clearSpillForTab,
configToRuleset,
createConfigWatcher,
+ createKeyUsageTool,
createListFilesTool,
createLspTool,
createReadFileSliceTool,
@@ -71,6 +72,7 @@ import {
toAvailableUserAgents,
type UsageData,
type UsageStats,
+ type UserContentPart,
validateConfig,
} from "@dispatch/core";
import type { PermissionManager } from "./permission-manager.js";
@@ -90,6 +92,8 @@ const TOOL_DESCRIPTIONS: Record<string, string> = {
search_code:
"Search the codebase by query using the 'cs' code search engine (relevance-ranked, structure-aware). Returns the most relevant files first with matching snippets and line numbers. Better than grep/find for exploratory 'where is X / how does Y work' searches; use run_shell with rg for exhaustive exact-match lists.",
todo: "Create/maintain a todo list to plan and track work. Declarative whole-list write: send the entire list in `todos` each call (it replaces the previous list). Statuses: pending, in_progress, completed, cancelled.",
+ key_usage:
+ "Report current usage levels for configured API keys: provider, active/exhausted status, remaining rate-limit headroom and reset times per window (5-hour, weekly, monthly where available), and whether the figures are live or cached. Pass key_id for one key; omit to report all. Supported for anthropic and opencode-go keys.",
summon:
"Spawn a child agent to work on a task independently. By default blocks until the child finishes. Set background=true to return immediately with an agent_id for later retrieval.",
retrieve:
@@ -527,10 +531,11 @@ export class AgentManager {
const permReadTab = getSetting("perm_read_tab") === "allow";
const permWebSearch = getSetting("perm_web_search") === "allow";
const permSearchCode = getSetting("perm_search_code") === "allow";
+ const permKeyUsage = getSetting("perm_key_usage") === "allow";
const permYoutubeTranscribe = getSetting("perm_youtube_transcribe") === "allow";
const permLsp = getSetting("perm_lsp") === "allow";
const sysPrompt = getSetting("system_prompt") ?? "";
- const permKey = `${permRead}:${permEdit}:${permBash}:${permSummon}:${permUserAgent}:${permSendToTab}:${permReadTab}:${permWebSearch}:${permYoutubeTranscribe}:${permSearchCode}:${permLsp}:${sysPrompt}`;
+ const permKey = `${permRead}:${permEdit}:${permBash}:${permSummon}:${permUserAgent}:${permSendToTab}:${permReadTab}:${permWebSearch}:${permYoutubeTranscribe}:${permSearchCode}:${permKeyUsage}:${permLsp}:${sysPrompt}`;
// If the override differs or permissions changed, invalidate the cached agent
if (
@@ -622,6 +627,9 @@ export class AgentManager {
if (allowed.has("web_search")) {
toolEntries.push({ name: "web_search", tool: createWebSearchTool() });
}
+ if (allowed.has("key_usage")) {
+ toolEntries.push({ name: "key_usage", tool: this.buildKeyUsageTool() });
+ }
if (allowed.has("lsp") && lspServers.length > 0) {
toolEntries.push({
name: "lsp",
@@ -727,6 +735,9 @@ export class AgentManager {
if (permWebSearch) {
toolEntries.push({ name: "web_search", tool: createWebSearchTool() });
}
+ if (permKeyUsage) {
+ toolEntries.push({ name: "key_usage", tool: this.buildKeyUsageTool() });
+ }
// The `lsp` tool exposes diagnostics + navigation on demand. It is
// gated by `perm_lsp` AND requires at least one server configured
// in the working directory's `dispatch.toml`.
@@ -1665,6 +1676,19 @@ export class AgentManager {
// `deliverMessage`), so an agent message behaves identically to a user one.
/**
+ * Build the `key_usage` tool, wired to the live model registry (key states)
+ * and the discovered Claude accounts. The tool fetches usage live with a
+ * cache fallback (anthropic) or a live scrape (opencode-go), reporting
+ * remaining headroom, reset times, and data freshness per key.
+ */
+	private buildKeyUsageTool(): ReturnType<typeof createKeyUsageTool> {
+		// Both collaborators are closures, so the tool reads the registry and the
+		// account list when it is invoked rather than when it is built.
+		const listKeys = () => this.modelRegistry?.getKeys() ?? [];
+		const listClaudeAccounts = () => this.claudeAccounts;
+		return createKeyUsageTool({ listKeys, listClaudeAccounts });
+	}
+
+ /**
* Build the `send_to_tab` + `read_tab` tool entries for `tabId`. Shared by
* both tool-construction paths (child whitelist + permission-gated parent).
* `selfHandle` is computed once so the calling tab can stamp provenance and
@@ -1796,6 +1820,13 @@ export class AgentManager {
workingDirectory?: string;
queueId?: string;
/**
+ * Ephemeral ordered multimodal content (image/pdf attachments) for a
+ * FRESH human turn. Forwarded to `processMessage` → `agent.run` only
+ * when the tab is idle (a started turn); never carried into the queue
+ * path (attachments require a fresh turn — the caller guards that).
+ */
+ content?: UserContentPart[];
+ /**
* Who is sending this message. `"human"` (default) is unrestricted
* and REFILLS the target's agent-to-agent auto-wake budget. `"agent"`
* (from the `send_to_tab` tool) is governed by that budget: an
@@ -1874,6 +1905,7 @@ export class AgentManager {
opts.reasoningEffort,
opts.workingDirectory,
agentModels,
+ opts.content,
).catch((err) => {
console.error(`[dispatch] deliverMessage processMessage error for tab ${tabId}:`, err);
});
@@ -1888,6 +1920,7 @@ export class AgentManager {
reasoningEffort?: ReasoningEffort,
workingDirectory?: string,
agentModels?: AgentModelEntry[],
+ content?: UserContentPart[],
): Promise<void> {
const tabAgent = this._getOrCreateTabAgent(tabId);
@@ -1999,6 +2032,7 @@ export class AgentManager {
for await (const event of agent.run(message, {
...(effortForEntry ? { reasoningEffort: effortForEntry } : {}),
abortSignal: tabAgent.abortController?.signal,
+ ...(content ? { content } : {}),
})) {
// Stop processing if the tab was aborted (closed/stopped).
// stopTab() already injected a `cancelled` system chunk into
diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts
index 84afd2a..2f4e538 100644
--- a/packages/api/src/app.ts
+++ b/packages/api/src/app.ts
@@ -3,6 +3,8 @@ import {
getTab,
isReasoningEffort,
NotificationDispatcher,
+ type UserContentPart,
+ validateUserContent,
} from "@dispatch/core";
import { Hono } from "hono";
import { cors } from "hono/cors";
@@ -37,6 +39,41 @@ function sanitizeAgentModels(raw: unknown): AgentModelEntry[] | undefined {
return out;
}
+/**
+ * Shape-check the optional multimodal `content` array of a `/chat` body and
+ * rebuild it from known fields only.
+ *
+ * Accepted entries:
+ * - `{ type: "text", text }` with a string `text`
+ * - `{ type: "attachment", mediaType, data, name? }` with string `mediaType`
+ *   and `data`; `name` is copied only when it is a string
+ *
+ * Anything else (non-objects, unknown `type`, wrongly typed fields) is skipped.
+ *
+ * @param raw - The untrusted `content` value from the request body.
+ * @returns The normalised parts, or `undefined` when `raw` is not a non-empty
+ *   array or no well-formed attachment survived — in which case the caller
+ *   stays on the plain-string message path.
+ *
+ * Only the shape is checked here; size/type/count limits are enforced
+ * separately by `validateUserContent`.
+ */
+function sanitizeUserContent(raw: unknown): UserContentPart[] | undefined {
+	if (!Array.isArray(raw) || raw.length === 0) return undefined;
+
+	const parts: UserContentPart[] = [];
+	let sawAttachment = false;
+
+	for (const entry of raw) {
+		if (!entry || typeof entry !== "object") continue;
+		const { type, text, mediaType, data, name } = entry as Record<string, unknown>;
+
+		switch (type) {
+			case "text":
+				if (typeof text === "string") parts.push({ type: "text", text });
+				break;
+			case "attachment":
+				if (typeof mediaType === "string" && typeof data === "string") {
+					sawAttachment = true;
+					parts.push(
+						typeof name === "string"
+							? { type: "attachment", mediaType, data, name }
+							: { type: "attachment", mediaType, data },
+					);
+				}
+				break;
+		}
+	}
+
+	// A text-only list is handed back as `undefined` so the model message is not
+	// switched to array content without a reason.
+	return sawAttachment ? parts : undefined;
+}
+
export const permissionManager = new PermissionManager();
export const agentManager = new AgentManager(permissionManager);
@@ -94,6 +131,7 @@ app.post("/chat", async (c) => {
const body = await c.req.json<{
tabId?: unknown;
message?: unknown;
+ content?: unknown;
keyId?: unknown;
modelId?: unknown;
agentModels?: unknown;
@@ -121,6 +159,30 @@ app.post("/chat", async (c) => {
? body.reasoningEffort
: undefined;
+ // Optional multimodal content (image/pdf attachments). When present, the
+ // attachments are EPHEMERAL — forwarded to the model for this turn only and
+ // never persisted (the chunk log keeps just `message`, which the frontend
+ // has already projected to text with `[image]`/`[pdf]` markers).
+ const content = sanitizeUserContent(body.content);
+ if (content) {
+ // Enforce size/type/count ceilings server-side (defence in depth; the
+ // frontend also enforces them at paste time). Reject the whole request
+ // so no tokens are spent on an over-limit payload.
+ const validation = validateUserContent(content);
+ if (!validation.ok) {
+ return c.json({ error: "invalid attachments", details: validation.errors }, 400);
+ }
+ // Attachments only attach to a FRESH turn. If the tab is mid-turn the
+ // message would queue (text-only machinery), silently dropping the
+ // images. Reject clearly instead so the user can retry once idle.
+ if (agentManager.getTabStatus(tabId) === "running") {
+ return c.json(
+ { error: "cannot attach images while the agent is generating; wait for it to finish" },
+ 409,
+ );
+ }
+ }
+
// Single routing decision (queue if busy, new turn if idle) shared with the
// `send_to_tab` tool via `AgentManager.deliverMessage`. Non-blocking — a
// started turn runs in the background.
@@ -131,6 +193,7 @@ app.post("/chat", async (c) => {
...(reasoningEffort ? { reasoningEffort } : {}),
...(workingDirectory !== undefined ? { workingDirectory } : {}),
...(queueId ? { queueId } : {}),
+ ...(content ? { content } : {}),
});
if (outcome.status === "queued") {
diff --git a/packages/api/src/routes/models.ts b/packages/api/src/routes/models.ts
index eeb6029..a1700b1 100644
--- a/packages/api/src/routes/models.ts
+++ b/packages/api/src/routes/models.ts
@@ -20,6 +20,7 @@ import {
refreshAccountCredentialsAsync,
resolveApiKey,
resolveContextLimit,
+ resolveModelCapabilities,
selectHaikuModel,
setApiKey,
validateAccountCredentials,
@@ -180,6 +181,23 @@ modelsRoutes.get("/context-limit", async (c) => {
return c.json({ contextLimit });
});
+// GET /capabilities?provider=…&modelId=…
+// Looks up a model's image / PDF INPUT capabilities in the models.dev catalog
+// and responds with `{ capabilities: { image, pdf } | null }`. `null` means
+// UNKNOWN: the provider is unmapped, the model is absent, the catalog entry
+// predates the `modalities` field, or the catalog is offline. The frontend
+// treats `null` as "can't verify" (optimistic allow) and a definitive
+// `{ image: false }` as a hard block (no tokens spent).
+// Responds 400 when either query parameter is missing or empty.
+modelsRoutes.get("/capabilities", async (c) => {
+	const provider = c.req.query("provider");
+	const modelId = c.req.query("modelId");
+	if (!(provider && modelId)) {
+		return c.json({ error: "provider and modelId query parameters are required" }, 400);
+	}
+
+	return c.json({ capabilities: await resolveModelCapabilities(provider, modelId) });
+});
+
// List available Claude accounts with validated credentials
modelsRoutes.get("/claude-accounts", async (c) => {
const candidates = resolveClaudeAccounts();
diff --git a/packages/api/tests/agent-manager.test.ts b/packages/api/tests/agent-manager.test.ts
index 0915d9b..80a8ae5 100644
--- a/packages/api/tests/agent-manager.test.ts
+++ b/packages/api/tests/agent-manager.test.ts
@@ -537,6 +537,14 @@ vi.mock("@dispatch/core", () => ({
execute: async () => "mock",
};
},
+	createKeyUsageTool(_callbacks: unknown) {
+		// Minimal stub: a tool-shaped object named "key_usage" whose execute()
+		// resolves to a fixed string. The callbacks are ignored.
+		const execute = async () => "mock";
+		const parameters = { _type: "z.ZodObject", shape: {} };
+		return { name: "key_usage", description: "key usage", parameters, execute };
+	},
createSearchCodeTool(_wd: string) {
return {
name: "search_code",
@@ -1634,6 +1642,28 @@ describe("AgentManager", () => {
});
});
+	describe("key_usage permission gate", () => {
+		// key_usage is opt-in: unless perm_key_usage is explicitly "allow", the
+		// tool must be missing from the toolset entirely (and therefore from the
+		// model's context).
+
+		/** Apply `perms` as settings, run one turn on `tabId`, and return the tool names of the newest agent. */
+		async function toolsForPerms(tabId: string, perms: Record<string, string>): Promise<string[]> {
+			Object.entries(perms).forEach(([key, value]) => {
+				setFakeSetting(key, value);
+			});
+			const manager = new AgentManager();
+			await manager.processMessage(tabId, "go");
+			const latestAgent = constructedAgents.at(-1);
+			return latestAgent?.toolNames ?? [];
+		}
+
+		it("registers key_usage when perm_key_usage is allowed", async () => {
+			const registered = await toolsForPerms("tab-key-usage-on", { perm_key_usage: "allow" });
+			expect(registered).toContain("key_usage");
+		});
+
+		it("omits key_usage when perm_key_usage is not allowed", async () => {
+			const registered = await toolsForPerms("tab-key-usage-off", {});
+			expect(registered).not.toContain("key_usage");
+		});
+	});
+
// Regression: granted tab-messaging tools must also be ADVERTISED in the
// agent's system prompt. The tools were registered in the API tool payload
// but `buildSystemPrompt` filtered its "You have access to the following
diff --git a/packages/api/tests/routes.test.ts b/packages/api/tests/routes.test.ts
index d6f6087..06dfa13 100644
--- a/packages/api/tests/routes.test.ts
+++ b/packages/api/tests/routes.test.ts
@@ -219,6 +219,16 @@ vi.mock("@dispatch/core", () => ({
typeof value === "string" && ["none", "low", "medium", "high", "xhigh", "max"].includes(value)
);
},
+	// Simplified validator stub: an attachment passes when its media type is on
+	// the supported list and is reported as "unsupported-type" otherwise. That
+	// is all the /chat attachment-validation branch needs; the real validator
+	// has its own unit tests in core.
+	validateUserContent(content: Array<{ type: string; mediaType?: string }>) {
+		const accepted = ["image/png", "image/jpeg", "image/webp", "image/gif", "application/pdf"];
+		const errors: Array<{ code: string; mediaType: string | undefined }> = [];
+		for (const part of content) {
+			if (part.type !== "attachment") continue;
+			if (accepted.includes(part.mediaType ?? "")) continue;
+			errors.push({ code: "unsupported-type", mediaType: part.mediaType });
+		}
+		return { ok: errors.length === 0, errors };
+	},
listOpenTabs() {
return [...fakeOpenTabs];
},
@@ -451,6 +461,59 @@ describe("POST /chat", () => {
expect(await res.json()).toEqual({ status: "ok" });
});
+	it("accepts a valid image attachment and starts a turn", async () => {
+		// One text part plus one PNG attachment ("QQ==" is a one-byte payload).
+		const payload = {
+			tabId: "tab-img-ok",
+			message: "look: [image]",
+			content: [
+				{ type: "text", text: "look: " },
+				{ type: "attachment", mediaType: "image/png", data: "QQ==" },
+			],
+		};
+		const res = await app.request("/chat", {
+			method: "POST",
+			headers: { "Content-Type": "application/json" },
+			body: JSON.stringify(payload),
+		});
+		expect(res.status).toBe(200);
+		expect(await res.json()).toEqual({ status: "ok" });
+	});
+
+	it("returns 400 for an unsupported attachment media type", async () => {
+		// SVG is not on the validator's accepted list, so the request must be rejected.
+		const payload = {
+			tabId: "tab-img-bad",
+			message: "look: [image]",
+			content: [{ type: "attachment", mediaType: "image/svg+xml", data: "QQ==" }],
+		};
+		const res = await app.request("/chat", {
+			method: "POST",
+			headers: { "Content-Type": "application/json" },
+			body: JSON.stringify(payload),
+		});
+		expect(res.status).toBe(400);
+		const { error } = await res.json();
+		expect(error).toBe("invalid attachments");
+	});
+
+	it("returns 409 when attaching while the agent is generating", async () => {
+		const postChat = (payload: unknown) =>
+			app.request("/chat", {
+				method: "POST",
+				headers: { "Content-Type": "application/json" },
+				body: JSON.stringify(payload),
+			});
+
+		// Kick off a plain-text turn so the tab is running, and give it a moment.
+		await postChat({ tabId: "tab-img-busy", message: "first" });
+		await new Promise<void>((r) => setTimeout(r, 20));
+
+		// An attachment sent to the same tab must now be refused.
+		const res = await postChat({
+			tabId: "tab-img-busy",
+			message: "second [image]",
+			content: [{ type: "attachment", mediaType: "image/png", data: "QQ==" }],
+		});
+		expect(res.status).toBe(409);
+	});
+
it("returns 400 with empty message", async () => {
const res = await app.request("/chat", {
method: "POST",
diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts
index 4bfa7eb..08b317a 100644
--- a/packages/core/src/agent/agent.ts
+++ b/packages/core/src/agent/agent.ts
Binary files differ
diff --git a/packages/core/src/credentials/claude.ts b/packages/core/src/credentials/claude.ts
index 7818222..050a0fc 100644
--- a/packages/core/src/credentials/claude.ts
+++ b/packages/core/src/credentials/claude.ts
@@ -441,6 +441,22 @@ export interface ClaudeUsageReport {
orgId?: string;
}
+/**
+ * A usage report paired with provenance: whether it came back from a fresh
+ * live fetch against Anthropic's `/api/oauth/usage` endpoint or was served
+ * from the local `usage_cache` table after a failed/skipped live fetch.
+ *
+ * `source: "cache"` carries `cachedAt` — the epoch-ms timestamp recording when
+ * that cached payload was last fetched FROM the source (the `usage_cache.cached_at`
+ * column). `source: "live"` omits `cachedAt` (the data is current as of now).
+ */
+export interface ClaudeUsageResult {
+ /** The usage report itself, whether freshly fetched or read back from the cache. */
+ report: ClaudeUsageReport;
+ /** Provenance of `report`: `"live"` for a fresh fetch, `"cache"` for the stored fallback. */
+ source: "live" | "cache";
+ /** Epoch-ms the cached report was last fetched from source. Only on `source: "cache"`. */
+ cachedAt?: number;
+}
+
// ─── Well-known Anthropic models ──────────────────────────────
/**
@@ -602,14 +618,23 @@ async function fetchClaudeUsage(accessToken: string): Promise<ClaudeUsageReport
}
}
-function getCachedUsage(keyId: string): ClaudeUsageReport | null {
+/**
+ * Read a cached usage report plus the epoch-ms it was last fetched from source.
+ * Returns `null` when there is no cached row (or on any DB/parse error).
+ */
+function getCachedUsageWithMeta(
+ keyId: string,
+): { report: ClaudeUsageReport; cachedAt: number } | null {
try {
const db = getDatabase();
const row = db
- .query("SELECT report_json FROM usage_cache WHERE key_id = $keyId")
- .get({ $keyId: keyId }) as { report_json: string } | null;
+ .query("SELECT report_json, cached_at FROM usage_cache WHERE key_id = $keyId")
+ .get({ $keyId: keyId }) as { report_json: string; cached_at: number } | null;
if (!row) return null;
- return JSON.parse(row.report_json) as ClaudeUsageReport;
+ return {
+ report: JSON.parse(row.report_json) as ClaudeUsageReport,
+ cachedAt: row.cached_at,
+ };
} catch {
return null;
}
@@ -635,13 +660,35 @@ function setCachedUsage(keyId: string, provider: string, report: ClaudeUsageRepo
}
}
-export async function getAccountUsage(account: ClaudeAccount): Promise<ClaudeUsageReport | null> {
+/**
+ * Fetch an account's usage report along with its provenance (live vs cache).
+ *
+ * Resolution: refresh credentials and hit the live `/api/oauth/usage` endpoint;
+ * on success the fresh report is cached and returned as `source: "live"`. If
+ * credentials cannot be refreshed OR the live fetch returns nothing, fall back
+ * to the local `usage_cache` row and return it as `source: "cache"` with the
+ * `cachedAt` timestamp recording when that payload was last fetched from source.
+ * Returns `null` only when neither a live report nor a cached row is available.
+ */
+export async function getAccountUsageWithSource(
+ account: ClaudeAccount,
+): Promise<ClaudeUsageResult | null> {
const creds = await refreshAccountCredentialsAsync(account);
- if (!creds) return getCachedUsage(account.id);
- const report = await fetchClaudeUsage(creds.accessToken);
- if (report) {
- setCachedUsage(account.id, "anthropic", report);
- return report;
+ if (creds) {
+ const report = await fetchClaudeUsage(creds.accessToken);
+ if (report) {
+ setCachedUsage(account.id, "anthropic", report);
+ return { report, source: "live" };
+ }
}
- return getCachedUsage(account.id);
+ const cached = getCachedUsageWithMeta(account.id);
+ if (cached) {
+ return { report: cached.report, source: "cache", cachedAt: cached.cachedAt };
+ }
+ return null;
+}
+
+export async function getAccountUsage(account: ClaudeAccount): Promise<ClaudeUsageReport | null> {
+ const result = await getAccountUsageWithSource(account);
+ return result?.report ?? null;
}
diff --git a/packages/core/src/credentials/index.ts b/packages/core/src/credentials/index.ts
index 5221dc6..131f035 100644
--- a/packages/core/src/credentials/index.ts
+++ b/packages/core/src/credentials/index.ts
@@ -15,9 +15,11 @@ export {
type ClaudeProfile,
type ClaudeUsageBucket,
type ClaudeUsageReport,
+ type ClaudeUsageResult,
discoverClaudeAccounts,
fetchAnthropicModels,
getAccountUsage,
+ getAccountUsageWithSource,
getAnthropicBetas,
getAnthropicHeaders,
getClaudeAccountsFromDB,
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 2789b2c..25cc909 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -98,9 +98,26 @@ export {
} from "./lsp/index.js";
// Models
export {
+ ACCEPTED_ATTACHMENT_MEDIA_TYPES,
+ ACCEPTED_IMAGE_MEDIA_TYPES,
+ ACCEPTED_PDF_MEDIA_TYPE,
+ type AttachmentValidationError,
+ type AttachmentValidationResult,
+ base64ByteLength,
getModelsCatalog,
+ hasAttachments,
+ isAcceptedAttachmentMediaType,
+ isImageMediaType,
+ isPdfMediaType,
+ MAX_ATTACHMENTS,
+ MAX_IMAGE_BYTES,
+ MAX_PDF_BYTES,
+ MAX_TOTAL_ATTACHMENT_BYTES,
+ type ModelInputCapabilities,
ModelRegistry,
resolveContextLimit,
+ resolveModelCapabilities,
+ validateUserContent,
} from "./models/index.js";
// Notifications (ntfy.sh)
export * from "./notifications/index.js";
@@ -115,6 +132,7 @@ export {
} from "./skills/index.js";
export { prefix as bashArityPrefix } from "./tools/bash-arity.js";
// Tools
+export { createKeyUsageTool, type KeyUsageCallbacks } from "./tools/key-usage.js";
export { createListFilesTool } from "./tools/list-files.js";
export { createLspTool, type LspToolContext } from "./tools/lsp.js";
export { createReadFileTool } from "./tools/read-file.js";
diff --git a/packages/core/src/models/attachments.ts b/packages/core/src/models/attachments.ts
new file mode 100644
index 0000000..5c98db4
--- /dev/null
+++ b/packages/core/src/models/attachments.ts
@@ -0,0 +1,151 @@
+// Validation + limits for multimodal user attachments (images / PDFs).
+//
+// Kept dependency-free (no DB / `bun:sqlite` import) so both the API layer
+// (`/chat` request validation) and any future caller can share the exact same
+// allowlist and size/count ceilings. The limits mirror Anthropic's documented
+// vision/PDF API constraints (the only image-capable providers Dispatch maps),
+// so a request that passes here won't be rejected by the provider for size.
+
+import type { UserAttachmentPart, UserContentPart } from "../types/index.js";
+
+/** Accepted image media types. */
+export const ACCEPTED_IMAGE_MEDIA_TYPES = [
+ "image/png",
+ "image/jpeg",
+ "image/webp",
+ "image/gif",
+] as const;
+
+/** The accepted document media type (PDF is the only one). */
+export const ACCEPTED_PDF_MEDIA_TYPE = "application/pdf";
+
+/** Every media type we accept as an attachment. */
+export const ACCEPTED_ATTACHMENT_MEDIA_TYPES = [
+ ...ACCEPTED_IMAGE_MEDIA_TYPES,
+ ACCEPTED_PDF_MEDIA_TYPE,
+] as const;
+
+/** Per-image byte ceiling (Anthropic: 5 MB/image). */
+export const MAX_IMAGE_BYTES = 5 * 1024 * 1024;
+
+/** Per-PDF byte ceiling (Anthropic: 32 MB/PDF). */
+export const MAX_PDF_BYTES = 32 * 1024 * 1024;
+
+/** Max attachments per message (Anthropic: 20 images/request). */
+export const MAX_ATTACHMENTS = 20;
+
+/**
+ * Total attachment payload ceiling for a single request (decoded bytes). Bounds
+ * the overall request size even when each individual file is within its limit.
+ */
+export const MAX_TOTAL_ATTACHMENT_BYTES = 32 * 1024 * 1024;
+
+/** True when `mediaType` is one of the allowlisted image types. */
+export function isImageMediaType(mediaType: string): boolean {
+	const allowed: readonly string[] = ACCEPTED_IMAGE_MEDIA_TYPES;
+	return allowed.includes(mediaType);
+}
+
+/** True when `mediaType` is exactly the accepted PDF media type. */
+export function isPdfMediaType(mediaType: string): boolean {
+	const pdfType: string = ACCEPTED_PDF_MEDIA_TYPE;
+	return pdfType === mediaType;
+}
+
+/** True when `mediaType` appears anywhere in the attachment allowlist (images or PDF). */
+export function isAcceptedAttachmentMediaType(mediaType: string): boolean {
+	const allowed: readonly string[] = ACCEPTED_ATTACHMENT_MEDIA_TYPES;
+	return allowed.includes(mediaType);
+}
+
+/**
+ * Decoded byte length of a base64 string, computed WITHOUT allocating the
+ * decoded buffer.
+ *
+ * Tolerates an optional `data:<mediaType>;base64,` prefix and any embedded
+ * whitespace (space, tab, CR, LF). Only the trailing run of `=` is treated as
+ * padding; an `=` elsewhere is counted like any other character. The data is
+ * not otherwise checked for being valid base64.
+ *
+ * @param b64 - Base64 text, optionally wrapped in a data URI.
+ * @returns The number of bytes the payload decodes to. Never negative; `0` for
+ *   an empty, whitespace-only, or padding-only string.
+ */
+export function base64ByteLength(b64: string): number {
+	// Skip a data-URI prefix if present.
+	const comma = b64.indexOf(",");
+	const start = b64.startsWith("data:") && comma !== -1 ? comma + 1 : 0;
+
+	let chars = 0; // non-whitespace characters seen so far
+	let trailingPad = 0; // length of the run of `=` that currently ends the input
+	for (let i = start; i < b64.length; i++) {
+		const ch = b64.charCodeAt(i);
+		// Skip whitespace (space, \t, \n, \r).
+		if (ch === 32 || ch === 9 || ch === 10 || ch === 13) continue;
+		chars++;
+		// 61 is "=". Any other character ends the run, so only real trailing
+		// padding is discounted and the result cannot go below zero.
+		trailingPad = ch === 61 ? trailingPad + 1 : 0;
+	}
+
+	// Every 4 data characters carry 3 bytes; a partial final group rounds down.
+	return Math.floor(((chars - trailingPad) * 3) / 4);
+}
+
+/**
+ * One violation found by `validateUserContent`, discriminated by `code`.
+ * `bytes` values are decoded sizes as computed by `base64ByteLength`; `limit`
+ * is the ceiling that was exceeded.
+ */
+export type AttachmentValidationError =
+ // The attachment's media type is not on the allowlist.
+ | { code: "unsupported-type"; mediaType: string }
+ // A non-PDF attachment is larger than `MAX_IMAGE_BYTES`.
+ | { code: "image-too-large"; mediaType: string; bytes: number; limit: number }
+ // A PDF attachment is larger than `MAX_PDF_BYTES`.
+ | { code: "pdf-too-large"; bytes: number; limit: number }
+ // More than `MAX_ATTACHMENTS` attachments in one message.
+ | { code: "too-many"; count: number; limit: number }
+ // Combined size of the measured attachments exceeds `MAX_TOTAL_ATTACHMENT_BYTES`.
+ | { code: "total-too-large"; bytes: number; limit: number }
+ // The attachment's data measured as zero bytes.
+ | { code: "empty"; mediaType: string };
+
+/** Outcome of `validateUserContent`: `ok` is true exactly when `errors` is empty. */
+export interface AttachmentValidationResult {
+ ok: boolean;
+ errors: AttachmentValidationError[];
+}
+
+/** Pick out the attachment parts of a mixed content list, keeping their order. */
+function attachmentsOf(content: UserContentPart[]): UserAttachmentPart[] {
+	const isAttachment = (part: UserContentPart): part is UserAttachmentPart =>
+		part.type === "attachment";
+	return content.filter(isAttachment);
+}
+
+/**
+ * Validate the attachments in a multimodal user content list against the
+ * media-type allowlist and the size/count ceilings. Pure: it has no throw
+ * statements of its own and collects every violation, so the caller can report
+ * them all at once.
+ *
+ * Text parts are ignored (always valid). An empty content list is valid (it's
+ * just a text-only message expressed as parts).
+ *
+ * @param content - Ordered text/attachment parts of one user message.
+ * @returns `{ ok, errors }`, where `ok` is true exactly when `errors` is empty.
+ */
+export function validateUserContent(content: UserContentPart[]): AttachmentValidationResult {
+	const errors: AttachmentValidationError[] = [];
+	const attachments = attachmentsOf(content);
+
+	if (attachments.length > MAX_ATTACHMENTS) {
+		errors.push({ code: "too-many", count: attachments.length, limit: MAX_ATTACHMENTS });
+	}
+
+	let total = 0;
+	for (const att of attachments) {
+		if (!isAcceptedAttachmentMediaType(att.mediaType)) {
+			// Unsupported types are reported but not measured, so they do not
+			// count toward the aggregate size.
+			errors.push({ code: "unsupported-type", mediaType: att.mediaType });
+			continue;
+		}
+		const bytes = base64ByteLength(att.data);
+		// `<= 0` rather than `=== 0`: a non-positive size must be reported as
+		// empty and must never lower the running total.
+		if (bytes <= 0) {
+			errors.push({ code: "empty", mediaType: att.mediaType });
+			continue;
+		}
+		total += bytes;
+		if (isPdfMediaType(att.mediaType)) {
+			if (bytes > MAX_PDF_BYTES) {
+				errors.push({ code: "pdf-too-large", bytes, limit: MAX_PDF_BYTES });
+			}
+		} else if (bytes > MAX_IMAGE_BYTES) {
+			errors.push({
+				code: "image-too-large",
+				mediaType: att.mediaType,
+				bytes,
+				limit: MAX_IMAGE_BYTES,
+			});
+		}
+	}
+
+	if (total > MAX_TOTAL_ATTACHMENT_BYTES) {
+		errors.push({ code: "total-too-large", bytes: total, limit: MAX_TOTAL_ATTACHMENT_BYTES });
+	}
+
+	return { ok: errors.length === 0, errors };
+}
+
+/** Reports whether `content` holds at least one attachment part; a missing list counts as none. */
+export function hasAttachments(content: UserContentPart[] | undefined | null): boolean {
+	if (!content) return false;
+	return content.some((part) => part.type === "attachment");
+}
diff --git a/packages/core/src/models/catalog.ts b/packages/core/src/models/catalog.ts
index dea4647..ac310b1 100644
--- a/packages/core/src/models/catalog.ts
+++ b/packages/core/src/models/catalog.ts
@@ -18,6 +18,15 @@ interface ModelsDevModel {
context?: number;
output?: number;
};
+ /**
+ * Input/output modalities the model accepts. We read `input` to decide
+ * whether the model can take image / pdf attachments. Absent on older
+ * catalog entries — treated as "unknown" (capability resolves to `null`).
+ */
+ modalities?: {
+ input?: string[];
+ output?: string[];
+ };
}
interface ModelsDevProvider {
@@ -172,6 +181,47 @@ export async function resolveContextLimit(
return null;
}
+/**
+ * Image / PDF input capabilities for a model, resolved from the models.dev
+ * catalog's `modalities.input` list.
+ */
+export interface ModelInputCapabilities {
+ /** Model accepts image input (vision). */
+ // Set by `resolveModelCapabilities` from whether `modalities.input` lists "image".
+ image: boolean;
+ /** Model accepts PDF/document input. */
+ // Set by `resolveModelCapabilities` from whether `modalities.input` lists "pdf".
+ pdf: boolean;
+}
+
+/**
+ * Resolve whether a model accepts image / pdf input for the given Dispatch
+ * provider + model id.
+ *
+ * @param provider - Dispatch provider name, looked up in `PROVIDER_MAP`.
+ * @param modelId - Model id, looked up under each mapped catalog provider.
+ * @returns `{ image, pdf }` taken from the first mapped catalog entry that has a
+ *   `modalities.input` array — a definitive yes/no for each. `null` when the
+ *   capability is UNKNOWN: the provider is unsupported/unmapped, `modelId` is
+ *   empty, the model is absent from the catalog, the entry predates the
+ *   `modalities` field, or the catalog is unavailable.
+ *
+ * Callers should treat `null` as "can't verify" (optimistic allow) rather than a
+ * definitive "no", so a temporary catalog outage never disables a known-good
+ * vision model.
+ */
+export async function resolveModelCapabilities(
+	provider: string,
+	modelId: string,
+): Promise<ModelInputCapabilities | null> {
+	const candidates = PROVIDER_MAP[provider];
+	// Require a real array, not just a truthy value: `provider` arrives from the
+	// `/capabilities` query string, and indexing an ordinary object with an
+	// inherited key such as "constructor" yields a truthy non-array that the loop
+	// below cannot iterate. NOTE(review): assumes PROVIDER_MAP is a plain object
+	// literal — confirm.
+	if (!Array.isArray(candidates) || !modelId) return null;
+
+	const catalog = await getModelsCatalog();
+	for (const providerId of candidates) {
+		const input = catalog[providerId]?.models?.[modelId]?.modalities?.input;
+		if (Array.isArray(input)) {
+			return { image: input.includes("image"), pdf: input.includes("pdf") };
+		}
+	}
+	return null;
+}
+
/** Test-only: reset the in-process memo so a test can re-exercise loading. */
export function __resetCatalogCacheForTests(): void {
cached = null;
diff --git a/packages/core/src/models/index.ts b/packages/core/src/models/index.ts
index 2fcd657..15d1ee2 100644
--- a/packages/core/src/models/index.ts
+++ b/packages/core/src/models/index.ts
@@ -1,5 +1,24 @@
export {
+ ACCEPTED_ATTACHMENT_MEDIA_TYPES,
+ ACCEPTED_IMAGE_MEDIA_TYPES,
+ ACCEPTED_PDF_MEDIA_TYPE,
+ type AttachmentValidationError,
+ type AttachmentValidationResult,
+ base64ByteLength,
+ hasAttachments,
+ isAcceptedAttachmentMediaType,
+ isImageMediaType,
+ isPdfMediaType,
+ MAX_ATTACHMENTS,
+ MAX_IMAGE_BYTES,
+ MAX_PDF_BYTES,
+ MAX_TOTAL_ATTACHMENT_BYTES,
+ validateUserContent,
+} from "./attachments.js";
+export {
getModelsCatalog,
+ type ModelInputCapabilities,
resolveContextLimit,
+ resolveModelCapabilities,
} from "./catalog.js";
export { ModelRegistry } from "./registry.js";
diff --git a/packages/core/src/tools/key-usage.ts b/packages/core/src/tools/key-usage.ts
new file mode 100644
index 0000000..0655ad7
--- /dev/null
+++ b/packages/core/src/tools/key-usage.ts
@@ -0,0 +1,322 @@
+import { z } from "zod";
+import type { ClaudeAccount, ClaudeUsageReport, ClaudeUsageResult } from "../credentials/claude.js";
+import { getAccountUsageWithSource } from "../credentials/claude.js";
+import type { OpencodeUsageReport } from "../credentials/opencode.js";
+import { fetchOpencodeUsage as defaultFetchOpencodeUsage } from "../credentials/opencode.js";
+import type { KeyState, ToolDefinition } from "../types/index.js";
+
+/**
+ * Dependencies the `key_usage` tool receives from the API layer, which owns the
+ * live `ModelRegistry` and the discovered Claude accounts. Both `fetch*` hooks
+ * are optional: when omitted the real credential fetchers are used, and tests
+ * can supply stubs to run the tool without network or DB access.
+ */
+export interface KeyUsageCallbacks {
+  /** Snapshot of every key's state (definition plus active/exhausted status) from the model registry. */
+  listKeys(): KeyState[];
+  /** Claude accounts that were discovered; `anthropic` keys are resolved to credentials through these. */
+  listClaudeAccounts(): ClaudeAccount[];
+  /**
+   * Usage fetcher for an anthropic account, reporting provenance (live vs
+   * cache) alongside the figures. Falls back to `getAccountUsageWithSource`.
+   */
+  fetchAnthropicUsage?: (account: ClaudeAccount) => Promise<ClaudeUsageResult | null>;
+  /**
+   * Usage fetcher for an opencode-go key. Always a live scrape, since OpenCode
+   * keeps no local cache. Falls back to `fetchOpencodeUsage`.
+   */
+  fetchOpencodeUsage?: (keyId: string) => Promise<OpencodeUsageReport | null>;
+}
+
+/** One normalized rate-limit window (labelled "5-hour", "week" or "month"). */
+interface UsageWindow {
+  /** Display label of the window. */
+  label: string;
+  /** Headroom still available, as a percentage in 0–100. Absent when the source reports no utilization. */
+  remainingPercent?: number;
+  /** Reset time of the window in epoch milliseconds. Absent when the source reports no reset time. */
+  resetsAt?: number;
+}
+
+/** Normalized usage record for one key — the input shape consumed by `formatKeyUsage`. */
+interface KeyUsageEntry {
+  keyId: string;
+  provider: string;
+  status: "active" | "exhausted";
+  lastError?: string;
+  exhaustedAt?: number;
+  /** Where the usage figures came from: a fresh live fetch or a cached payload. */
+  dataSource?: "live" | "cache";
+  /** Epoch-ms at which the cached payload was last fetched from source (only with `dataSource: "cache"`). */
+  cachedAt?: number;
+  windows: UsageWindow[];
+  /** Present when a supported key yielded no usage figures; explains why. */
+  unavailableReason?: string;
+  /** Present when the key's provider has no usage-reporting support. */
+  unsupported?: boolean;
+}
+
+/** Clamp `value` into the inclusive range 0–100; values already in range pass through unchanged. */
+function clampPercent(value: number): number {
+  return value < 0 ? 0 : value > 100 ? 100 : value;
+}
+
+/**
+ * Convert a raw `{ utilization, resetsAt }` bucket into a normalized window.
+ *
+ * `utilization` is treated as a used fraction (0 = unused, 1 = fully used) and
+ * is reported as remaining headroom, rounded to a whole percent and clamped to
+ * 0–100. `resetsAt` is passed through as epoch milliseconds.
+ *
+ * @param label Display label for the window (e.g. "5-hour").
+ * @param bucket Raw usage bucket; may be missing entirely.
+ * @returns The normalized window, or `null` when the bucket is missing or
+ *   carries neither a usable utilization nor a usable reset time.
+ */
+function toWindow(
+  label: string,
+  bucket?: { utilization?: number; resetsAt?: number },
+): UsageWindow | null {
+  if (!bucket) return null;
+  const { utilization, resetsAt } = bucket;
+
+  // `typeof x === "number"` also admits NaN and ±Infinity. Those would render
+  // as "NaN% remaining" or make `Date.prototype.toISOString()` throw when the
+  // window is formatted, so non-finite values are treated as absent.
+  const window: UsageWindow = { label };
+  if (typeof utilization === "number" && Number.isFinite(utilization)) {
+    window.remainingPercent = clampPercent(Math.round((1 - utilization) * 100));
+  }
+  if (typeof resetsAt === "number" && Number.isFinite(resetsAt)) {
+    window.resetsAt = resetsAt;
+  }
+
+  if (window.remainingPercent === undefined && window.resetsAt === undefined) return null;
+  return window;
+}
+
+/** Normalize a Claude usage report into its "5-hour" and "week" windows, dropping any that carry no data. */
+function anthropicWindows(report: ClaudeUsageReport): UsageWindow[] {
+  const candidates = [toWindow("5-hour", report.fiveHour), toWindow("week", report.sevenDay)];
+  return candidates.filter((candidate): candidate is UsageWindow => candidate !== null);
+}
+
+/** Normalize an OpenCode usage report into its "5-hour", "week" and "month" windows, dropping any that carry no data. */
+function opencodeWindows(report: OpencodeUsageReport): UsageWindow[] {
+  const candidates = [
+    toWindow("5-hour", report.fiveHour),
+    toWindow("week", report.weekly),
+    toWindow("month", report.monthly),
+  ];
+  return candidates.filter((candidate): candidate is UsageWindow => candidate !== null);
+}
+
+/**
+ * Pick the Claude account that backs an `anthropic` key.
+ *
+ * The first account whose id equals `keyId`, or whose `source` file equals the
+ * key's `credentials_file`, is used. When none matches, the first account in
+ * the list is returned as a fallback — the same rule the existing
+ * `/models/key-usage` route applies.
+ *
+ * @returns The chosen account, or `undefined` when `accounts` is empty.
+ */
+function matchAnthropicAccount(
+  accounts: ClaudeAccount[],
+  keyId: string,
+  credFile?: string,
+): ClaudeAccount | undefined {
+  for (const candidate of accounts) {
+    if (candidate.id === keyId) return candidate;
+    if (credFile != null && candidate.source === credFile) return candidate;
+  }
+  return accounts[0];
+}
+
+/**
+ * Render an epoch-millisecond timestamp as an ISO-8601 UTC string.
+ *
+ * `Date.prototype.toISOString()` throws a `RangeError` for an invalid date
+ * (NaN, ±Infinity, or a value outside the representable range). A single bad
+ * timestamp must not abort the whole usage report, so such inputs yield the
+ * placeholder `"invalid timestamp"` instead of throwing.
+ */
+function iso(ms: number): string {
+  const date = new Date(ms);
+  return Number.isNaN(date.getTime()) ? "invalid timestamp" : date.toISOString();
+}
+
+/**
+ * Coarse human-readable duration such as "3h 12m", "5d 8h" or "0m".
+ *
+ * The sign of `ms` is ignored and the value is rounded to whole seconds first.
+ * Zero-valued units are omitted, except that minutes are always shown when
+ * nothing else would be.
+ */
+function formatDuration(ms: number): string {
+  const totalSec = Math.round(Math.abs(ms) / 1000);
+  const minutes = Math.floor((totalSec % 3600) / 60);
+  const largeUnits: Array<[number, string]> = [
+    [Math.floor(totalSec / 86400), "d"],
+    [Math.floor((totalSec % 86400) / 3600), "h"],
+  ];
+
+  const parts = largeUnits
+    .filter(([amount]) => amount > 0)
+    .map(([amount, suffix]) => `${amount}${suffix}`);
+  if (minutes > 0 || parts.length === 0) parts.push(`${minutes}m`);
+  return parts.join(" ");
+}
+
+/** Describe `targetMs` relative to `nowMs`: "in <duration>" for now or the future, "<duration> ago" for the past. */
+function formatRelative(targetMs: number, nowMs: number): string {
+  const delta = targetMs - nowMs;
+  const span = formatDuration(delta);
+  if (delta >= 0) return `in ${span}`;
+  return `${span} ago`;
+}
+
+/** Render one usage window as "<label>: <remaining>, <reset time>", including only the figures it carries. */
+function formatWindow(window: UsageWindow, now: number): string {
+  const { label, remainingPercent, resetsAt } = window;
+  const details: string[] = [];
+  if (typeof remainingPercent === "number") details.push(`${remainingPercent}% remaining`);
+  if (typeof resetsAt === "number") {
+    details.push(`resets ${iso(resetsAt)} (${formatRelative(resetsAt, now)})`);
+  }
+  return `${label}: ${details.join(", ")}`;
+}
+
+/**
+ * Turn normalized usage entries into a plain-text report aimed at an AI reader.
+ * The function is pure: the current time is passed in as `now`, so relative
+ * timestamps stay deterministic under test.
+ *
+ * @param entries Normalized per-key usage records, rendered in the given order.
+ * @param now Current time in epoch milliseconds, used for "in …" / "… ago" text.
+ * @returns The report text, or "No API keys matched." for an empty list.
+ */
+export function formatKeyUsage(entries: KeyUsageEntry[], now: number): string {
+  if (entries.length === 0) return "No API keys matched.";
+
+  const plural = entries.length === 1 ? "" : "s";
+  const out: string[] = [`API key usage — ${entries.length} key${plural}:`];
+
+  for (const entry of entries) {
+    // A blank line separates each key's section from the previous one.
+    out.push("", `[${entry.keyId}] provider: ${entry.provider}`);
+
+    if (entry.status !== "exhausted") {
+      out.push("status: active");
+    } else {
+      let since = "";
+      if (typeof entry.exhaustedAt === "number") {
+        since = ` (since ${iso(entry.exhaustedAt)}, ${formatRelative(entry.exhaustedAt, now)})`;
+      }
+      out.push(`status: EXHAUSTED${since}`);
+      if (entry.lastError) out.push(`last error: ${entry.lastError}`);
+    }
+
+    // Providers without usage reporting get a single explanatory line and nothing else.
+    if (entry.unsupported) {
+      out.push(
+        `usage: not supported for provider "${entry.provider}" (only anthropic and opencode-go report usage)`,
+      );
+      continue;
+    }
+
+    switch (entry.dataSource) {
+      case "live":
+        out.push("data: live (fetched just now)");
+        break;
+      case "cache":
+        if (typeof entry.cachedAt === "number") {
+          out.push(
+            `data: cached — last fetched from source ${iso(entry.cachedAt)} (${formatRelative(entry.cachedAt, now)})`,
+          );
+        } else {
+          out.push("data: cached (source timestamp unknown)");
+        }
+        break;
+      default:
+        break;
+    }
+
+    out.push(...entry.windows.map((window) => formatWindow(window, now)));
+
+    if (entry.unavailableReason) {
+      out.push(`usage: unavailable — ${entry.unavailableReason}`);
+    }
+  }
+
+  return out.join("\n");
+}
+
+/**
+ * Build the normalized usage entry for one key.
+ *
+ * `anthropic` keys are resolved to a Claude account and queried through
+ * `fetchAnthropic`; `opencode-go` keys are queried through `fetchOpencode`;
+ * every other provider is marked `unsupported`. A fetcher that throws is
+ * handled the same as one that returns `null`: the entry is returned with an
+ * `unavailableReason` instead of propagating the error.
+ *
+ * @param key Registry state of the key to report.
+ * @param accounts Discovered Claude accounts, used only for `anthropic` keys.
+ * @param fetchAnthropic Usage fetcher for a Claude account.
+ * @param fetchOpencode Usage fetcher for an opencode-go key id.
+ */
+async function buildEntry(
+  key: KeyState,
+  accounts: ClaudeAccount[],
+  fetchAnthropic: (account: ClaudeAccount) => Promise<ClaudeUsageResult | null>,
+  fetchOpencode: (keyId: string) => Promise<OpencodeUsageReport | null>,
+): Promise<KeyUsageEntry> {
+  const { definition } = key;
+  const entry: KeyUsageEntry = {
+    keyId: definition.id,
+    provider: definition.provider,
+    status: key.status,
+    windows: [],
+  };
+  if (key.lastError) entry.lastError = key.lastError;
+  if (typeof key.exhaustedAt === "number") entry.exhaustedAt = key.exhaustedAt;
+
+  switch (definition.provider) {
+    case "anthropic": {
+      const account = matchAnthropicAccount(accounts, definition.id, definition.credentials_file);
+      if (!account) {
+        entry.unavailableReason = "no Claude account credentials available for this key";
+        return entry;
+      }
+
+      let usage: ClaudeUsageResult | null;
+      try {
+        usage = await fetchAnthropic(account);
+      } catch {
+        // Best effort: a failed fetch is reported as "unavailable", not thrown.
+        usage = null;
+      }
+      if (!usage) {
+        entry.unavailableReason = "no live usage data and no cached usage available";
+        return entry;
+      }
+
+      entry.dataSource = usage.source;
+      if (typeof usage.cachedAt === "number") entry.cachedAt = usage.cachedAt;
+      entry.windows = anthropicWindows(usage.report);
+      if (entry.windows.length === 0) {
+        entry.unavailableReason = "usage endpoint returned no window data";
+      }
+      return entry;
+    }
+
+    case "opencode-go": {
+      let usage: OpencodeUsageReport | null;
+      try {
+        usage = await fetchOpencode(definition.id);
+      } catch {
+        // Best effort: a failed fetch is reported as "unavailable", not thrown.
+        usage = null;
+      }
+      if (!usage) {
+        entry.unavailableReason =
+          "live usage unavailable (requires OPENCODE_COOKIE and a workspace id, or the source returned no data; OpenCode keeps no local cache)";
+        return entry;
+      }
+
+      // OpenCode figures are always a fresh fetch; there is no cached variant.
+      entry.dataSource = "live";
+      entry.windows = opencodeWindows(usage);
+      if (entry.windows.length === 0) {
+        entry.unavailableReason = "usage source returned no window data";
+      }
+      return entry;
+    }
+
+    default:
+      entry.unsupported = true;
+      return entry;
+  }
+}
+
+/**
+ * Create the `key_usage` tool definition.
+ *
+ * The tool reports usage for every configured key, or for the single key named
+ * by the optional `key_id` argument, and returns the text produced by
+ * `formatKeyUsage`. Keys are queried one after another, in registry order.
+ *
+ * @param callbacks Registry/account accessors plus optional usage fetchers;
+ *   omitted fetchers fall back to the real credential fetchers.
+ */
+export function createKeyUsageTool(callbacks: KeyUsageCallbacks): ToolDefinition {
+  const fetchAnthropic = callbacks.fetchAnthropicUsage ?? getAccountUsageWithSource;
+  const fetchOpencode = callbacks.fetchOpencodeUsage ?? defaultFetchOpencodeUsage;
+
+  const description = [
+    "Report current usage levels for configured API keys so you can pick a key with",
+    "headroom, warn before hitting a rate limit, or diagnose an exhausted-key failure.",
+    "",
+    "For each key it returns: provider, active/exhausted status (with the last error when",
+    "exhausted), remaining rate-limit headroom per window (5-hour, weekly, and monthly where",
+    "the provider exposes it), each window's reset timestamp, and whether the figures are",
+    "live or served from cache (with the cache's last-fetched time).",
+    "",
+    "Pass a key_id to inspect one key; omit it to report all keys. Usage reporting is",
+    "supported for anthropic and opencode-go keys.",
+  ].join("\n");
+
+  const parameters = z.object({
+    key_id: z
+      .string()
+      .optional()
+      .describe(
+        'The id of a single key to report (as configured in dispatch.toml, e.g. "claude-max"). Omit to report all configured keys.',
+      ),
+  });
+
+  return {
+    name: "key_usage",
+    description,
+    parameters,
+    execute: async (args: Record<string, unknown>): Promise<string> => {
+      // A blank or whitespace-only key_id counts as "not provided".
+      const trimmedKeyId = (args.key_id as string | undefined)?.trim();
+      const requestedKeyId = trimmedKeyId ? trimmedKeyId : undefined;
+
+      const allKeys = callbacks.listKeys();
+      if (allKeys.length === 0) {
+        return "No API keys are configured.";
+      }
+
+      const keys = requestedKeyId
+        ? allKeys.filter((k) => k.definition.id === requestedKeyId)
+        : allKeys;
+      if (requestedKeyId && keys.length === 0) {
+        const available = allKeys.map((k) => k.definition.id).join(", ");
+        return `Error: no key found with id "${requestedKeyId}". Available keys: ${available}.`;
+      }
+
+      const accounts = callbacks.listClaudeAccounts();
+      const entries: KeyUsageEntry[] = [];
+      for (const key of keys) {
+        const entry = await buildEntry(key, accounts, fetchAnthropic, fetchOpencode);
+        entries.push(entry);
+      }
+
+      return formatKeyUsage(entries, Date.now());
+    },
+  };
+}
diff --git a/packages/core/src/tools/summon.ts b/packages/core/src/tools/summon.ts
index b941152..2a076e6 100644
--- a/packages/core/src/tools/summon.ts
+++ b/packages/core/src/tools/summon.ts
@@ -287,6 +287,7 @@ export function createSummonTool(
"write_file",
"run_shell",
"search_code",
+ "key_usage",
"todo",
"summon",
"retrieve",
diff --git a/packages/core/src/types/index.ts b/packages/core/src/types/index.ts
index f7944c9..4e3fa0b 100644
--- a/packages/core/src/types/index.ts
+++ b/packages/core/src/types/index.ts
@@ -76,8 +76,57 @@ export interface SystemChunk {
export interface ChatMessage {
role: MessageRole;
chunks: Chunk[];
+ /**
+ * Ephemeral ORDERED multimodal content for a user turn (interleaved text +
+ * image/pdf attachments). Set ONLY transiently on the in-flight user message
+ * so `toModelMessages` can emit multimodal `ImagePart`/`FilePart` content to
+ * the provider. Never persisted (the chunk log stores only the text, with
+ * `[image]`/`[pdf]` markers), so it's absent on history-rebuilt messages.
+ * When absent, the message is plain text built from its `chunks`.
+ */
+ content?: UserContentPart[];
}
+// ─── Multimodal user content (image / PDF attachments) ───────────
+//
+// When a user pastes one or more images/PDFs into the chat input, the turn's
+// user message carries an ORDERED list of content parts instead of a plain
+// string. The ordering is meaningful — the user can interleave text and
+// attachments ("here is image A: <A>, here is image B: <B>") and the model
+// sees them in exactly that sequence.
+//
+// These parts are EPHEMERAL: they are forwarded to the model for the turn that
+// produced them but are NOT persisted as raw bytes in the chunk log. History
+// stores only the user's text (with `[image]` / `[pdf]` markers in place of
+// each attachment), so a later reload re-renders the text but never re-sends
+// the binary payload. This keeps the persisted log small and avoids re-billing
+// image tokens on every subsequent turn.
+
+/** Text segment of a multimodal user message; discriminated by `type: "text"`. */
+export interface UserTextPart {
+  type: "text";
+  /** The literal text of this segment. */
+  text: string;
+}
+
+/**
+ * Binary attachment segment (image or PDF) of a multimodal user message;
+ * discriminated by `type: "attachment"`. The payload travels as base64 text
+ * together with its IANA media type. The optional filename is only used for
+ * PDF `filename` passthrough and diagnostics.
+ */
+export interface UserAttachmentPart {
+  type: "attachment";
+  /** IANA media type of the payload, e.g. `image/png`, `image/jpeg`, `application/pdf`. */
+  mediaType: string;
+  /** Payload bytes encoded as base64, with NO `data:` URI prefix. */
+  data: string;
+  /** Original filename, when known (mainly for PDFs). */
+  name?: string;
+}
+
+/** A single element of a user message's ordered multimodal content: either text or an attachment. */
+export type UserContentPart = UserTextPart | UserAttachmentPart;
+
// ─── Append-only chunk log (persisted model) ─────────────────────
//
// The DB stores a conversation as a flat stream of `ChunkRow`s (see
diff --git a/packages/core/tests/agent/agent.test.ts b/packages/core/tests/agent/agent.test.ts
index d8edec7..f4b33cc 100644
--- a/packages/core/tests/agent/agent.test.ts
+++ b/packages/core/tests/agent/agent.test.ts
@@ -1544,4 +1544,102 @@ describe("anthropicThinkingProviderOptions — adaptive-thinking model detection
effort: "xhigh",
});
});
+
+ describe("multimodal user content", () => {
+ it("emits ordered text + image parts to the model when content is provided", async () => {
+ vi.mocked(streamText).mockReturnValue(
+ makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]),
+ );
+
+ const agent = new Agent(makeConfig());
+ for await (const _ of agent.run("here is image A: [image]", {
+ content: [
+ { type: "text", text: "here is image A: " },
+ { type: "attachment", mediaType: "image/png", data: "QQ==" },
+ ],
+ })) {
+ // consume
+ }
+
+ const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0];
+ const messages = callArgs?.messages as Array<{ role: string; content: unknown }>;
+ const userMsg = messages.find((m) => m.role === "user");
+ expect(userMsg).toBeDefined();
+ // Multimodal turn → content is an ordered parts array, not a string.
+ expect(Array.isArray(userMsg?.content)).toBe(true);
+ const parts = userMsg?.content as Array<Record<string, unknown>>;
+ expect(parts[0]).toMatchObject({ type: "text", text: "here is image A: " });
+ expect(parts[1]).toMatchObject({ type: "image", mediaType: "image/png" });
+ expect(String(parts[1]?.image)).toBe("data:image/png;base64,QQ==");
+ });
+
+ it("emits a FilePart for a PDF attachment with its filename", async () => {
+ vi.mocked(streamText).mockReturnValue(
+ makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]),
+ );
+
+ const agent = new Agent(makeConfig());
+ for await (const _ of agent.run("see [pdf]", {
+ content: [
+ { type: "text", text: "see " },
+ { type: "attachment", mediaType: "application/pdf", data: "QQ==", name: "doc.pdf" },
+ ],
+ })) {
+ // consume
+ }
+
+ const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0];
+ const messages = callArgs?.messages as Array<{ role: string; content: unknown }>;
+ const userMsg = messages.find((m) => m.role === "user");
+ const parts = userMsg?.content as Array<Record<string, unknown>>;
+ const filePart = parts.find((p) => p.type === "file");
+ expect(filePart).toMatchObject({
+ type: "file",
+ mediaType: "application/pdf",
+ filename: "doc.pdf",
+ });
+ expect(String(filePart?.data)).toBe("data:application/pdf;base64,QQ==");
+ });
+
+ it("persists the user turn as text only (no content) for history", async () => {
+ vi.mocked(streamText).mockReturnValue(
+ makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]),
+ );
+
+ const agent = new Agent(makeConfig());
+ for await (const _ of agent.run("look: [image]", {
+ content: [
+ { type: "text", text: "look: " },
+ { type: "attachment", mediaType: "image/png", data: "QQ==" },
+ ],
+ })) {
+ // consume
+ }
+
+ // The in-memory user message keeps the text chunk for the render/persist
+ // path; the ephemeral `content` rides alongside it but isn't a chunk.
+ const userMsg = agent.messages.find((m) => m.role === "user");
+ expect(userMsg?.chunks).toEqual([{ type: "text", text: "look: [image]" }]);
+ });
+
+ it("falls back to a plain string when content has no attachment", async () => {
+ vi.mocked(streamText).mockReturnValue(
+ makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]),
+ );
+
+ const agent = new Agent(makeConfig());
+ for await (const _ of agent.run("plain text", {
+ content: [{ type: "text", text: "plain text" }],
+ })) {
+ // consume
+ }
+
+ const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0];
+ const messages = callArgs?.messages as Array<{ role: string; content: unknown }>;
+ const userMsg = messages.find((m) => m.role === "user");
+ // No attachment → plain string content (byte-identical to text-only path).
+ expect(typeof userMsg?.content).toBe("string");
+ expect(userMsg?.content).toBe("plain text");
+ });
+ });
});
diff --git a/packages/core/tests/models/attachments.test.ts b/packages/core/tests/models/attachments.test.ts
new file mode 100644
index 0000000..11a9f82
--- /dev/null
+++ b/packages/core/tests/models/attachments.test.ts
@@ -0,0 +1,136 @@
+import { describe, expect, it } from "vitest";
+import {
+ base64ByteLength,
+ isAcceptedAttachmentMediaType,
+ isImageMediaType,
+ isPdfMediaType,
+ MAX_ATTACHMENTS,
+ MAX_IMAGE_BYTES,
+ MAX_PDF_BYTES,
+ MAX_TOTAL_ATTACHMENT_BYTES,
+ validateUserContent,
+} from "../../src/models/attachments.js";
+import type { UserContentPart } from "../../src/types/index.js";
+
+/**
+ * Build an unpadded base64 string of "A" characters whose decoded length is
+ * `bytes` rounded UP to the next multiple of 3 — so it decodes to exactly
+ * `bytes` bytes only when `bytes` is itself a multiple of 3.
+ */
+function base64OfBytes(bytes: number): string {
+ // 4 base64 chars → 3 bytes. Use a multiple of 3 for clean (unpadded) output.
+ const groups = Math.ceil(bytes / 3);
+ return "A".repeat(groups * 4);
+}
+
+/** Build an attachment part carrying `data`; the media type defaults to `image/png`. */
+function imagePart(data: string, mediaType = "image/png"): UserContentPart {
+  const part: UserContentPart = { type: "attachment", mediaType, data };
+  return part;
+}
+
+describe("media-type predicates", () => {
+ it("classifies image types", () => {
+ expect(isImageMediaType("image/png")).toBe(true);
+ expect(isImageMediaType("image/jpeg")).toBe(true);
+ expect(isImageMediaType("image/webp")).toBe(true);
+ expect(isImageMediaType("image/gif")).toBe(true);
+ expect(isImageMediaType("application/pdf")).toBe(false);
+ expect(isImageMediaType("image/svg+xml")).toBe(false);
+ });
+
+ it("classifies pdf + accepted types", () => {
+ expect(isPdfMediaType("application/pdf")).toBe(true);
+ expect(isPdfMediaType("image/png")).toBe(false);
+ expect(isAcceptedAttachmentMediaType("image/gif")).toBe(true);
+ expect(isAcceptedAttachmentMediaType("application/pdf")).toBe(true);
+ expect(isAcceptedAttachmentMediaType("text/plain")).toBe(false);
+ });
+});
+
+describe("base64ByteLength", () => {
+ it("computes decoded length without padding", () => {
+ // "AAAA" → 3 bytes.
+ expect(base64ByteLength("AAAA")).toBe(3);
+ });
+
+ it("accounts for padding", () => {
+ // "QQ==" → 1 byte ("A").
+ expect(base64ByteLength("QQ==")).toBe(1);
+ // "QUI=" → 2 bytes ("AB").
+ expect(base64ByteLength("QUI=")).toBe(2);
+ });
+
+ it("tolerates a data: URI prefix and whitespace", () => {
+ expect(base64ByteLength("data:image/png;base64,AAAA")).toBe(3);
+ expect(base64ByteLength("AA\nAA")).toBe(3);
+ });
+
+ it("returns 0 for empty input", () => {
+ expect(base64ByteLength("")).toBe(0);
+ expect(base64ByteLength(" ")).toBe(0);
+ });
+});
+
+describe("validateUserContent", () => {
+ it("accepts a small image and ignores text parts", () => {
+ const content: UserContentPart[] = [
+ { type: "text", text: "hi" },
+ imagePart(base64OfBytes(1024)),
+ ];
+ expect(validateUserContent(content)).toEqual({ ok: true, errors: [] });
+ });
+
+ it("accepts an empty / text-only content list", () => {
+ expect(validateUserContent([]).ok).toBe(true);
+ expect(validateUserContent([{ type: "text", text: "no files" }]).ok).toBe(true);
+ });
+
+ it("rejects an unsupported media type", () => {
+ const res = validateUserContent([imagePart(base64OfBytes(10), "image/svg+xml")]);
+ expect(res.ok).toBe(false);
+ expect(res.errors[0]).toMatchObject({ code: "unsupported-type", mediaType: "image/svg+xml" });
+ });
+
+ it("rejects an oversized image but allows a PDF of the same size", () => {
+ const big = base64OfBytes(MAX_IMAGE_BYTES + 3);
+ const imgRes = validateUserContent([imagePart(big, "image/png")]);
+ expect(imgRes.ok).toBe(false);
+ expect(imgRes.errors.some((e) => e.code === "image-too-large")).toBe(true);
+
+ // Same byte size as a PDF is fine (PDF limit is much higher).
+ const pdfRes = validateUserContent([imagePart(big, "application/pdf")]);
+ expect(pdfRes.ok).toBe(true);
+ });
+
+ it("rejects an oversized PDF", () => {
+ const res = validateUserContent([
+ imagePart(base64OfBytes(MAX_PDF_BYTES + 3), "application/pdf"),
+ ]);
+ expect(res.ok).toBe(false);
+ expect(res.errors.some((e) => e.code === "pdf-too-large")).toBe(true);
+ });
+
+ it("rejects an empty attachment payload", () => {
+ const res = validateUserContent([imagePart("", "image/png")]);
+ expect(res.ok).toBe(false);
+ expect(res.errors.some((e) => e.code === "empty")).toBe(true);
+ });
+
+ it("rejects too many attachments", () => {
+ const content: UserContentPart[] = Array.from({ length: MAX_ATTACHMENTS + 1 }, () =>
+ imagePart(base64OfBytes(8)),
+ );
+ const res = validateUserContent(content);
+ expect(res.ok).toBe(false);
+ expect(res.errors.some((e) => e.code === "too-many")).toBe(true);
+ });
+
+ it("rejects when the total payload exceeds the request ceiling", () => {
+ // Several individually-legal PDFs that together exceed the total cap.
+ const each = Math.floor(MAX_TOTAL_ATTACHMENT_BYTES / 3);
+ const content: UserContentPart[] = [
+ imagePart(base64OfBytes(each), "application/pdf"),
+ imagePart(base64OfBytes(each), "application/pdf"),
+ imagePart(base64OfBytes(each), "application/pdf"),
+ imagePart(base64OfBytes(each), "application/pdf"),
+ ];
+ const res = validateUserContent(content);
+ expect(res.ok).toBe(false);
+ expect(res.errors.some((e) => e.code === "total-too-large")).toBe(true);
+ });
+});
diff --git a/packages/core/tests/models/catalog.test.ts b/packages/core/tests/models/catalog.test.ts
index 51043e6..f4bddc2 100644
--- a/packages/core/tests/models/catalog.test.ts
+++ b/packages/core/tests/models/catalog.test.ts
@@ -4,6 +4,7 @@ import {
__resetCatalogCacheForTests,
getModelsCatalog,
resolveContextLimit,
+ resolveModelCapabilities,
} from "../../src/models/catalog.js";
const CACHE_PATH = "/tmp/dispatch/models-dev.json";
@@ -13,14 +14,30 @@ const CATALOG = {
anthropic: {
id: "anthropic",
models: {
- "claude-sonnet-4-5": { limit: { context: 200000, output: 64000 } },
- "claude-sonnet-4-6": { limit: { context: 1000000, output: 64000 } },
+ "claude-sonnet-4-5": {
+ limit: { context: 200000, output: 64000 },
+ modalities: { input: ["text", "image", "pdf"], output: ["text"] },
+ },
+ "claude-sonnet-4-6": {
+ limit: { context: 1000000, output: 64000 },
+ modalities: { input: ["text", "image", "pdf"], output: ["text"] },
+ },
+ // A text-only model: definitively no image/pdf input.
+ "text-only-model": {
+ limit: { context: 100000, output: 8192 },
+ modalities: { input: ["text"], output: ["text"] },
+ },
+ // An entry predating the modalities field → capability unknown.
+ "legacy-model": { limit: { context: 100000, output: 8192 } },
},
},
opencode: {
id: "opencode",
models: {
- "glm-4-6": { limit: { context: 131072, output: 8192 } },
+ "glm-4-6": {
+ limit: { context: 131072, output: 8192 },
+ modalities: { input: ["text", "image"], output: ["text"] },
+ },
},
},
};
@@ -156,3 +173,55 @@ describe("getModelsCatalog caching", () => {
warn.mockRestore();
});
});
+
+describe("resolveModelCapabilities", () => {
+ it("reports image + pdf for a vision model", async () => {
+ mockFetchOnce(CATALOG);
+ expect(await resolveModelCapabilities("anthropic", "claude-sonnet-4-5")).toEqual({
+ image: true,
+ pdf: true,
+ });
+ });
+
+ it("reports image-only for a model whose modalities omit pdf", async () => {
+ mockFetchOnce(CATALOG);
+ // glm-4-6 lists image but not pdf (resolved via the opencode fallback).
+ expect(await resolveModelCapabilities("opencode-anthropic", "glm-4-6")).toEqual({
+ image: true,
+ pdf: false,
+ });
+ });
+
+ it("reports a definitive no for a text-only model", async () => {
+ mockFetchOnce(CATALOG);
+ expect(await resolveModelCapabilities("anthropic", "text-only-model")).toEqual({
+ image: false,
+ pdf: false,
+ });
+ });
+
+ it("returns null (unknown) for an entry without modalities", async () => {
+ mockFetchOnce(CATALOG);
+ expect(await resolveModelCapabilities("anthropic", "legacy-model")).toBeNull();
+ });
+
+ it("returns null (unknown) for an unknown model id", async () => {
+ mockFetchOnce(CATALOG);
+ expect(await resolveModelCapabilities("anthropic", "no-such-model")).toBeNull();
+ });
+
+ it("returns null for an unsupported provider without hitting the network", async () => {
+ const fetchFn = mockFetchOnce(CATALOG);
+ expect(await resolveModelCapabilities("google", "gemini-2.5-pro")).toBeNull();
+ expect(await resolveModelCapabilities("anthropic", "")).toBeNull();
+ expect(fetchFn).not.toHaveBeenCalled();
+ });
+
+ it("returns null (unknown) when the catalog is offline with no cache", async () => {
+ const fetchFn = vi.fn(() => Promise.reject(new Error("offline")));
+ vi.stubGlobal("fetch", fetchFn);
+ const warn = vi.spyOn(console, "warn").mockImplementation(() => {});
+ expect(await resolveModelCapabilities("anthropic", "claude-sonnet-4-5")).toBeNull();
+ warn.mockRestore();
+ });
+});
diff --git a/packages/core/tests/tools/key-usage.test.ts b/packages/core/tests/tools/key-usage.test.ts
new file mode 100644
index 0000000..643e30e
--- /dev/null
+++ b/packages/core/tests/tools/key-usage.test.ts
@@ -0,0 +1,317 @@
+import { describe, expect, it, vi } from "vitest";
+
+// The tool imports `getAccountUsageWithSource` from `claude.ts`, which
+// transitively imports `db/index.js` (top-level `import { Database } from
+// "bun:sqlite"`) — unresolvable under vitest's Node runtime. These tests inject
+// stub fetchers and never hit the real fetchers/DB, so stubbing the db module
+// is enough to let the import chain resolve.
+vi.mock("../../src/db/index.js", () => ({
+ getDatabase: vi.fn(() => {
+ throw new Error("db not available in this test");
+ }),
+}));
+
+import type { ClaudeAccount, ClaudeUsageResult } from "../../src/credentials/claude.js";
+import type { OpencodeUsageReport } from "../../src/credentials/opencode.js";
+import {
+ createKeyUsageTool,
+ formatKeyUsage,
+ type KeyUsageCallbacks,
+} from "../../src/tools/key-usage.js";
+import type { KeyDefinition, KeyState } from "../../src/types/index.js";
+
+// ─── Builders ─────────────────────────────────────────────────
+
+/**
+ * Build a `KeyState` fixture. Only `id` and `provider` are required: the
+ * definition gets a placeholder `base_url` and the state defaults to
+ * `"active"`. Fields supplied in `def` / `overrides` win over those defaults.
+ */
+function keyState(
+  def: Partial<KeyDefinition> & { id: string; provider: string },
+  overrides: Partial<Omit<KeyState, "definition">> = {},
+): KeyState {
+  const definition = { base_url: "https://example.test", ...def };
+  const defaults: KeyState = { definition, status: "active" };
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * Build a `ClaudeAccount` fixture whose label mirrors its id and whose dummy
+ * credentials expire one hour from now. `source` defaults to
+ * `/creds/<id>.json`.
+ */
+function account(id: string, source = `/creds/${id}.json`): ClaudeAccount {
+  const ONE_HOUR_MS = 3_600_000;
+  const credentials = {
+    accessToken: "tok",
+    refreshToken: "ref",
+    expiresAt: Date.now() + ONE_HOUR_MS,
+  };
+  return { id, label: id, source, credentials };
+}
+
+/**
+ * Create the key_usage tool wired to stub callbacks only — no network, no DB.
+ * Omitted fetchers resolve to `null`; omitted `accounts` reads as an empty
+ * list.
+ */
+function buildTool(opts: {
+  keys: KeyState[];
+  accounts?: ClaudeAccount[];
+  anthropic?: (a: ClaudeAccount) => Promise<ClaudeUsageResult | null>;
+  opencode?: (keyId: string) => Promise<OpencodeUsageReport | null>;
+}) {
+  // Shared default fetcher for either provider.
+  const noUsage = async (): Promise<null> => null;
+  const callbacks: KeyUsageCallbacks = {
+    listKeys() {
+      return opts.keys;
+    },
+    listClaudeAccounts() {
+      return opts.accounts ?? [];
+    },
+    fetchAnthropicUsage: opts.anthropic ?? noUsage,
+    fetchOpencodeUsage: opts.opencode ?? noUsage,
+  };
+  return createKeyUsageTool(callbacks);
+}
+
+const HOUR = 3_600_000;
+
+describe("key_usage tool", () => {
+ it("reports all keys when no key_id is given", async () => {
+ const reset5h = Date.now() + 2 * HOUR;
+ const tool = buildTool({
+ keys: [
+ keyState({ id: "claude-max", provider: "anthropic", credentials_file: "/creds/max.json" }),
+ keyState({ id: "opencode-1", provider: "opencode-go" }),
+ ],
+ accounts: [account("claude-max", "/creds/max.json")],
+ anthropic: async () => ({
+ source: "live",
+ report: {
+ fiveHour: { utilization: 0.25, resetsAt: reset5h },
+ sevenDay: { utilization: 0.6 },
+ },
+ }),
+ opencode: async () => ({
+ fiveHour: { utilization: 0.1 },
+ weekly: { utilization: 0.4 },
+ monthly: { utilization: 0.7 },
+ }),
+ });
+
+ const out = await tool.execute({});
+
+ // Both keys present with providers.
+ expect(out).toContain("[claude-max] provider: anthropic");
+ expect(out).toContain("[opencode-1] provider: opencode-go");
+ // Remaining = (1 - utilization) * 100.
+ expect(out).toContain("5-hour: 75% remaining");
+ expect(out).toContain("week: 40% remaining");
+ expect(out).toContain("5-hour: 90% remaining");
+ expect(out).toContain("week: 60% remaining");
+ expect(out).toContain("month: 30% remaining");
+ expect(out).toContain("data: live (fetched just now)");
+ });
+
+ it("filters to a single key when key_id is given and does not fetch others", async () => {
+ const opencodeFetch = vi.fn(async () => ({ fiveHour: { utilization: 0.5 } }));
+ const tool = buildTool({
+ keys: [
+ keyState({ id: "claude-max", provider: "anthropic" }),
+ keyState({ id: "opencode-1", provider: "opencode-go" }),
+ ],
+ accounts: [account("claude-max")],
+ anthropic: async () => ({
+ source: "live",
+ report: { fiveHour: { utilization: 0.2 } },
+ }),
+ opencode: opencodeFetch,
+ });
+
+ const out = await tool.execute({ key_id: "claude-max" });
+
+ expect(out).toContain("[claude-max] provider: anthropic");
+ expect(out).not.toContain("opencode-1");
+ expect(opencodeFetch).not.toHaveBeenCalled();
+ });
+
+ it("returns a helpful error for an unknown key_id", async () => {
+ const tool = buildTool({
+ keys: [
+ keyState({ id: "claude-max", provider: "anthropic" }),
+ keyState({ id: "opencode-1", provider: "opencode-go" }),
+ ],
+ });
+
+ const out = await tool.execute({ key_id: "nope" });
+
+ expect(out).toContain('no key found with id "nope"');
+ expect(out).toContain("claude-max");
+ expect(out).toContain("opencode-1");
+ });
+
+ it("reports cached data with the source's last-fetched timestamp", async () => {
+ const cachedAt = Date.UTC(2025, 0, 2, 3, 4, 5);
+ const tool = buildTool({
+ keys: [keyState({ id: "claude-max", provider: "anthropic" })],
+ accounts: [account("claude-max")],
+ anthropic: async () => ({
+ source: "cache",
+ cachedAt,
+ report: { fiveHour: { utilization: 0.5 } },
+ }),
+ });
+
+ const out = await tool.execute({});
+
+ expect(out).toContain("data: cached — last fetched from source 2025-01-02T03:04:05.000Z");
+ expect(out).toContain("5-hour: 50% remaining");
+ });
+
+ it("omits the month window for anthropic (no monthly bucket)", async () => {
+ const tool = buildTool({
+ keys: [keyState({ id: "claude-max", provider: "anthropic" })],
+ accounts: [account("claude-max")],
+ anthropic: async () => ({
+ source: "live",
+ report: { fiveHour: { utilization: 0.1 }, sevenDay: { utilization: 0.2 } },
+ }),
+ });
+
+ const out = await tool.execute({});
+
+ expect(out).toContain("5-hour:");
+ expect(out).toContain("week:");
+ expect(out).not.toContain("month:");
+ });
+
+ it("includes the month window for opencode-go", async () => {
+ const tool = buildTool({
+ keys: [keyState({ id: "opencode-1", provider: "opencode-go" })],
+ opencode: async () => ({
+ fiveHour: { utilization: 0.1 },
+ weekly: { utilization: 0.2 },
+ monthly: { utilization: 0.3 },
+ }),
+ });
+
+ const out = await tool.execute({});
+
+ expect(out).toContain("month: 70% remaining");
+ });
+
+ it("surfaces exhausted status with the last error", async () => {
+ const exhaustedAt = Date.now() - HOUR;
+ const tool = buildTool({
+ keys: [
+ keyState(
+ { id: "opencode-1", provider: "opencode-go" },
+ { status: "exhausted", lastError: "429 rate limit exceeded", exhaustedAt },
+ ),
+ ],
+ opencode: async () => null,
+ });
+
+ const out = await tool.execute({});
+
+ expect(out).toContain("status: EXHAUSTED");
+ expect(out).toContain("last error: 429 rate limit exceeded");
+ });
+
+ it("flags providers without usage support", async () => {
+ const tool = buildTool({
+ keys: [keyState({ id: "gem", provider: "google" })],
+ });
+
+ const out = await tool.execute({});
+
+ expect(out).toContain("[gem] provider: google");
+ expect(out).toContain("not supported");
+ });
+
+ it("reports unavailable when a supported provider returns no usage", async () => {
+ const tool = buildTool({
+ keys: [keyState({ id: "claude-max", provider: "anthropic" })],
+ accounts: [account("claude-max")],
+ anthropic: async () => null,
+ });
+
+ const out = await tool.execute({});
+
+ expect(out).toContain("usage: unavailable");
+ expect(out).toContain("no cached usage");
+ });
+
+ it("reports unavailable for anthropic keys with no account credentials", async () => {
+ const tool = buildTool({
+ keys: [keyState({ id: "claude-max", provider: "anthropic" })],
+ accounts: [],
+ });
+
+ const out = await tool.execute({});
+
+ expect(out).toContain("no Claude account credentials available");
+ });
+
+ it("treats a fetcher that throws as unavailable (does not crash)", async () => {
+ const tool = buildTool({
+ keys: [keyState({ id: "opencode-1", provider: "opencode-go" })],
+ opencode: async () => {
+ throw new Error("network down");
+ },
+ });
+
+ const out = await tool.execute({});
+
+ expect(out).toContain("usage: unavailable");
+ });
+
+ it("reports when no keys are configured at all", async () => {
+ const tool = buildTool({ keys: [] });
+ const out = await tool.execute({});
+ expect(out).toBe("No API keys are configured.");
+ });
+
+ it("clamps out-of-range utilization to 0–100%", async () => {
+ const tool = buildTool({
+ keys: [keyState({ id: "opencode-1", provider: "opencode-go" })],
+ opencode: async () => ({
+ fiveHour: { utilization: 1.2 }, // over 100% used → 0% remaining
+ weekly: { utilization: -0.5 }, // negative → 100% remaining
+ }),
+ });
+
+ const out = await tool.execute({});
+
+ expect(out).toContain("5-hour: 0% remaining");
+ expect(out).toContain("week: 100% remaining");
+ });
+});
+
+describe("formatKeyUsage (pure)", () => {
+ const now = Date.UTC(2025, 5, 1, 12, 0, 0); // fixed reference clock: 2025-06-01T12:00:00Z (month index 5 = June)
+
+ it("formats reset timestamps with ISO + relative time", () => {
+ const out = formatKeyUsage(
+ [
+ {
+ keyId: "claude-max",
+ provider: "anthropic",
+ status: "active",
+ dataSource: "live",
+ windows: [{ label: "5-hour", remainingPercent: 80, resetsAt: now + 90 * 60_000 }], // resets 90 minutes after `now`
+ },
+ ],
+ now, // passing the clock explicitly keeps the expected "in 1h 30m" text stable
+ );
+
+ expect(out).toContain("5-hour: 80% remaining, resets 2025-06-01T13:30:00.000Z (in 1h 30m)");
+ });
+
+ it("renders a past reset/exhaustion time as 'ago'", () => {
+ const out = formatKeyUsage(
+ [
+ {
+ keyId: "opencode-1",
+ provider: "opencode-go",
+ status: "exhausted",
+ exhaustedAt: now - 2 * HOUR, // two hours before `now`
+ lastError: "boom",
+ windows: [], // no usage windows: only the status / error lines are asserted
+ },
+ ],
+ now,
+ );
+
+ expect(out).toContain("status: EXHAUSTED (since 2025-06-01T10:00:00.000Z, 2h ago)");
+ expect(out).toContain("last error: boom");
+ });
+
+ it("returns a friendly message when no entries match", () => {
+ expect(formatKeyUsage([], now)).toBe("No API keys matched."); // empty entry list → fixed message
+ });
+});
diff --git a/packages/frontend/src/App.svelte b/packages/frontend/src/App.svelte
index 5f2b61f..405536c 100644
--- a/packages/frontend/src/App.svelte
+++ b/packages/frontend/src/App.svelte
@@ -131,6 +131,59 @@ $effect(() => {
})();
});
+// ─── Image / PDF capability lookup ─────────────────────────────
+// Resolve whether the active model accepts image/pdf INPUT from models.dev (via
+// the API), so the chat input can block sending an unsupported attachment
+// (no tokens spent) while staying permissive when the capability is unknown.
+// `null` = unknown (catalog offline / unsupported provider) → optimistic allow.
+let imageSupport = $state<{ image: boolean; pdf: boolean } | null>(null);
+const capabilityCache = new Map<string, { image: boolean; pdf: boolean } | null>();
+
+$effect(() => {
+  // Provider of the key with the given id, or null when the id is missing or
+  // no such key is present in `modelsData.keys`.
+  const providerOf = (keyId: string | null | undefined): string | null =>
+    keyId ? (modelsData.keys.find((k) => k.id === keyId)?.provider ?? null) : null;
+
+  const active = tabStore.activeTab;
+  const modelId = active?.modelId ?? null;
+  const provider = providerOf(active?.keyId);
+
+  // Without both a provider and a model there is nothing to look up.
+  if (!provider || !modelId) {
+    imageSupport = null;
+    return;
+  }
+
+  // Serve repeat lookups for the same provider/model pair from the cache.
+  const cacheKey = `${provider}/${modelId}`;
+  if (capabilityCache.has(cacheKey)) {
+    imageSupport = capabilityCache.get(cacheKey) ?? null;
+    return;
+  }
+
+  // Reset before fetching so a slow or failed request never leaves the
+  // PREVIOUS model's capability applied to this one.
+  imageSupport = null;
+
+  const load = async (): Promise<void> => {
+    try {
+      const res = await fetch(
+        `${config.apiBase}/models/capabilities?provider=${encodeURIComponent(provider)}&modelId=${encodeURIComponent(modelId)}`,
+      );
+      if (!res.ok) return;
+      const payload = (await res.json()) as {
+        capabilities?: { image: boolean; pdf: boolean } | null;
+      };
+      const caps = payload.capabilities ?? null;
+      capabilityCache.set(cacheKey, caps);
+      // Publish only if the active tab still points at the same
+      // provider/model; the answer stays cached either way.
+      const latest = tabStore.activeTab;
+      if (providerOf(latest?.keyId) === provider && latest?.modelId === modelId) {
+        imageSupport = caps;
+      }
+    } catch {
+      // Leave imageSupport as null (unknown → permissive) on network error.
+    }
+  };
+  void load();
+});
+
onMount(() => {
// Apply persisted theme (or the shared DEFAULT_THEME if nothing is
// stored) so the first paint matches what the Settings panel will
@@ -174,7 +227,7 @@ onMount(() => {
<div class="flex-1 overflow-hidden">
<ChatPanel />
</div>
- <ChatInput {contextLimit} />
+ <ChatInput {contextLimit} {imageSupport} />
</div>
<!-- Right sidebar: overlay on small screens, inline on large -->
diff --git a/packages/frontend/src/lib/attachment-tokens.ts b/packages/frontend/src/lib/attachment-tokens.ts
new file mode 100644
index 0000000..79d4cbc
--- /dev/null
+++ b/packages/frontend/src/lib/attachment-tokens.ts
@@ -0,0 +1,234 @@
+// Inline attachment tokens for the chat input.
+//
+// A pasted image/PDF is represented in the textarea draft as an inline TOKEN
+// (e.g. `【image:a1b2c3】`). The token is ordinary text living inside the draft,
+// so attachments have ORDER relative to typed text and to each other, and the
+// user can reference them positionally ("here is image A: 【image:…】"). The
+// token is also the ONLY handle on an attachment — deleting it (atomic delete,
+// below) detaches the underlying file. There is no separate preview strip.
+//
+// This module is pure (no DOM, no Svelte) so it can be unit-tested directly.
+
+import type { UserContentPart } from "@dispatch/core/src/types/index.js";
+
+export type AttachmentKind = "image" | "pdf";
+
+/** A staged attachment, keyed by its short token id. */
+export interface StagedAttachment {
+ id: string;
+ kind: AttachmentKind;
+ /** IANA media type, e.g. `image/png`, `application/pdf`. */
+ mediaType: string;
+ /** Base64 payload WITHOUT a `data:` URI prefix. */
+ data: string;
+ /** Optional original filename (used for PDFs). */
+ name?: string;
+}
+
+/**
+ * Token grammar: `【<kind>:<id>】` where kind ∈ {image,pdf} and id is 6
+ * lowercase alphanumerics. The CJK corner brackets (U+3010/U+3011) are used as
+ * delimiters because they're visually distinct and virtually never typed by
+ * hand, so a token won't collide with normal prose.
+ */
+export const ATTACHMENT_TOKEN_RE = /【(image|pdf):([a-z0-9]{6})】/g;
+
+/**
+ * Render the inline token text `【<kind>:<id>】` for a staged attachment.
+ *
+ * @param kind Attachment kind written into the token.
+ * @param id Short token id written into the token.
+ * @returns The token string to place in the draft.
+ */
+export function makeAttachmentToken(kind: AttachmentKind, id: string): string {
+  return ["【", kind, ":", id, "】"].join("");
+}
+
+/**
+ * Generate a short, URL-safe token id: 6 characters drawn from `a-z0-9`.
+ *
+ * Uses `crypto.getRandomValues` when the runtime exposes it and falls back to
+ * `Math.random` otherwise.
+ */
+export function generateTokenId(): string {
+  const ALPHABET = "abcdefghijklmnopqrstuvwxyz0123456789";
+  const ID_LENGTH = 6;
+  const webCrypto = (globalThis as { crypto?: Crypto }).crypto;
+
+  let picks: number[];
+  if (webCrypto?.getRandomValues) {
+    const words = new Uint32Array(ID_LENGTH);
+    webCrypto.getRandomValues(words);
+    picks = Array.from(words, (word) => word % ALPHABET.length);
+  } else {
+    picks = Array.from({ length: ID_LENGTH }, () =>
+      Math.floor(Math.random() * ALPHABET.length),
+    );
+  }
+  return picks.map((index) => ALPHABET.charAt(index)).join("");
+}
+
+export interface FoundToken {
+ id: string;
+ kind: AttachmentKind;
+ /** Inclusive start index of the token within the text. */
+ start: number;
+ /** Exclusive end index of the token within the text. */
+ end: number;
+}
+
+/**
+ * Locate every attachment token in `text`, in order of appearance.
+ *
+ * @returns One entry per token with its kind, id, and `[start, end)` span.
+ */
+export function findTokens(text: string): FoundToken[] {
+  // Build a private regex per call: the exported one is global (stateful
+  // `lastIndex`) and must not leak state between calls.
+  const pattern = new RegExp(ATTACHMENT_TOKEN_RE.source, "g");
+  return Array.from(text.matchAll(pattern), (hit) => {
+    const start = hit.index ?? 0;
+    return {
+      kind: hit[1] as AttachmentKind,
+      id: hit[2] ?? "",
+      start,
+      end: start + hit[0].length,
+    };
+  });
+}
+
+/**
+ * Collect the ids of all attachment tokens that still appear intact in `text`.
+ * Duplicated tokens collapse to a single id.
+ */
+export function intactTokenIds(text: string): Set<string> {
+  const ids = new Set<string>();
+  for (const { id } of findTokens(text)) ids.add(id);
+  return ids;
+}
+
+export interface DeletionResult {
+ /** Text after the deletion. */
+ text: string;
+ /** New caret position (collapsed) after the deletion. */
+ caret: number;
+ /** Ids of attachments whose tokens were removed by this deletion. */
+ removedIds: string[];
+}
+
+/**
+ * Compute the result of a Backspace/Delete keystroke when it interacts with an
+ * attachment token, so a token deletes ATOMICALLY (one keystroke removes the
+ * whole `【…】`, never a single bracket or character of it). Returns `null`
+ * when the keystroke does NOT touch a token — the caller should then let the
+ * browser's default editing behaviour run.
+ *
+ * Rules:
+ * - Range selection (`selStart !== selEnd`): expand the range to fully cover
+ *   any token it overlaps, then delete the expanded range. Only acts when at
+ *   least one token actually overlaps (otherwise returns null).
+ * - Collapsed + Backspace: delete the token that ends exactly at the caret, or
+ *   that contains the caret in its interior (the character about to be erased
+ *   belongs to the token).
+ * - Collapsed + Delete: delete the token that starts exactly at the caret, or
+ *   that contains the caret in its interior.
+ *
+ * @param text Current draft text.
+ * @param selStart Selection start offset (either order relative to `selEnd`).
+ * @param selEnd Selection end offset.
+ * @param key Which deletion key was pressed.
+ * @returns The new text, collapsed caret offset and removed attachment ids, or
+ *   `null` when no token is affected.
+ */
+export function computeTokenDeletion(
+  text: string,
+  selStart: number,
+  selEnd: number,
+  key: "Backspace" | "Delete",
+): DeletionResult | null {
+  const tokens = findTokens(text);
+  if (tokens.length === 0) return null;
+
+  const lo = Math.min(selStart, selEnd);
+  const hi = Math.max(selStart, selEnd);
+
+  // Does the keystroke consume at least one character of this token?
+  const touches = (t: { start: number; end: number }): boolean => {
+    if (lo !== hi) return t.start < hi && t.end > lo;
+    // Collapsed caret: Backspace erases the character BEFORE the caret,
+    // Delete the character AFTER it. A caret strictly inside a token hits the
+    // token either way; previously that case fell through to the browser and
+    // left a broken fragment such as `【imag:a1b2c3】` in the draft.
+    return key === "Backspace" ? t.start < lo && lo <= t.end : t.start <= lo && lo < t.end;
+  };
+
+  const hit = tokens.filter(touches);
+  if (hit.length === 0) return null;
+
+  // Widen the deleted span so every touched token is removed in full. For a
+  // collapsed caret this is exactly the single touched token's span.
+  const delStart = Math.min(lo, ...hit.map((t) => t.start));
+  const delEnd = Math.max(hi, ...hit.map((t) => t.end));
+  return {
+    text: text.slice(0, delStart) + text.slice(delEnd),
+    caret: delStart,
+    removedIds: hit.map((t) => t.id),
+  };
+}
+
+/**
+ * Human-readable placeholder that stands in for a token in persisted/display
+ * text: `[pdf]` for PDFs, `[image]` for everything else.
+ */
+export function markerFor(kind: AttachmentKind): string {
+  if (kind === "pdf") return "[pdf]";
+  return "[image]";
+}
+
+export interface ParsedDraft {
+ /**
+ * Text-only projection of the draft with each attachment token replaced by a
+ * `[image]` / `[pdf]` marker. This is what gets persisted and rendered in the
+ * chat history (the raw bytes are never stored).
+ */
+ displayText: string;
+ /**
+ * Ordered multimodal content (interleaved text + attachment parts) to send to
+ * the model, or `null` when the draft has no intact attachment token (the
+ * caller then sends plain text).
+ */
+ content: UserContentPart[] | null;
+}
+
+/**
+ * Project a draft (text with inline attachment tokens) and the map of staged
+ * attachments into the two shapes a send needs:
+ * - `displayText`: the draft with every token replaced by its
+ *   `[image]`/`[pdf]` marker, and
+ * - `content`: ordered `UserContentPart[]` interleaving text runs with the
+ *   attachment parts the tokens refer to.
+ *
+ * A token whose id is not in `attachments` (a stray paste of token text, or an
+ * already-detached attachment) is plain text in BOTH outputs: its marker is
+ * written to `displayText` and to the surrounding text part, and no attachment
+ * part is emitted for it. `content` is `null` when no attachment part was
+ * produced at all.
+ */
+export function parseDraft(draft: string, attachments: Map<string, StagedAttachment>): ParsedDraft {
+  const parts: UserContentPart[] = [];
+  const displayPieces: string[] = [];
+  let pendingText = "";
+  let sawAttachment = false;
+  let pos = 0;
+
+  // Emit the accumulated text run (if any) as one text part.
+  const emitPendingText = (): void => {
+    if (pendingText.length === 0) return;
+    parts.push({ type: "text", text: pendingText });
+    pendingText = "";
+  };
+
+  for (const { id, kind, start, end } of findTokens(draft)) {
+    const gap = draft.slice(pos, start);
+    const marker = markerFor(kind);
+    // The display projection always shows the marker, staged or not.
+    displayPieces.push(gap, marker);
+    pendingText += gap;
+
+    const staged = attachments.get(id);
+    if (!staged) {
+      // Orphan token: the marker stays inline as ordinary text.
+      pendingText += marker;
+    } else {
+      // Staged token: close the text run, then emit the file itself — no
+      // marker text, since the part represents the file to the model.
+      emitPendingText();
+      parts.push({
+        type: "attachment",
+        mediaType: staged.mediaType,
+        data: staged.data,
+        ...(staged.name ? { name: staged.name } : {}),
+      });
+      sawAttachment = true;
+    }
+    pos = end;
+  }
+
+  const rest = draft.slice(pos);
+  displayPieces.push(rest);
+  pendingText += rest;
+  emitPendingText();
+
+  return { displayText: displayPieces.join(""), content: sawAttachment ? parts : null };
+}
diff --git a/packages/frontend/src/lib/components/ChatInput.svelte b/packages/frontend/src/lib/components/ChatInput.svelte
index f954be8..f3eadf7 100644
--- a/packages/frontend/src/lib/components/ChatInput.svelte
+++ b/packages/frontend/src/lib/components/ChatInput.svelte
@@ -1,12 +1,40 @@
<script lang="ts">
+import {
+ ACCEPTED_PDF_MEDIA_TYPE,
+ isImageMediaType,
+ isPdfMediaType,
+ MAX_ATTACHMENTS,
+ MAX_IMAGE_BYTES,
+ MAX_PDF_BYTES,
+} from "@dispatch/core/src/models/attachments.js";
+import {
+ type AttachmentKind,
+ computeTokenDeletion,
+ generateTokenId,
+ makeAttachmentToken,
+ parseDraft,
+ type StagedAttachment,
+} from "../attachment-tokens.js";
import { computeContextUsage } from "../context-window.js";
import { tabStore } from "../tabs.svelte.js";
-const { contextLimit = null }: { contextLimit?: number | null } = $props();
+const {
+ contextLimit = null,
+ imageSupport = null,
+}: {
+ contextLimit?: number | null;
+ // Image/PDF INPUT capability for the active model, or `null` when unknown
+ // (catalog offline / unsupported provider) — null means "can't verify"
+ // (optimistic allow), not a hard no.
+ imageSupport?: { image: boolean; pdf: boolean } | null;
+} = $props();
const MAX_LINES = 7;
let inputEl: HTMLTextAreaElement | undefined;
+// Transient error shown when a paste is rejected (bad type / too large / too
+// many). Cleared when the next file paste starts or the draft text is edited.
+let pasteError = $state<string | null>(null);
const agentStatus = $derived(tabStore.activeTab?.agentStatus ?? "idle");
const tabId = $derived(tabStore.activeTab?.id ?? "");
@@ -14,6 +42,7 @@ const tabId = $derived(tabStore.activeTab?.id ?? "");
// switching tabs saves the current draft and restores the target tab's text
// automatically — drafts are never lost or clobbered by tab switching.
const inputValue = $derived(tabStore.activeTab?.draft ?? "");
+const attachments = $derived(tabStore.activeTab?.attachments ?? []);
const cacheStats = $derived(tabStore.activeTab?.cacheStats ?? null);
const isRunning = $derived(agentStatus === "running");
@@ -25,9 +54,42 @@ const compactLocked = $derived(
(tabStore.activeTab?.compactionError ?? null) !== null,
);
const hasText = $derived(inputValue.trim().length > 0);
+const hasAttachments = $derived(attachments.length > 0);
// While generating with an empty box, the primary action is "stop". With text
// in the box, it stays "send" (the message is queued behind the live turn).
-const showStop = $derived(isRunning && !hasText);
+const showStop = $derived(isRunning && !hasText && !hasAttachments);
+
+// ─── Attachment capability gating ──────────────────────────────
+// A definitive "no" from the catalog (imageSupport.image === false with an
+// image staged, or .pdf === false with a pdf staged) blocks the send so no
+// tokens are spent. Unknown capability (imageSupport === null) is permissive.
+const hasImageAttachment = $derived(attachments.some((a) => a.kind === "image"));
+const hasPdfAttachment = $derived(attachments.some((a) => a.kind === "pdf"));
+const imageBlocked = $derived(
+ hasImageAttachment && imageSupport !== null && imageSupport.image === false,
+);
+const pdfBlocked = $derived(
+ hasPdfAttachment && imageSupport !== null && imageSupport.pdf === false,
+);
+// Attachments require a fresh turn — they can't ride the queue path (which is
+// text-only), so block sending an attachment while the agent is generating.
+const attachmentsWhileRunning = $derived(hasAttachments && isRunning);
+
+// Single warning line shown above the input. Priority: a rejected paste, then
+// attachments staged mid-generation, then a definitive capability "no".
+const attachmentWarning = $derived.by(() => {
+  if (pasteError) return pasteError;
+  // Applies to PDFs as well as images, so the wording says "attachments".
+  if (attachmentsWhileRunning)
+    return "Wait for the current response to finish before sending attachments.";
+  if (imageBlocked && pdfBlocked)
+    return "The selected model doesn't support image or PDF input. Remove the attachments to send.";
+  if (imageBlocked)
+    return "The selected model doesn't support image input. Remove the image to send.";
+  if (pdfBlocked) return "The selected model doesn't support PDF input. Remove the PDF to send.";
+  return null;
+});
+
+// Send is blocked (but not the box) when an attachment is definitively
+// unsupported or when attachments are staged mid-generation.
+const sendBlocked = $derived(imageBlocked || pdfBlocked || attachmentsWhileRunning);
const usage = $derived(computeContextUsage(cacheStats, contextLimit));
const hasUsage = $derived((cacheStats?.last ?? null) !== null);
@@ -84,22 +146,155 @@ $effect(() => {
function handleInput(e: Event) {
if (!tabId) return;
+ pasteError = null;
+ // setDraft also reconciles staged attachments against the surviving tokens,
+ // so deleting a token (by any means) detaches its attachment.
tabStore.setDraft(tabId, (e.currentTarget as HTMLTextAreaElement).value);
}
+/** Map a media type to its attachment kind, or `null` when it is neither an accepted image nor a PDF. */
+function kindForMediaType(mediaType: string): AttachmentKind | null {
+  return isImageMediaType(mediaType) ? "image" : isPdfMediaType(mediaType) ? "pdf" : null;
+}
+
+/**
+ * Read `file` and resolve with its base64 payload (no `data:` URI prefix).
+ * Rejects with the reader's error, or a generic one, when the read fails or
+ * yields a non-string result.
+ */
+function readAsBase64(file: File): Promise<string> {
+  // Drop the `data:<mediaType>;base64,` prefix → bare base64.
+  const stripDataUriPrefix = (dataUri: string): string => {
+    const comma = dataUri.indexOf(",");
+    return comma === -1 ? dataUri : dataUri.slice(comma + 1);
+  };
+
+  return new Promise<string>((resolve, reject) => {
+    const reader = new FileReader();
+    reader.onerror = () => reject(reader.error ?? new Error("read failed"));
+    reader.onload = () => {
+      const payload = reader.result;
+      if (typeof payload === "string") {
+        resolve(stripDataUriPrefix(payload));
+      } else {
+        reject(new Error("unexpected reader result"));
+      }
+    };
+    reader.readAsDataURL(file);
+  });
+}
+
+/**
+ * Replace the textarea's current selection with `insert` (falling back to the
+ * end of the draft when no textarea is bound) and store the result as the
+ * active tab's draft.
+ *
+ * @returns The caret offset just past the inserted text.
+ */
+function insertAtCaret(insert: string): number {
+  const draft = inputValue;
+  const selStart = inputEl?.selectionStart ?? draft.length;
+  const selEnd = inputEl?.selectionEnd ?? draft.length;
+  const updated = `${draft.slice(0, selStart)}${insert}${draft.slice(selEnd)}`;
+  if (tabId) tabStore.setDraft(tabId, updated);
+  return selStart + insert.length;
+}
+
+/**
+ * Paste handler for the chat textarea. When the clipboard carries files, each
+ * accepted image/PDF is staged on the tab and an inline token is inserted at
+ * the caret; rejected files set `pasteError`. A clipboard without files falls
+ * through to the browser's normal text paste.
+ */
+async function handlePaste(e: ClipboardEvent) {
+  if (!tabId) return;
+  const clipboardItems = e.clipboardData?.items;
+  if (!clipboardItems) return;
+
+  const pastedFiles: File[] = [];
+  for (const entry of clipboardItems) {
+    if (entry.kind !== "file") continue;
+    const asFile = entry.getAsFile();
+    if (asFile) pastedFiles.push(asFile);
+  }
+  // No files in the clipboard → let the default text paste happen.
+  if (pastedFiles.length === 0) return;
+
+  // At least one file is ours to handle; stop the browser from also pasting a
+  // filename / image fallback into the textarea.
+  e.preventDefault();
+  pasteError = null;
+
+  for (const pasted of pastedFiles) {
+    const kind = kindForMediaType(pasted.type);
+    if (!kind) {
+      pasteError = `Unsupported file type: ${pasted.type || "unknown"}. Allowed: PNG, JPEG, WebP, GIF, PDF.`;
+      continue;
+    }
+
+    // Re-read the staged list on every iteration: earlier files in this same
+    // paste may already have been added.
+    const stagedSoFar = tabStore.activeTab?.attachments ?? [];
+    if (stagedSoFar.length >= MAX_ATTACHMENTS) {
+      pasteError = `You can attach at most ${MAX_ATTACHMENTS} files per message.`;
+      break;
+    }
+
+    const isPdf = kind === "pdf";
+    const byteLimit = isPdf ? MAX_PDF_BYTES : MAX_IMAGE_BYTES;
+    if (pasted.size > byteLimit) {
+      const mb = Math.round(byteLimit / (1024 * 1024));
+      pasteError = `${isPdf ? "PDF" : "Image"} is too large (max ${mb} MB).`;
+      continue;
+    }
+
+    try {
+      const data = await readAsBase64(pasted);
+      const id = generateTokenId();
+      const staged: StagedAttachment = {
+        id,
+        kind,
+        mediaType: isPdf ? ACCEPTED_PDF_MEDIA_TYPE : pasted.type,
+        data,
+        ...(pasted.name ? { name: pasted.name } : {}),
+      };
+      // Stage first, then insert the token — `setDraft` reconciles against
+      // staged attachments, so the attachment must exist before its token
+      // appears in the draft.
+      tabStore.addAttachment(tabId, staged);
+      const caretAfterToken = insertAtCaret(makeAttachmentToken(kind, id));
+      // Put the caret back after the value updates.
+      requestAnimationFrame(() => {
+        const textarea = inputEl;
+        if (!textarea) return;
+        textarea.focus();
+        textarea.setSelectionRange(caretAfterToken, caretAfterToken);
+      });
+    } catch {
+      pasteError = "Failed to read the pasted file.";
+    }
+  }
+}
+
function handleKeydown(e: KeyboardEvent) {
if (e.key === "Enter" && !e.shiftKey) {
e.preventDefault();
submit();
+ return;
+ }
+ if ((e.key === "Backspace" || e.key === "Delete") && inputEl && tabId) {
+ // Atomic token delete: a single Backspace/Delete next to (or a selection
+ // overlapping) a `【…】` token removes the whole token in one stroke.
+ const result = computeTokenDeletion(
+ inputValue,
+ inputEl.selectionStart ?? 0,
+ inputEl.selectionEnd ?? 0,
+ e.key,
+ );
+ if (result) {
+ e.preventDefault();
+ tabStore.setDraft(tabId, result.text);
+ requestAnimationFrame(() => {
+ const el = inputEl;
+ if (el) {
+ el.focus();
+ el.setSelectionRange(result.caret, result.caret);
+ }
+ });
+ }
}
}
function submit() {
+ if (!tabId) return;
+ // Block sending while this tab is mid-compaction (source or placeholder).
if (compactLocked) return;
- const text = inputValue.trim();
- if (!text) return;
- if (tabId) tabStore.setDraft(tabId, "");
- tabStore.sendMessage(text);
+ const map = new Map(attachments.map((a) => [a.id, a] as const));
+ const { displayText, content } = parseDraft(inputValue, map);
+ const trimmed = displayText.trim();
+ // Nothing to send (no text and no usable attachment).
+ if (!trimmed && !content) return;
+ // Don't send when a staged attachment is unsupported / mid-generation.
+ if (sendBlocked) return;
+ const text = trimmed || displayText;
+ tabStore.setDraft(tabId, "");
+ void tabStore.sendMessage(text, content ?? undefined);
}
function primaryAction() {
@@ -112,26 +307,39 @@ function primaryAction() {
</script>
<div class="flex flex-col">
+ {#if attachmentWarning}
+ <div class="px-3 pt-2 text-xs text-warning flex items-start gap-1">
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="w-3.5 h-3.5 mt-0.5 shrink-0" aria-hidden="true">
+ <path d="M10.29 3.86 1.82 18a2 2 0 0 0 1.71 3h16.94a2 2 0 0 0 1.71-3L13.71 3.86a2 2 0 0 0-3.42 0z"></path>
+ <line x1="12" y1="9" x2="12" y2="13"></line>
+ <line x1="12" y1="17" x2="12.01" y2="17"></line>
+ </svg>
+ <span>{attachmentWarning}</span>
+ </div>
+ {/if}
<!-- Top bar: expanding textarea + send/stop action -->
<div class="flex items-end gap-2 px-3 pt-3 pb-2">
<textarea
bind:this={inputEl}
value={inputValue}
rows="1"
- placeholder={compactLocked ? "Compaction in progress…" : "Type a message..."}
+ placeholder={compactLocked
+ ? "Compaction in progress…"
+ : "Type a message... (paste an image or PDF to attach)"}
disabled={compactLocked}
class="textarea textarea-ghost flex-1 resize-none leading-normal !min-h-0 h-auto"
onkeydown={handleKeydown}
oninput={handleInput}
+ onpaste={handlePaste}
></textarea>
<!-- Single fixed-width button across all states so the layout never
shifts when it morphs between Send and Stop. -->
<button
type="button"
class="btn w-20 shrink-0 {showStop ? 'btn-error btn-outline' : 'btn-primary'}"
- disabled={compactLocked || (!showStop && !hasText)}
+ disabled={compactLocked || (!showStop && !hasText && !hasAttachments) || sendBlocked}
onclick={primaryAction}
- title={showStop ? "Stop generation" : "Send message"}
+ title={showStop ? "Stop generation" : sendBlocked ? (attachmentWarning ?? "Cannot send") : "Send message"}
>
{#if showStop}
<span class="loading loading-spinner loading-sm"></span>
diff --git a/packages/frontend/src/lib/components/TabBar.svelte b/packages/frontend/src/lib/components/TabBar.svelte
index 354260c..7371f7b 100644
--- a/packages/frontend/src/lib/components/TabBar.svelte
+++ b/packages/frontend/src/lib/components/TabBar.svelte
@@ -1,5 +1,6 @@
<script lang="ts">
import { tick } from "svelte";
+import type { Tab } from "../tabs.svelte.js";
import { tabStore } from "../tabs.svelte.js";
function statusColor(status: string): string {
@@ -8,6 +9,21 @@ function statusColor(status: string): string {
return "bg-success";
}
+/**
+ * Decide whether a tab's status dot should pulse to draw the user's eye.
+ *
+ * Returns true when the agent has stopped in a state that likely needs the user:
+ *   - `agentStatus` is "error" (the turn stopped due to an error), or
+ *   - `agentStatus` is "idle" while at least one task is still "pending" or
+ *     "in_progress" (the agent probably expects a response).
+ * Every other status returns false.
+ */
+function needsAttention(tab: Tab): boolean {
+  switch (tab.agentStatus) {
+    case "error":
+      return true;
+    case "idle":
+      return tab.tasks.some(({ status }) => status === "pending" || status === "in_progress");
+    default:
+      return false;
+  }
+}
+
const userTabs = $derived(tabStore.tabs.filter((t) => t.parentTabId === null));
const subagentTabs = $derived(
tabStore.tabs.filter((t) => t.parentTabId !== null && t.parentTabId === activeUserTabId),
@@ -123,7 +139,14 @@ function handleRenameKeydown(e: KeyboardEvent): void {
tabindex="0"
>
<span class="flex items-center gap-1.5">
- <span class="w-1.5 h-1.5 rounded-full shrink-0 {statusColor(tab.agentStatus)}"></span>
+ {#if needsAttention(tab)}
+ <span class="relative inline-grid shrink-0 *:[grid-area:1/1]">
+ <span class="w-1.5 h-1.5 rounded-full animate-ping {statusColor(tab.agentStatus)}"></span>
+ <span class="w-1.5 h-1.5 rounded-full {statusColor(tab.agentStatus)}"></span>
+ </span>
+ {:else}
+ <span class="w-1.5 h-1.5 rounded-full shrink-0 {statusColor(tab.agentStatus)}"></span>
+ {/if}
<span class="font-mono text-[10px] px-1 py-0.5 rounded bg-base-300 text-base-content/60 shrink-0" title="Tab ID — agents address this tab by this handle">{tabStore.shortHandleFor(tab.id)}</span>
{#if editingTabId === tab.id}
<input
@@ -183,7 +206,14 @@ function handleRenameKeydown(e: KeyboardEvent): void {
tabindex="0"
>
<span class="flex items-center gap-1">
- <span class="w-1 h-1 rounded-full shrink-0 {statusColor(tab.agentStatus)}"></span>
+ {#if needsAttention(tab)}
+ <span class="relative inline-grid shrink-0 *:[grid-area:1/1]">
+ <span class="w-1 h-1 rounded-full animate-ping {statusColor(tab.agentStatus)}"></span>
+ <span class="w-1 h-1 rounded-full {statusColor(tab.agentStatus)}"></span>
+ </span>
+ {:else}
+ <span class="w-1 h-1 rounded-full shrink-0 {statusColor(tab.agentStatus)}"></span>
+ {/if}
<span class="font-mono text-[10px] px-1 rounded bg-base-300 text-base-content/60 shrink-0" title="Tab ID — agents address this tab by this handle">{tabStore.shortHandleFor(tab.id)}</span>
<span class="max-w-28 truncate text-xs">{tab.title}</span>
</span>
diff --git a/packages/frontend/src/lib/components/ToolPermissions.svelte b/packages/frontend/src/lib/components/ToolPermissions.svelte
index 6b09a07..4298724 100644
--- a/packages/frontend/src/lib/components/ToolPermissions.svelte
+++ b/packages/frontend/src/lib/components/ToolPermissions.svelte
@@ -53,6 +53,12 @@ const toolPermissions: ToolPermission[] = [
description: "Allow the AI to search the codebase with the cs ranked code-search engine",
},
{
+ id: "key_usage",
+ label: "Key usage",
+ description:
+ "Allow the AI to read current API-key usage levels, rate-limit headroom, and reset times",
+ },
+ {
id: "lsp",
label: "LSP queries",
description:
diff --git a/packages/frontend/src/lib/settings.svelte.ts b/packages/frontend/src/lib/settings.svelte.ts
index 0da4e45..1b93804 100644
--- a/packages/frontend/src/lib/settings.svelte.ts
+++ b/packages/frontend/src/lib/settings.svelte.ts
@@ -15,6 +15,7 @@ let toolPerms = $state<Record<string, boolean>>({
web_search: false,
youtube_transcribe: false,
search_code: false,
+ key_usage: false,
lsp: false,
});
let savedToolPerms = $state<Record<string, boolean>>({
@@ -29,6 +30,7 @@ let savedToolPerms = $state<Record<string, boolean>>({
web_search: false,
youtube_transcribe: false,
search_code: false,
+ key_usage: false,
lsp: false,
});
let skillChecks = $state<Record<string, boolean>>({});
diff --git a/packages/frontend/src/lib/tabs.svelte.ts b/packages/frontend/src/lib/tabs.svelte.ts
index 3edd1e3..90e1cee 100644
--- a/packages/frontend/src/lib/tabs.svelte.ts
+++ b/packages/frontend/src/lib/tabs.svelte.ts
@@ -11,13 +11,14 @@ import {
// DB-free; safe in the browser bundle. The flat chunk log is the frontend's
// source of truth for HISTORY; `groupRowsToMessages` derives render bubbles.
import { groupRowsToMessages, type MessageRow } from "@dispatch/core/src/chunks/transform.js";
-import type { ChunkRow } from "@dispatch/core/src/types/index.js";
+import type { ChunkRow, UserContentPart } from "@dispatch/core/src/types/index.js";
import {
type AgentModelEntry,
DEFAULT_REASONING_EFFORT,
isReasoningEffort,
type ReasoningEffort,
} from "@dispatch/core/src/types/index.js";
+import { intactTokenIds, type StagedAttachment } from "./attachment-tokens.js";
import { config } from "./config.js";
import { appSettings } from "./settings.svelte.js";
import type {
@@ -183,6 +184,13 @@ export interface Tab {
*/
draft: string;
/**
+ * Staged image/PDF attachments for THIS tab's unsent draft (in-memory only —
+ * never persisted). Each corresponds to an inline `【image:…】`/`【pdf:…】`
+ * token in `draft`; removing the token detaches the attachment (reconciled on
+ * every keystroke). Ephemeral: sent to the model for one turn, then cleared.
+ */
+ attachments: StagedAttachment[];
+ /**
* True once the user has manually renamed this tab (double-click rename).
* Suppresses the first-message auto-title so a chosen name is never
* clobbered. In-memory only — a renamed tab is no longer "New Tab" on
@@ -322,6 +330,7 @@ export function createTabStore() {
queuedMessages: [],
chunkLimit: appSettings.chunkLimit,
draft: "",
+ attachments: [],
manualTitle: false,
oldestLoadedSeq: null,
totalChunks: 0,
@@ -402,6 +411,7 @@ export function createTabStore() {
queuedMessages: [],
chunkLimit: appSettings.chunkLimit,
draft: "",
+ attachments: [],
manualTitle: false,
oldestLoadedSeq: win.oldestSeq,
totalChunks: win.total,
@@ -506,8 +516,31 @@ export function createTabStore() {
* target tab shows its own text. No-op if the tab is gone.
*/
function setDraft(id: string, text: string): void {
- if (!getTabById(id)) return;
- updateTab(id, { draft: text });
+   const target = getTabById(id);
+   if (!target) return;
+   // The inline token in the textarea is the ONLY handle on a staged
+   // attachment, so every draft write re-derives which attachments are still
+   // referenced by an intact token (covers atomic-delete, manual mid-token
+   // edits, cut, select-all-delete, etc.) and drops the rest.
+   const liveIds = intactTokenIds(text);
+   const surviving = target.attachments.filter((staged) => liveIds.has(staged.id));
+   const attachmentsChanged = surviving.length !== target.attachments.length;
+   // Only write `attachments` when something was actually detached.
+   updateTab(id, attachmentsChanged ? { draft: text, attachments: surviving } : { draft: text });
+ }
+
+ /**
+  * Append a pasted attachment to a tab's staged list. Does nothing when the
+  * tab no longer exists.
+  *
+  * The caller must also insert the matching `【image:…】`/`【pdf:…】` token into
+  * the draft: `setDraft` drops any staged attachment whose token is not intact
+  * in the draft text.
+  */
+ function addAttachment(id: string, attachment: StagedAttachment): void {
+   const staged = getTabById(id)?.attachments;
+   if (!staged) return;
+   updateTab(id, { attachments: [...staged, attachment] });
}
/**
@@ -942,6 +975,7 @@ export function createTabStore() {
queuedMessages: [],
chunkLimit: appSettings.chunkLimit,
draft: "",
+ attachments: [],
manualTitle: false,
oldestLoadedSeq: win.oldestSeq,
totalChunks: win.total,
@@ -1011,6 +1045,7 @@ export function createTabStore() {
manualTitle: true,
oldestLoadedSeq: null,
totalChunks: 0,
+ attachments: [],
compactingSource: sourceTabId,
isCompacting: false,
compactionError: null,
@@ -1084,6 +1119,7 @@ export function createTabStore() {
manualTitle: true,
oldestLoadedSeq: win.oldestSeq,
totalChunks: win.total,
+ attachments: [],
compactingSource: null,
isCompacting: false,
compactionError: null,
@@ -1436,6 +1472,7 @@ export function createTabStore() {
queuedMessages: [],
chunkLimit: appSettings.chunkLimit,
draft: "",
+ attachments: [],
manualTitle: false,
oldestLoadedSeq: null,
totalChunks: 0,
@@ -1798,7 +1835,7 @@ export function createTabStore() {
}
}
- async function sendMessage(text: string): Promise<void> {
+ async function sendMessage(text: string, content?: UserContentPart[]): Promise<void> {
let tab = getActiveTab();
if (!tab) return;
@@ -1809,8 +1846,11 @@ export function createTabStore() {
if (!tab) return;
}
- // Fetch content for checked skills and build the message to send
- let messageToSend = text;
+ // Fetch content for checked skills and build the message to send.
+ // `skillPrefix` (when non-empty) is prepended to BOTH the text projection
+ // that gets persisted/rendered AND the multimodal content array, so an
+ // image turn still carries the activated skills to the model.
+ let skillPrefix = "";
const checkedKeys = Object.entries(appSettings.skillChecks)
.filter(([, v]) => v)
.map(([k]) => k);
@@ -1821,13 +1861,13 @@ export function createTabStore() {
const [scope, ...nameParts] = key.split(":");
const name = nameParts.join(":");
if (!scope || !name) continue;
- const content = await fetchSkillContent(scope, name);
- if (content) {
- skillSections.push(`<skill name="${name}">\n${content}\n</skill>`);
+ const skillContent = await fetchSkillContent(scope, name);
+ if (skillContent) {
+ skillSections.push(`<skill name="${name}">\n${skillContent}\n</skill>`);
}
}
if (skillSections.length > 0) {
- messageToSend = `[The following skills have been activated for this message]\n\n${skillSections.join("\n\n")}\n\n---\n\n${text}`;
+ skillPrefix = `[The following skills have been activated for this message]\n\n${skillSections.join("\n\n")}\n\n---\n\n`;
}
// Track injected skills on the tab
@@ -1838,6 +1878,12 @@ export function createTabStore() {
appSettings.skillChecks = {};
}
+ const messageToSend = `${skillPrefix}${text}`;
+ // Prepend the skill prefix to the multimodal content as a leading text
+ // part so the model sees the activated skills before the attachments.
+ const contentToSend =
+ content && skillPrefix ? [{ type: "text" as const, text: skillPrefix }, ...content] : content;
+
const userMsg: ChatMessage = {
id: generateId(),
role: "user",
@@ -1914,6 +1960,7 @@ export function createTabStore() {
body: JSON.stringify({
tabId: tab.id,
message: messageToSend,
+ ...(contentToSend ? { content: contentToSend } : {}),
...(tab.keyId ? { keyId: tab.keyId } : {}),
...(tab.modelId ? { modelId: tab.modelId } : {}),
...(tab.agentModels ? { agentModels: tab.agentModels } : {}),
@@ -2312,6 +2359,7 @@ export function createTabStore() {
renameTab,
reorderTabs,
setDraft,
+ addAttachment,
sendMessage,
cancelQueuedMessage,
stopGeneration,
diff --git a/packages/frontend/tests/attachment-tokens.test.ts b/packages/frontend/tests/attachment-tokens.test.ts
new file mode 100644
index 0000000..7208cf3
--- /dev/null
+++ b/packages/frontend/tests/attachment-tokens.test.ts
@@ -0,0 +1,130 @@
+import { describe, expect, it } from "vitest";
+import {
+ computeTokenDeletion,
+ findTokens,
+ generateTokenId,
+ intactTokenIds,
+ makeAttachmentToken,
+ markerFor,
+ parseDraft,
+ type StagedAttachment,
+} from "../src/lib/attachment-tokens.js";
+
+/** Build a staged PNG image attachment fixture with the given token id. */
+function img(id: string): StagedAttachment {
+  const fixture: StagedAttachment = { id, kind: "image", mediaType: "image/png", data: "QQ==" };
+  return fixture;
+}
+/** Build a staged PDF attachment fixture (named "doc.pdf") with the given token id. */
+function pdf(id: string): StagedAttachment {
+  const fixture: StagedAttachment = {
+    id,
+    kind: "pdf",
+    mediaType: "application/pdf",
+    data: "QQ==",
+    name: "doc.pdf",
+  };
+  return fixture;
+}
+
+// Covers token construction, id generation, and token discovery in draft text.
+describe("token helpers", () => {
+  it("round-trips make/find", () => {
+    const token = makeAttachmentToken("image", "abc123");
+    expect(token).toBe("【image:abc123】");
+
+    // "x " precedes the token, so it must be reported at offset 2.
+    const matches = findTokens(`x ${token} y`);
+    expect(matches).toHaveLength(1);
+    expect(matches[0]).toMatchObject({
+      id: "abc123",
+      kind: "image",
+      start: 2,
+      end: 2 + token.length,
+    });
+  });
+
+  it("generates 6-char lowercase-alnum ids", () => {
+    // Ids are generated fresh each call, so sample a batch rather than one.
+    let remaining = 20;
+    while (remaining-- > 0) {
+      expect(generateTokenId()).toMatch(/^[a-z0-9]{6}$/);
+    }
+  });
+
+  it("finds multiple tokens in order and reports intact ids", () => {
+    const imageToken = makeAttachmentToken("image", "aaaaaa");
+    const pdfToken = makeAttachmentToken("pdf", "bbbbbb");
+    const text = `a ${imageToken} b ${pdfToken}`;
+
+    const ids = findTokens(text).map((t) => t.id);
+    expect(ids).toEqual(["aaaaaa", "bbbbbb"]);
+    expect(intactTokenIds(text)).toEqual(new Set(["aaaaaa", "bbbbbb"]));
+  });
+
+  it("does not treat a partially-broken token as intact", () => {
+    // The closing bracket is missing, so this must not count as a token.
+    const broken = "【image:aaaaaa";
+    expect(intactTokenIds(broken).size).toBe(0);
+  });
+});
+
+// Covers atomic whole-token deletion for Backspace/Delete and range selections.
+describe("computeTokenDeletion", () => {
+  // Fixture: "hi " + token + "!" — the token occupies [tokStart, tokEnd).
+  const tok = makeAttachmentToken("image", "abcabc");
+  const lead = "hi ";
+  const text = `${lead}${tok}!`;
+  const tokStart = lead.length;
+  const tokEnd = tokStart + tok.length;
+
+  it("returns null when no tokens exist", () => {
+    const res = computeTokenDeletion("plain", 2, 2, "Backspace");
+    expect(res).toBeNull();
+  });
+
+  it("Backspace just after a token removes the whole token atomically", () => {
+    const res = computeTokenDeletion(text, tokEnd, tokEnd, "Backspace");
+    expect(res).not.toBeNull();
+    expect(res?.text).toBe("hi !");
+    expect(res?.caret).toBe(tokStart);
+    expect(res?.removedIds).toEqual(["abcabc"]);
+  });
+
+  it("Delete just before a token removes the whole token atomically", () => {
+    const res = computeTokenDeletion(text, tokStart, tokStart, "Delete");
+    expect(res?.text).toBe("hi !");
+    expect(res?.caret).toBe(tokStart);
+    expect(res?.removedIds).toEqual(["abcabc"]);
+  });
+
+  it("Backspace NOT adjacent to a token returns null (default editing)", () => {
+    // Caret sits right after "hi" (index 2); the token starts one char later.
+    const res = computeTokenDeletion(text, 2, 2, "Backspace");
+    expect(res).toBeNull();
+  });
+
+  it("a selection overlapping a token expands to cover the whole token", () => {
+    // Selection runs from index 1 (inside "hi ") into the middle of the token.
+    const selStart = 1;
+    const selEnd = tokStart + 3;
+    const res = computeTokenDeletion(text, selStart, selEnd, "Backspace");
+    expect(res).not.toBeNull();
+    // Everything from index 1 through the end of the token is removed.
+    expect(res?.text).toBe("h!");
+    expect(res?.removedIds).toEqual(["abcabc"]);
+  });
+
+  it("a range selection touching no token returns null", () => {
+    const res = computeTokenDeletion(text, 0, 2, "Backspace");
+    expect(res).toBeNull();
+  });
+});
+
+// Covers turning a draft plus staged attachments into display text and ordered content parts.
+describe("parseDraft", () => {
+  it("returns plain text + null content when there are no attachments", () => {
+    const { displayText, content } = parseDraft("just text", new Map());
+    expect(displayText).toBe("just text");
+    expect(content).toBeNull();
+  });
+
+  it("interleaves text and attachment parts in order", () => {
+    const image = img("aaaaaa");
+    const doc = pdf("bbbbbb");
+    const staged = new Map<string, StagedAttachment>();
+    staged.set(image.id, image);
+    staged.set(doc.id, doc);
+
+    const imageToken = makeAttachmentToken("image", image.id);
+    const pdfToken = makeAttachmentToken("pdf", doc.id);
+    const { displayText, content } = parseDraft(`A: ${imageToken} B: ${pdfToken} end`, staged);
+
+    // Each token is replaced by its kind's marker in the display text.
+    expect(displayText).toBe(`A: ${markerFor("image")} B: ${markerFor("pdf")} end`);
+
+    // The content array alternates surrounding text with the attachment parts.
+    const expectedParts = [
+      { type: "text", text: "A: " },
+      { type: "attachment", mediaType: "image/png", data: "QQ==" },
+      { type: "text", text: " B: " },
+      { type: "attachment", mediaType: "application/pdf", data: "QQ==", name: "doc.pdf" },
+      { type: "text", text: " end" },
+    ];
+    expect(content).toEqual(expectedParts);
+  });
+
+  it("treats an orphan token (no staged attachment) as plain text", () => {
+    // The token appears in the text but nothing is staged under its id.
+    const orphan = makeAttachmentToken("image", "zzzzzz");
+    const { displayText, content } = parseDraft(`x ${orphan} y`, new Map());
+    expect(displayText).toBe(`x ${markerFor("image")} y`);
+    // With no real attachment the send falls back to plain text (null content).
+    expect(content).toBeNull();
+  });
+});
diff --git a/packages/frontend/tests/chat-store.test.ts b/packages/frontend/tests/chat-store.test.ts
index a0d4ead..8639bff 100644
--- a/packages/frontend/tests/chat-store.test.ts
+++ b/packages/frontend/tests/chat-store.test.ts
@@ -2126,3 +2126,78 @@ describe("tabStore — per-tab chat input draft", () => {
expect(store.tabs.every((t) => t.draft === "")).toBe(true);
});
});
+
+// Covers staging/reconciling attachments on a tab and the /chat request body
+// that sendMessage produces with and without multimodal content.
+describe("tabStore — image/pdf attachments", () => {
+  /** Minimal staged image attachment (tiny base64 payload) with the given token id. */
+  function imgAttachment(id: string) {
+    return { id, kind: "image" as const, mediaType: "image/png", data: "QQ==" };
+  }
+
+  /**
+   * Replace global `fetch` with a mock that resolves OK for every request
+   * (`{ status: "ok" }` for `/chat`, `{}` otherwise) and return the mock so
+   * tests can inspect the recorded calls.
+   */
+  function stubFetch() {
+    const fetchMock = vi.fn((url: string) => {
+      if (typeof url === "string" && url.endsWith("/chat")) {
+        return Promise.resolve({ ok: true, json: () => Promise.resolve({ status: "ok" }) });
+      }
+      return Promise.resolve({ ok: true, json: () => Promise.resolve({}) });
+    });
+    vi.stubGlobal("fetch", fetchMock);
+    return fetchMock;
+  }
+
+  /** Parse the JSON body of the first `/chat` request; fails the test if none was sent. */
+  function chatRequestBody(fetchMock: ReturnType<typeof stubFetch>) {
+    const chatCall = fetchMock.mock.calls.find(
+      (c) => typeof c[0] === "string" && (c[0] as string).endsWith("/chat"),
+    );
+    expect(chatCall).toBeDefined();
+    return JSON.parse((chatCall?.[1] as { body: string }).body);
+  }
+
+  it("stages attachments and reconciles them against intact draft tokens", async () => {
+    stubFetch();
+    const store = createTabStore();
+    const a = await store.createNewTab();
+    store.switchTab(a.id);
+
+    store.addAttachment(a.id, imgAttachment("aaaaaa"));
+    // Draft carries the token → attachment survives.
+    store.setDraft(a.id, "look 【image:aaaaaa】");
+    expect(store.activeTab?.attachments.map((x) => x.id)).toEqual(["aaaaaa"]);
+
+    // Remove the token from the draft → attachment is detached.
+    store.setDraft(a.id, "look ");
+    expect(store.activeTab?.attachments).toHaveLength(0);
+  });
+
+  // Title states only what is asserted: this test does not check the draft.
+  it("sendMessage posts ordered multimodal content alongside the text message", async () => {
+    const fetchMock = stubFetch();
+    const store = createTabStore();
+    const a = await store.createNewTab();
+    store.switchTab(a.id);
+
+    await store.sendMessage("here is A: [image]", [
+      { type: "text", text: "here is A: " },
+      { type: "attachment", mediaType: "image/png", data: "QQ==" },
+    ]);
+
+    const body = chatRequestBody(fetchMock);
+    expect(body.message).toBe("here is A: [image]");
+    expect(body.content).toEqual([
+      { type: "text", text: "here is A: " },
+      { type: "attachment", mediaType: "image/png", data: "QQ==" },
+    ]);
+  });
+
+  it("sendMessage omits content for a plain-text message", async () => {
+    const fetchMock = stubFetch();
+    const store = createTabStore();
+    await store.createNewTab();
+    await store.sendMessage("just text");
+
+    const body = chatRequestBody(fetchMock);
+    expect(body.content).toBeUndefined();
+  });
+});