Merge branch 'dev' into cmp7/compaction-tool

# Conflicts:
#	packages/frontend/src/lib/components/ChatInput.svelte
author: Adam Malczewski <[email protected]> 2026-06-03 08:24:40 +0900
committer: Adam Malczewski <[email protected]> 2026-06-03 08:24:40 +0900
commit: bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a (patch)
tree: 17e84ebf8d83c51a7a50312c256372a86e38b92a /packages/api/src
parent: b26821ead97b986f886065b20d3dbde8283daa64 (diff)
parent: ae672fd4f5542a2c217cf97657bf81eeebdaabbd (diff)
download: dispatch-bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a.tar.gz
dispatch-bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a.zip
3 files changed, 116 insertions, 1 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts
index 38dab49..e79e9a8 100644
--- a/packages/api/src/agent-manager.ts
+++ b/packages/api/src/agent-manager.ts
@@ -15,6 +15,7 @@ import {
 	clearSpillForTab,
 	configToRuleset,
 	createConfigWatcher,
+	createKeyUsageTool,
 	createListFilesTool,
 	createLspTool,
 	createReadFileSliceTool,
@@ -71,6 +72,7 @@ import {
 	toAvailableUserAgents,
 	type UsageData,
 	type UsageStats,
+	type UserContentPart,
 	validateConfig,
 } from "@dispatch/core";
 import type { PermissionManager } from "./permission-manager.js";
@@ -90,6 +92,8 @@ const TOOL_DESCRIPTIONS: Record<string, string> = {
 	search_code:
 		"Search the codebase by query using the 'cs' code search engine (relevance-ranked, structure-aware). Returns the most relevant files first with matching snippets and line numbers. Better than grep/find for exploratory 'where is X / how does Y work' searches; use run_shell with rg for exhaustive exact-match lists.",
 	todo: "Create/maintain a todo list to plan and track work. Declarative whole-list write: send the entire list in `todos` each call (it replaces the previous list). Statuses: pending, in_progress, completed, cancelled.",
+	key_usage:
+		"Report current usage levels for configured API keys: provider, active/exhausted status, remaining rate-limit headroom and reset times per window (5-hour, weekly, monthly where available), and whether the figures are live or cached. Pass key_id for one key; omit to report all. Supported for anthropic and opencode-go keys.",
 	summon:
 		"Spawn a child agent to work on a task independently. By default blocks until the child finishes. Set background=true to return immediately with an agent_id for later retrieval.",
 	retrieve:
@@ -527,10 +531,11 @@ export class AgentManager {
 		const permReadTab = getSetting("perm_read_tab") === "allow";
 		const permWebSearch = getSetting("perm_web_search") === "allow";
 		const permSearchCode = getSetting("perm_search_code") === "allow";
+		const permKeyUsage = getSetting("perm_key_usage") === "allow";
 		const permYoutubeTranscribe = getSetting("perm_youtube_transcribe") === "allow";
 		const permLsp = getSetting("perm_lsp") === "allow";
 		const sysPrompt = getSetting("system_prompt") ?? "";
-		const permKey = `${permRead}:${permEdit}:${permBash}:${permSummon}:${permUserAgent}:${permSendToTab}:${permReadTab}:${permWebSearch}:${permYoutubeTranscribe}:${permSearchCode}:${permLsp}:${sysPrompt}`;
+		const permKey = `${permRead}:${permEdit}:${permBash}:${permSummon}:${permUserAgent}:${permSendToTab}:${permReadTab}:${permWebSearch}:${permYoutubeTranscribe}:${permSearchCode}:${permKeyUsage}:${permLsp}:${sysPrompt}`;
 
 		// If the override differs or permissions changed, invalidate the cached agent
 		if (
@@ -622,6 +627,9 @@ export class AgentManager {
 				if (allowed.has("web_search")) {
 					toolEntries.push({ name: "web_search", tool: createWebSearchTool() });
 				}
+				if (allowed.has("key_usage")) {
+					toolEntries.push({ name: "key_usage", tool: this.buildKeyUsageTool() });
+				}
 				if (allowed.has("lsp") && lspServers.length > 0) {
 					toolEntries.push({
 						name: "lsp",
@@ -727,6 +735,9 @@ export class AgentManager {
 				if (permWebSearch) {
 					toolEntries.push({ name: "web_search", tool: createWebSearchTool() });
 				}
+				if (permKeyUsage) {
+					toolEntries.push({ name: "key_usage", tool: this.buildKeyUsageTool() });
+				}
 				// The `lsp` tool exposes diagnostics + navigation on demand. It is
 				// gated by `perm_lsp` AND requires at least one server configured
 				// in the working directory's `dispatch.toml`.
@@ -1665,6 +1676,19 @@ export class AgentManager {
 	// `deliverMessage`), so an agent message behaves identically to a user one.
 
 	/**
+	 * Build the `key_usage` tool, wired to the live model registry (key states)
+	 * and the discovered Claude accounts. The tool fetches usage live with a
+	 * cache fallback (anthropic) or a live scrape (opencode-go), reporting
+	 * remaining headroom, reset times, and data freshness per key.
+	 */
+	private buildKeyUsageTool(): ReturnType<typeof createKeyUsageTool> {
+		return createKeyUsageTool({
+			listKeys: () => this.modelRegistry?.getKeys() ?? [], // `[]` when the registry (or its key list) is nullish
+			listClaudeAccounts: () => this.claudeAccounts, // callback: the field is read when invoked, not when the tool is built
+		});
+	}
+
+	/**
 	 * Build the `send_to_tab` + `read_tab` tool entries for `tabId`. Shared by
 	 * both tool-construction paths (child whitelist + permission-gated parent).
 	 * `selfHandle` is computed once so the calling tab can stamp provenance and
@@ -1796,6 +1820,13 @@ export class AgentManager {
 			workingDirectory?: string;
 			queueId?: string;
 			/**
+			 * Ephemeral ordered multimodal content (image/pdf attachments) for a
+			 * FRESH human turn. Forwarded to `processMessage` → `agent.run` only
+			 * when the tab is idle (a started turn); never carried into the queue
+			 * path (attachments require a fresh turn — the caller guards that).
+			 */
+			content?: UserContentPart[];
+			/**
 			 * Who is sending this message. `"human"` (default) is unrestricted
 			 * and REFILLS the target's agent-to-agent auto-wake budget. `"agent"`
 			 * (from the `send_to_tab` tool) is governed by that budget: an
@@ -1874,6 +1905,7 @@ export class AgentManager {
 			opts.reasoningEffort,
 			opts.workingDirectory,
 			agentModels,
+			opts.content,
 		).catch((err) => {
 			console.error(`[dispatch] deliverMessage processMessage error for tab ${tabId}:`, err);
 		});
@@ -1888,6 +1920,7 @@ export class AgentManager {
 		reasoningEffort?: ReasoningEffort,
 		workingDirectory?: string,
 		agentModels?: AgentModelEntry[],
+		content?: UserContentPart[],
 	): Promise<void> {
 		const tabAgent = this._getOrCreateTabAgent(tabId);
 
@@ -1999,6 +2032,7 @@ export class AgentManager {
 				for await (const event of agent.run(message, {
 					...(effortForEntry ? { reasoningEffort: effortForEntry } : {}),
 					abortSignal: tabAgent.abortController?.signal,
+					...(content ? { content } : {}),
 				})) {
 					// Stop processing if the tab was aborted (closed/stopped).
 					// stopTab() already injected a `cancelled` system chunk into
diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts
index 84afd2a..2f4e538 100644
--- a/packages/api/src/app.ts
+++ b/packages/api/src/app.ts
@@ -3,6 +3,8 @@ import {
 	getTab,
 	isReasoningEffort,
 	NotificationDispatcher,
+	type UserContentPart,
+	validateUserContent,
 } from "@dispatch/core";
 import { Hono } from "hono";
 import { cors } from "hono/cors";
@@ -37,6 +39,41 @@ function sanitizeAgentModels(raw: unknown): AgentModelEntry[] | undefined {
 	return out;
 }
 
+/**
+ * Normalise the optional multimodal `content` array from the `/chat` body.
+ * Kept entries: `{ type: "text", text }` or `{ type: "attachment", mediaType,
+ * data, name? }` (base64 payload). Non-objects, unknown `type`s and parts
+ * missing a required string field are skipped. Returns `undefined` unless
+ * `raw` is a non-empty array with a valid attachment (plain-string path).
+ * Shape only: SIZE/TYPE limits are enforced separately by `validateUserContent`.
+ */
+function sanitizeUserContent(raw: unknown): UserContentPart[] | undefined {
+	if (!Array.isArray(raw) || raw.length === 0) return undefined;
+	const out: UserContentPart[] = [];
+	let hasAttachment = false;
+	for (const p of raw) {
+		if (!p || typeof p !== "object") continue; // skip null / primitive entries
+		const part = p as Record<string, unknown>;
+		if (part.type === "text") {
+			if (typeof part.text === "string") out.push({ type: "text", text: part.text }); // non-string `text` is dropped
+			continue;
+		}
+		if (part.type === "attachment") {
+			if (typeof part.mediaType !== "string" || typeof part.data !== "string") continue; // malformed attachment: skipped, not rejected
+			hasAttachment = true;
+			out.push({
+				type: "attachment",
+				mediaType: part.mediaType,
+				data: part.data,
+				...(typeof part.name === "string" ? { name: part.name } : {}), // `name` is copied only when it is a string
+			});
+		}
+	}
+	// No attachment → let the plain-text path handle it (avoids needlessly
+	// switching the model message to array content for a text-only turn).
+	return hasAttachment ? out : undefined;
+}
+
 export const permissionManager = new PermissionManager();
 export const agentManager = new AgentManager(permissionManager);
 
@@ -94,6 +131,7 @@ app.post("/chat", async (c) => {
 	const body = await c.req.json<{
 		tabId?: unknown;
 		message?: unknown;
+		content?: unknown;
 		keyId?: unknown;
 		modelId?: unknown;
 		agentModels?: unknown;
@@ -121,6 +159,30 @@ app.post("/chat", async (c) => {
 		? body.reasoningEffort
 		: undefined;
 
+	// Optional multimodal content (image/pdf attachments). When present, the
+	// attachments are EPHEMERAL — forwarded to the model for this turn only and
+	// never persisted (the chunk log keeps just `message`, which the frontend
+	// has already projected to text with `[image]`/`[pdf]` markers).
+	const content = sanitizeUserContent(body.content);
+	if (content) {
+		// Enforce size/type/count ceilings server-side (defence in depth; the
+		// frontend also enforces them at paste time). Reject the whole request
+		// so no tokens are spent on an over-limit payload.
+		const validation = validateUserContent(content);
+		if (!validation.ok) {
+			return c.json({ error: "invalid attachments", details: validation.errors }, 400);
+		}
+		// Attachments only attach to a FRESH turn. If the tab is mid-turn the
+		// message would queue (text-only machinery), silently dropping the
+		// images. Reject clearly instead so the user can retry once idle.
+		if (agentManager.getTabStatus(tabId) === "running") {
+			return c.json(
+				{ error: "cannot attach images while the agent is generating; wait for it to finish" },
+				409,
+			);
+		}
+	}
+
 	// Single routing decision (queue if busy, new turn if idle) shared with the
 	// `send_to_tab` tool via `AgentManager.deliverMessage`. Non-blocking — a
 	// started turn runs in the background.
@@ -131,6 +193,7 @@ app.post("/chat", async (c) => {
 		...(reasoningEffort ? { reasoningEffort } : {}),
 		...(workingDirectory !== undefined ? { workingDirectory } : {}),
 		...(queueId ? { queueId } : {}),
+		...(content ? { content } : {}),
 	});
 
 	if (outcome.status === "queued") {
diff --git a/packages/api/src/routes/models.ts b/packages/api/src/routes/models.ts
index eeb6029..a1700b1 100644
--- a/packages/api/src/routes/models.ts
+++ b/packages/api/src/routes/models.ts
@@ -20,6 +20,7 @@ import {
 	refreshAccountCredentialsAsync,
 	resolveApiKey,
 	resolveContextLimit,
+	resolveModelCapabilities,
 	selectHaikuModel,
 	setApiKey,
 	validateAccountCredentials,
@@ -180,6 +181,23 @@ modelsRoutes.get("/context-limit", async (c) => {
 	return c.json({ contextLimit });
 });
 
+// GET `/capabilities?provider=…&modelId=…` — resolve a model's image / PDF INPUT
+// capabilities from the models.dev catalog. Responds 400 if either query
+// parameter is missing, else `{ capabilities: { image, pdf } | null }`. `null`
+// means UNKNOWN — provider unmapped, model absent, catalog predates the
+// `modalities` field, or catalog offline. The frontend treats `null` as "can't
+// verify" (optimistic allow) and `{ image: false }` as a hard block (no tokens spent).
+modelsRoutes.get("/capabilities", async (c) => {
+	const provider = c.req.query("provider");
+	const modelId = c.req.query("modelId");
+	if (!provider || !modelId) {
+		return c.json({ error: "provider and modelId query parameters are required" }, 400);
+	}
+
+	const capabilities = await resolveModelCapabilities(provider, modelId);
+	return c.json({ capabilities });
+});
+
 // List available Claude accounts with validated credentials
 modelsRoutes.get("/claude-accounts", async (c) => {
 	const candidates = resolveClaudeAccounts();
author	Adam Malczewski <[email protected]>	2026-06-03 08:24:40 +0900
committer	Adam Malczewski <[email protected]>	2026-06-03 08:24:40 +0900
commit	bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a (patch)
tree	17e84ebf8d83c51a7a50312c256372a86e38b92a /packages/api/src
parent	b26821ead97b986f886065b20d3dbde8283daa64 (diff)
parent	ae672fd4f5542a2c217cf97657bf81eeebdaabbd (diff)
download	dispatch-bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a.tar.gz dispatch-bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a.zip