summary | refs | log | tree | commit | diff | homepage
path: root/packages/api/src
diff options
context:
space:
mode:
author: Adam Malczewski <[email protected]> 2026-06-03 08:24:40 +0900
committer: Adam Malczewski <[email protected]> 2026-06-03 08:24:40 +0900
commit: bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a (patch)
tree: 17e84ebf8d83c51a7a50312c256372a86e38b92a /packages/api/src
parent: b26821ead97b986f886065b20d3dbde8283daa64 (diff)
parent: ae672fd4f5542a2c217cf97657bf81eeebdaabbd (diff)
download: dispatch-bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a.tar.gz
download: dispatch-bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a.zip
Merge branch 'dev' into cmp7/compaction-tool
# Conflicts:
#   packages/frontend/src/lib/components/ChatInput.svelte
Diffstat (limited to 'packages/api/src')
-rw-r--r-- packages/api/src/agent-manager.ts 36
-rw-r--r-- packages/api/src/app.ts 63
-rw-r--r-- packages/api/src/routes/models.ts 18
3 files changed, 116 insertions, 1 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts
index 38dab49..e79e9a8 100644
--- a/packages/api/src/agent-manager.ts
+++ b/packages/api/src/agent-manager.ts
@@ -15,6 +15,7 @@ import {
clearSpillForTab,
configToRuleset,
createConfigWatcher,
+ createKeyUsageTool,
createListFilesTool,
createLspTool,
createReadFileSliceTool,
@@ -71,6 +72,7 @@ import {
toAvailableUserAgents,
type UsageData,
type UsageStats,
+ type UserContentPart,
validateConfig,
} from "@dispatch/core";
import type { PermissionManager } from "./permission-manager.js";
@@ -90,6 +92,8 @@ const TOOL_DESCRIPTIONS: Record<string, string> = {
search_code:
"Search the codebase by query using the 'cs' code search engine (relevance-ranked, structure-aware). Returns the most relevant files first with matching snippets and line numbers. Better than grep/find for exploratory 'where is X / how does Y work' searches; use run_shell with rg for exhaustive exact-match lists.",
todo: "Create/maintain a todo list to plan and track work. Declarative whole-list write: send the entire list in `todos` each call (it replaces the previous list). Statuses: pending, in_progress, completed, cancelled.",
+ key_usage:
+ "Report current usage levels for configured API keys: provider, active/exhausted status, remaining rate-limit headroom and reset times per window (5-hour, weekly, monthly where available), and whether the figures are live or cached. Pass key_id for one key; omit to report all. Supported for anthropic and opencode-go keys.",
summon:
"Spawn a child agent to work on a task independently. By default blocks until the child finishes. Set background=true to return immediately with an agent_id for later retrieval.",
retrieve:
@@ -527,10 +531,11 @@ export class AgentManager {
const permReadTab = getSetting("perm_read_tab") === "allow";
const permWebSearch = getSetting("perm_web_search") === "allow";
const permSearchCode = getSetting("perm_search_code") === "allow";
+ const permKeyUsage = getSetting("perm_key_usage") === "allow";
const permYoutubeTranscribe = getSetting("perm_youtube_transcribe") === "allow";
const permLsp = getSetting("perm_lsp") === "allow";
const sysPrompt = getSetting("system_prompt") ?? "";
- const permKey = `${permRead}:${permEdit}:${permBash}:${permSummon}:${permUserAgent}:${permSendToTab}:${permReadTab}:${permWebSearch}:${permYoutubeTranscribe}:${permSearchCode}:${permLsp}:${sysPrompt}`;
+ const permKey = `${permRead}:${permEdit}:${permBash}:${permSummon}:${permUserAgent}:${permSendToTab}:${permReadTab}:${permWebSearch}:${permYoutubeTranscribe}:${permSearchCode}:${permKeyUsage}:${permLsp}:${sysPrompt}`;
// If the override differs or permissions changed, invalidate the cached agent
if (
@@ -622,6 +627,9 @@ export class AgentManager {
if (allowed.has("web_search")) {
toolEntries.push({ name: "web_search", tool: createWebSearchTool() });
}
+ if (allowed.has("key_usage")) {
+ toolEntries.push({ name: "key_usage", tool: this.buildKeyUsageTool() });
+ }
if (allowed.has("lsp") && lspServers.length > 0) {
toolEntries.push({
name: "lsp",
@@ -727,6 +735,9 @@ export class AgentManager {
if (permWebSearch) {
toolEntries.push({ name: "web_search", tool: createWebSearchTool() });
}
+ if (permKeyUsage) {
+ toolEntries.push({ name: "key_usage", tool: this.buildKeyUsageTool() });
+ }
// The `lsp` tool exposes diagnostics + navigation on demand. It is
// gated by `perm_lsp` AND requires at least one server configured
// in the working directory's `dispatch.toml`.
@@ -1665,6 +1676,19 @@ export class AgentManager {
// `deliverMessage`), so an agent message behaves identically to a user one.
/**
+ * Build the `key_usage` tool, wired to the live model registry (key states) and the discovered Claude accounts.
+ * Both sources are handed over as callbacks, so they are read whenever a callback is invoked rather than captured at build time; `listKeys` yields `[]` while no model registry is set.
+ * Per the tool's design, usage is fetched live with a cache fallback (anthropic) or a live scrape (opencode-go),
+ * reporting remaining headroom, reset times, and data freshness per key.
+ */
+ private buildKeyUsageTool(): ReturnType<typeof createKeyUsageTool> {
+ return createKeyUsageTool({
+ listKeys: () => this.modelRegistry?.getKeys() ?? [],
+ listClaudeAccounts: () => this.claudeAccounts,
+ });
+ }
+
+ /**
* Build the `send_to_tab` + `read_tab` tool entries for `tabId`. Shared by
* both tool-construction paths (child whitelist + permission-gated parent).
* `selfHandle` is computed once so the calling tab can stamp provenance and
@@ -1796,6 +1820,13 @@ export class AgentManager {
workingDirectory?: string;
queueId?: string;
/**
+ * Ephemeral ordered multimodal content (image/pdf attachments) for a
+ * FRESH human turn. Forwarded to `processMessage` → `agent.run` only
+ * when the tab is idle (a started turn); never carried into the queue
+ * path (attachments require a fresh turn — the caller guards that).
+ */
+ content?: UserContentPart[];
+ /**
* Who is sending this message. `"human"` (default) is unrestricted
* and REFILLS the target's agent-to-agent auto-wake budget. `"agent"`
* (from the `send_to_tab` tool) is governed by that budget: an
@@ -1874,6 +1905,7 @@ export class AgentManager {
opts.reasoningEffort,
opts.workingDirectory,
agentModels,
+ opts.content,
).catch((err) => {
console.error(`[dispatch] deliverMessage processMessage error for tab ${tabId}:`, err);
});
@@ -1888,6 +1920,7 @@ export class AgentManager {
reasoningEffort?: ReasoningEffort,
workingDirectory?: string,
agentModels?: AgentModelEntry[],
+ content?: UserContentPart[],
): Promise<void> {
const tabAgent = this._getOrCreateTabAgent(tabId);
@@ -1999,6 +2032,7 @@ export class AgentManager {
for await (const event of agent.run(message, {
...(effortForEntry ? { reasoningEffort: effortForEntry } : {}),
abortSignal: tabAgent.abortController?.signal,
+ ...(content ? { content } : {}),
})) {
// Stop processing if the tab was aborted (closed/stopped).
// stopTab() already injected a `cancelled` system chunk into
diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts
index 84afd2a..2f4e538 100644
--- a/packages/api/src/app.ts
+++ b/packages/api/src/app.ts
@@ -3,6 +3,8 @@ import {
getTab,
isReasoningEffort,
NotificationDispatcher,
+ type UserContentPart,
+ validateUserContent,
} from "@dispatch/core";
import { Hono } from "hono";
import { cors } from "hono/cors";
@@ -37,6 +39,41 @@ function sanitizeAgentModels(raw: unknown): AgentModelEntry[] | undefined {
return out;
}
+/**
+ * Shape-check and normalise the optional multimodal `content` array from the `/chat` body, keeping
+ * `{ type: "text", text }` and `{ type: "attachment", mediaType, data, name? }` parts in request order.
+ * Malformed entries (non-objects, unknown `type`, non-string `text`/`mediaType`/`data`) are silently skipped, not rejected; `name` is copied only when it is a string.
+ * `data` is expected to be a base64 payload but is only checked to be a string here.
+ * Returns `undefined` when `raw` isn't a non-empty array or no well-formed attachment survives (so the plain-string path is taken — byte-identical to before).
+ * Shape only: SIZE/TYPE limits are enforced separately by `validateUserContent`.
+ */
+function sanitizeUserContent(raw: unknown): UserContentPart[] | undefined {
+ if (!Array.isArray(raw) || raw.length === 0) return undefined;
+ const out: UserContentPart[] = [];
+ let hasAttachment = false;
+ for (const p of raw) {
+ if (!p || typeof p !== "object") continue; // skip null/primitive entries
+ const part = p as Record<string, unknown>;
+ if (part.type === "text") {
+ if (typeof part.text === "string") out.push({ type: "text", text: part.text });
+ continue;
+ }
+ if (part.type === "attachment") {
+ if (typeof part.mediaType !== "string" || typeof part.data !== "string") continue; // drop malformed attachment
+ hasAttachment = true;
+ out.push({
+ type: "attachment",
+ mediaType: part.mediaType,
+ data: part.data,
+ ...(typeof part.name === "string" ? { name: part.name } : {}),
+ });
+ }
+ }
+ // No well-formed attachment → return undefined so the caller takes the plain-text path
+ // (avoids needlessly switching the model message to array content for a text-only turn).
+ return hasAttachment ? out : undefined;
+}
+
export const permissionManager = new PermissionManager();
export const agentManager = new AgentManager(permissionManager);
@@ -94,6 +131,7 @@ app.post("/chat", async (c) => {
const body = await c.req.json<{
tabId?: unknown;
message?: unknown;
+ content?: unknown;
keyId?: unknown;
modelId?: unknown;
agentModels?: unknown;
@@ -121,6 +159,30 @@ app.post("/chat", async (c) => {
? body.reasoningEffort
: undefined;
+ // Optional multimodal content (image/pdf attachments). When present, the
+ // attachments are EPHEMERAL — forwarded to the model for this turn only and
+ // never persisted (the chunk log keeps just `message`, which the frontend
+ // has already projected to text with `[image]`/`[pdf]` markers).
+ const content = sanitizeUserContent(body.content);
+ if (content) {
+ // Enforce size/type/count ceilings server-side (defence in depth; the
+ // frontend also enforces them at paste time). Reject the whole request
+ // so no tokens are spent on an over-limit payload.
+ const validation = validateUserContent(content);
+ if (!validation.ok) {
+ return c.json({ error: "invalid attachments", details: validation.errors }, 400);
+ }
+ // Attachments only attach to a FRESH turn. If the tab is mid-turn the
+ // message would queue (text-only machinery), silently dropping the
+ // images. Reject clearly instead so the user can retry once idle.
+ if (agentManager.getTabStatus(tabId) === "running") {
+ return c.json(
+ { error: "cannot attach images while the agent is generating; wait for it to finish" },
+ 409,
+ );
+ }
+ }
+
// Single routing decision (queue if busy, new turn if idle) shared with the
// `send_to_tab` tool via `AgentManager.deliverMessage`. Non-blocking — a
// started turn runs in the background.
@@ -131,6 +193,7 @@ app.post("/chat", async (c) => {
...(reasoningEffort ? { reasoningEffort } : {}),
...(workingDirectory !== undefined ? { workingDirectory } : {}),
...(queueId ? { queueId } : {}),
+ ...(content ? { content } : {}),
});
if (outcome.status === "queued") {
diff --git a/packages/api/src/routes/models.ts b/packages/api/src/routes/models.ts
index eeb6029..a1700b1 100644
--- a/packages/api/src/routes/models.ts
+++ b/packages/api/src/routes/models.ts
@@ -20,6 +20,7 @@ import {
refreshAccountCredentialsAsync,
resolveApiKey,
resolveContextLimit,
+ resolveModelCapabilities,
selectHaikuModel,
setApiKey,
validateAccountCredentials,
@@ -180,6 +181,23 @@ modelsRoutes.get("/context-limit", async (c) => {
return c.json({ contextLimit });
});
+// GET /capabilities?provider=…&modelId=… — resolve a model's image / PDF INPUT capabilities from the models.dev catalog.
+// Both query parameters are required; a missing or empty one yields a 400 with an error message.
+// Returns `{ capabilities: { image, pdf } | null }`. `null` means UNKNOWN — the
+// provider is unmapped, the model is absent, the catalog predates the `modalities` field, or the catalog is offline.
+// The frontend treats `null` as "can't verify" (optimistic allow) and a definitive `{ image: false }` as a
+// hard block (no tokens spent).
+modelsRoutes.get("/capabilities", async (c) => {
+ const provider = c.req.query("provider");
+ const modelId = c.req.query("modelId");
+ if (!provider || !modelId) {
+ return c.json({ error: "provider and modelId query parameters are required" }, 400);
+ }
+
+ const capabilities = await resolveModelCapabilities(provider, modelId);
+ return c.json({ capabilities });
+});
+
// List available Claude accounts with validated credentials
modelsRoutes.get("/claude-accounts", async (c) => {
const candidates = resolveClaudeAccounts();