diff options
| author | Adam Malczewski <[email protected]> | 2026-06-03 08:24:40 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-03 08:24:40 +0900 |
| commit | bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a (patch) | |
| tree | 17e84ebf8d83c51a7a50312c256372a86e38b92a /packages/api/src | |
| parent | b26821ead97b986f886065b20d3dbde8283daa64 (diff) | |
| parent | ae672fd4f5542a2c217cf97657bf81eeebdaabbd (diff) | |
| download | dispatch-bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a.tar.gz dispatch-bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a.zip | |
Merge branch 'dev' into cmp7/compaction-tool
# Conflicts:
# packages/frontend/src/lib/components/ChatInput.svelte
Diffstat (limited to 'packages/api/src')
| -rw-r--r-- | packages/api/src/agent-manager.ts | 36 | ||||
| -rw-r--r-- | packages/api/src/app.ts | 63 | ||||
| -rw-r--r-- | packages/api/src/routes/models.ts | 18 |
3 files changed, 116 insertions, 1 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 38dab49..e79e9a8 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -15,6 +15,7 @@ import { clearSpillForTab, configToRuleset, createConfigWatcher, + createKeyUsageTool, createListFilesTool, createLspTool, createReadFileSliceTool, @@ -71,6 +72,7 @@ import { toAvailableUserAgents, type UsageData, type UsageStats, + type UserContentPart, validateConfig, } from "@dispatch/core"; import type { PermissionManager } from "./permission-manager.js"; @@ -90,6 +92,8 @@ const TOOL_DESCRIPTIONS: Record<string, string> = { search_code: "Search the codebase by query using the 'cs' code search engine (relevance-ranked, structure-aware). Returns the most relevant files first with matching snippets and line numbers. Better than grep/find for exploratory 'where is X / how does Y work' searches; use run_shell with rg for exhaustive exact-match lists.", todo: "Create/maintain a todo list to plan and track work. Declarative whole-list write: send the entire list in `todos` each call (it replaces the previous list). Statuses: pending, in_progress, completed, cancelled.", + key_usage: + "Report current usage levels for configured API keys: provider, active/exhausted status, remaining rate-limit headroom and reset times per window (5-hour, weekly, monthly where available), and whether the figures are live or cached. Pass key_id for one key; omit to report all. Supported for anthropic and opencode-go keys.", summon: "Spawn a child agent to work on a task independently. By default blocks until the child finishes. Set background=true to return immediately with an agent_id for later retrieval.", retrieve: @@ -527,10 +531,11 @@ export class AgentManager { const permReadTab = getSetting("perm_read_tab") === "allow"; const permWebSearch = getSetting("perm_web_search") === "allow"; const permSearchCode = getSetting("perm_search_code") === "allow"; + const permKeyUsage = getSetting("perm_key_usage") === "allow"; const permYoutubeTranscribe = getSetting("perm_youtube_transcribe") === "allow"; const permLsp = getSetting("perm_lsp") === "allow"; const sysPrompt = getSetting("system_prompt") ?? ""; - const permKey = `${permRead}:${permEdit}:${permBash}:${permSummon}:${permUserAgent}:${permSendToTab}:${permReadTab}:${permWebSearch}:${permYoutubeTranscribe}:${permSearchCode}:${permLsp}:${sysPrompt}`; + const permKey = `${permRead}:${permEdit}:${permBash}:${permSummon}:${permUserAgent}:${permSendToTab}:${permReadTab}:${permWebSearch}:${permYoutubeTranscribe}:${permSearchCode}:${permKeyUsage}:${permLsp}:${sysPrompt}`; // If the override differs or permissions changed, invalidate the cached agent if ( @@ -622,6 +627,9 @@ export class AgentManager { if (allowed.has("web_search")) { toolEntries.push({ name: "web_search", tool: createWebSearchTool() }); } + if (allowed.has("key_usage")) { + toolEntries.push({ name: "key_usage", tool: this.buildKeyUsageTool() }); + } if (allowed.has("lsp") && lspServers.length > 0) { toolEntries.push({ name: "lsp", @@ -727,6 +735,9 @@ export class AgentManager { if (permWebSearch) { toolEntries.push({ name: "web_search", tool: createWebSearchTool() }); } + if (permKeyUsage) { + toolEntries.push({ name: "key_usage", tool: this.buildKeyUsageTool() }); + } // The `lsp` tool exposes diagnostics + navigation on demand. It is // gated by `perm_lsp` AND requires at least one server configured // in the working directory's `dispatch.toml`. @@ -1665,6 +1676,19 @@ export class AgentManager { // `deliverMessage`), so an agent message behaves identically to a user one. /** + * Build the `key_usage` tool, wired to the live model registry (key states) + * and the discovered Claude accounts. The tool fetches usage live with a + * cache fallback (anthropic) or a live scrape (opencode-go), reporting + * remaining headroom, reset times, and data freshness per key. + */ + private buildKeyUsageTool(): ReturnType<typeof createKeyUsageTool> { + return createKeyUsageTool({ + listKeys: () => this.modelRegistry?.getKeys() ?? [], + listClaudeAccounts: () => this.claudeAccounts, + }); + } + + /** * Build the `send_to_tab` + `read_tab` tool entries for `tabId`. Shared by * both tool-construction paths (child whitelist + permission-gated parent). * `selfHandle` is computed once so the calling tab can stamp provenance and @@ -1796,6 +1820,13 @@ export class AgentManager { workingDirectory?: string; queueId?: string; /** + * Ephemeral ordered multimodal content (image/pdf attachments) for a + * FRESH human turn. Forwarded to `processMessage` → `agent.run` only + * when the tab is idle (a started turn); never carried into the queue + * path (attachments require a fresh turn — the caller guards that). + */ + content?: UserContentPart[]; + /** * Who is sending this message. `"human"` (default) is unrestricted * and REFILLS the target's agent-to-agent auto-wake budget. `"agent"` * (from the `send_to_tab` tool) is governed by that budget: an @@ -1874,6 +1905,7 @@ export class AgentManager { opts.reasoningEffort, opts.workingDirectory, agentModels, + opts.content, ).catch((err) => { console.error(`[dispatch] deliverMessage processMessage error for tab ${tabId}:`, err); }); @@ -1888,6 +1920,7 @@ export class AgentManager { reasoningEffort?: ReasoningEffort, workingDirectory?: string, agentModels?: AgentModelEntry[], + content?: UserContentPart[], ): Promise<void> { const tabAgent = this._getOrCreateTabAgent(tabId); @@ -1999,6 +2032,7 @@ export class AgentManager { for await (const event of agent.run(message, { ...(effortForEntry ? { reasoningEffort: effortForEntry } : {}), abortSignal: tabAgent.abortController?.signal, + ...(content ? { content } : {}), })) { // Stop processing if the tab was aborted (closed/stopped). // stopTab() already injected a `cancelled` system chunk into diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts index 84afd2a..2f4e538 100644 --- a/packages/api/src/app.ts +++ b/packages/api/src/app.ts @@ -3,6 +3,8 @@ import { getTab, isReasoningEffort, NotificationDispatcher, + type UserContentPart, + validateUserContent, } from "@dispatch/core"; import { Hono } from "hono"; import { cors } from "hono/cors"; @@ -37,6 +39,41 @@ function sanitizeAgentModels(raw: unknown): AgentModelEntry[] | undefined { return out; } +/** + * Validate and normalise the optional multimodal `content` array from the + * `/chat` body. Each entry is either a `{ type: "text", text }` part or a + * `{ type: "attachment", mediaType, data, name? }` part (base64 payload). + * Returns `undefined` when the input isn't a non-empty array or contains no + * attachment (so the plain-string path is taken — byte-identical to before). + * Shape only: SIZE/TYPE limits are enforced separately by `validateUserContent`. + */ +function sanitizeUserContent(raw: unknown): UserContentPart[] | undefined { + if (!Array.isArray(raw) || raw.length === 0) return undefined; + const out: UserContentPart[] = []; + let hasAttachment = false; + for (const p of raw) { + if (!p || typeof p !== "object") continue; + const part = p as Record<string, unknown>; + if (part.type === "text") { + if (typeof part.text === "string") out.push({ type: "text", text: part.text }); + continue; + } + if (part.type === "attachment") { + if (typeof part.mediaType !== "string" || typeof part.data !== "string") continue; + hasAttachment = true; + out.push({ + type: "attachment", + mediaType: part.mediaType, + data: part.data, + ...(typeof part.name === "string" ? { name: part.name } : {}), + }); + } + } + // No attachment → let the plain-text path handle it (avoids needlessly + // switching the model message to array content for a text-only turn). + return hasAttachment ? out : undefined; +} + export const permissionManager = new PermissionManager(); export const agentManager = new AgentManager(permissionManager); @@ -94,6 +131,7 @@ app.post("/chat", async (c) => { const body = await c.req.json<{ tabId?: unknown; message?: unknown; + content?: unknown; keyId?: unknown; modelId?: unknown; agentModels?: unknown; @@ -121,6 +159,30 @@ app.post("/chat", async (c) => { ? body.reasoningEffort : undefined; + // Optional multimodal content (image/pdf attachments). When present, the + // attachments are EPHEMERAL — forwarded to the model for this turn only and + // never persisted (the chunk log keeps just `message`, which the frontend + // has already projected to text with `[image]`/`[pdf]` markers). + const content = sanitizeUserContent(body.content); + if (content) { + // Enforce size/type/count ceilings server-side (defence in depth; the + // frontend also enforces them at paste time). Reject the whole request + // so no tokens are spent on an over-limit payload. + const validation = validateUserContent(content); + if (!validation.ok) { + return c.json({ error: "invalid attachments", details: validation.errors }, 400); + } + // Attachments only attach to a FRESH turn. If the tab is mid-turn the + // message would queue (text-only machinery), silently dropping the + // images. Reject clearly instead so the user can retry once idle. + if (agentManager.getTabStatus(tabId) === "running") { + return c.json( + { error: "cannot attach images while the agent is generating; wait for it to finish" }, + 409, + ); + } + } + // Single routing decision (queue if busy, new turn if idle) shared with the // `send_to_tab` tool via `AgentManager.deliverMessage`. Non-blocking — a // started turn runs in the background. @@ -131,6 +193,7 @@ app.post("/chat", async (c) => { ...(reasoningEffort ? { reasoningEffort } : {}), ...(workingDirectory !== undefined ? { workingDirectory } : {}), ...(queueId ? { queueId } : {}), + ...(content ? { content } : {}), }); if (outcome.status === "queued") { diff --git a/packages/api/src/routes/models.ts b/packages/api/src/routes/models.ts index eeb6029..a1700b1 100644 --- a/packages/api/src/routes/models.ts +++ b/packages/api/src/routes/models.ts @@ -20,6 +20,7 @@ import { refreshAccountCredentialsAsync, resolveApiKey, resolveContextLimit, + resolveModelCapabilities, selectHaikuModel, setApiKey, validateAccountCredentials, @@ -180,6 +181,23 @@ modelsRoutes.get("/context-limit", async (c) => { return c.json({ contextLimit }); }); +// Resolve a model's image / PDF INPUT capabilities from the models.dev catalog. +// Returns `{ capabilities: { image, pdf } | null }`. `null` means UNKNOWN — the +// provider is unmapped, the model is absent, the catalog predates the +// `modalities` field, or the catalog is offline. The frontend treats `null` as +// "can't verify" (optimistic allow) and a definitive `{ image: false }` as a +// hard block (no tokens spent). +modelsRoutes.get("/capabilities", async (c) => { + const provider = c.req.query("provider"); + const modelId = c.req.query("modelId"); + if (!provider || !modelId) { + return c.json({ error: "provider and modelId query parameters are required" }, 400); + } + + const capabilities = await resolveModelCapabilities(provider, modelId); + return c.json({ capabilities }); +}); + // List available Claude accounts with validated credentials modelsRoutes.get("/claude-accounts", async (c) => { const candidates = resolveClaudeAccounts(); |
