diff options
| author | Adam Malczewski <[email protected]> | 2026-06-03 08:24:40 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-03 08:24:40 +0900 |
| commit | bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a (patch) | |
| tree | 17e84ebf8d83c51a7a50312c256372a86e38b92a | |
| parent | b26821ead97b986f886065b20d3dbde8283daa64 (diff) | |
| parent | ae672fd4f5542a2c217cf97657bf81eeebdaabbd (diff) | |
| download | dispatch-bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a.tar.gz dispatch-bc3ecbe7b72f6da6ed36d0cea5a66de1c440269a.zip | |
Merge branch 'dev' into cmp7/compaction-tool
# Conflicts:
# packages/frontend/src/lib/components/ChatInput.svelte
28 files changed, 2310 insertions, 37 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 38dab49..e79e9a8 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -15,6 +15,7 @@ import { clearSpillForTab, configToRuleset, createConfigWatcher, + createKeyUsageTool, createListFilesTool, createLspTool, createReadFileSliceTool, @@ -71,6 +72,7 @@ import { toAvailableUserAgents, type UsageData, type UsageStats, + type UserContentPart, validateConfig, } from "@dispatch/core"; import type { PermissionManager } from "./permission-manager.js"; @@ -90,6 +92,8 @@ const TOOL_DESCRIPTIONS: Record<string, string> = { search_code: "Search the codebase by query using the 'cs' code search engine (relevance-ranked, structure-aware). Returns the most relevant files first with matching snippets and line numbers. Better than grep/find for exploratory 'where is X / how does Y work' searches; use run_shell with rg for exhaustive exact-match lists.", todo: "Create/maintain a todo list to plan and track work. Declarative whole-list write: send the entire list in `todos` each call (it replaces the previous list). Statuses: pending, in_progress, completed, cancelled.", + key_usage: + "Report current usage levels for configured API keys: provider, active/exhausted status, remaining rate-limit headroom and reset times per window (5-hour, weekly, monthly where available), and whether the figures are live or cached. Pass key_id for one key; omit to report all. Supported for anthropic and opencode-go keys.", summon: "Spawn a child agent to work on a task independently. By default blocks until the child finishes. 
Set background=true to return immediately with an agent_id for later retrieval.", retrieve: @@ -527,10 +531,11 @@ export class AgentManager { const permReadTab = getSetting("perm_read_tab") === "allow"; const permWebSearch = getSetting("perm_web_search") === "allow"; const permSearchCode = getSetting("perm_search_code") === "allow"; + const permKeyUsage = getSetting("perm_key_usage") === "allow"; const permYoutubeTranscribe = getSetting("perm_youtube_transcribe") === "allow"; const permLsp = getSetting("perm_lsp") === "allow"; const sysPrompt = getSetting("system_prompt") ?? ""; - const permKey = `${permRead}:${permEdit}:${permBash}:${permSummon}:${permUserAgent}:${permSendToTab}:${permReadTab}:${permWebSearch}:${permYoutubeTranscribe}:${permSearchCode}:${permLsp}:${sysPrompt}`; + const permKey = `${permRead}:${permEdit}:${permBash}:${permSummon}:${permUserAgent}:${permSendToTab}:${permReadTab}:${permWebSearch}:${permYoutubeTranscribe}:${permSearchCode}:${permKeyUsage}:${permLsp}:${sysPrompt}`; // If the override differs or permissions changed, invalidate the cached agent if ( @@ -622,6 +627,9 @@ export class AgentManager { if (allowed.has("web_search")) { toolEntries.push({ name: "web_search", tool: createWebSearchTool() }); } + if (allowed.has("key_usage")) { + toolEntries.push({ name: "key_usage", tool: this.buildKeyUsageTool() }); + } if (allowed.has("lsp") && lspServers.length > 0) { toolEntries.push({ name: "lsp", @@ -727,6 +735,9 @@ export class AgentManager { if (permWebSearch) { toolEntries.push({ name: "web_search", tool: createWebSearchTool() }); } + if (permKeyUsage) { + toolEntries.push({ name: "key_usage", tool: this.buildKeyUsageTool() }); + } // The `lsp` tool exposes diagnostics + navigation on demand. It is // gated by `perm_lsp` AND requires at least one server configured // in the working directory's `dispatch.toml`. @@ -1665,6 +1676,19 @@ export class AgentManager { // `deliverMessage`), so an agent message behaves identically to a user one. 
/** + * Build the `key_usage` tool, wired to the live model registry (key states) + * and the discovered Claude accounts. The tool fetches usage live with a + * cache fallback (anthropic) or a live scrape (opencode-go), reporting + * remaining headroom, reset times, and data freshness per key. + */ + private buildKeyUsageTool(): ReturnType<typeof createKeyUsageTool> { + return createKeyUsageTool({ + listKeys: () => this.modelRegistry?.getKeys() ?? [], + listClaudeAccounts: () => this.claudeAccounts, + }); + } + + /** * Build the `send_to_tab` + `read_tab` tool entries for `tabId`. Shared by * both tool-construction paths (child whitelist + permission-gated parent). * `selfHandle` is computed once so the calling tab can stamp provenance and @@ -1796,6 +1820,13 @@ export class AgentManager { workingDirectory?: string; queueId?: string; /** + * Ephemeral ordered multimodal content (image/pdf attachments) for a + * FRESH human turn. Forwarded to `processMessage` → `agent.run` only + * when the tab is idle (a started turn); never carried into the queue + * path (attachments require a fresh turn — the caller guards that). + */ + content?: UserContentPart[]; + /** * Who is sending this message. `"human"` (default) is unrestricted * and REFILLS the target's agent-to-agent auto-wake budget. `"agent"` * (from the `send_to_tab` tool) is governed by that budget: an @@ -1874,6 +1905,7 @@ export class AgentManager { opts.reasoningEffort, opts.workingDirectory, agentModels, + opts.content, ).catch((err) => { console.error(`[dispatch] deliverMessage processMessage error for tab ${tabId}:`, err); }); @@ -1888,6 +1920,7 @@ export class AgentManager { reasoningEffort?: ReasoningEffort, workingDirectory?: string, agentModels?: AgentModelEntry[], + content?: UserContentPart[], ): Promise<void> { const tabAgent = this._getOrCreateTabAgent(tabId); @@ -1999,6 +2032,7 @@ export class AgentManager { for await (const event of agent.run(message, { ...(effortForEntry ? 
{ reasoningEffort: effortForEntry } : {}), abortSignal: tabAgent.abortController?.signal, + ...(content ? { content } : {}), })) { // Stop processing if the tab was aborted (closed/stopped). // stopTab() already injected a `cancelled` system chunk into diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts index 84afd2a..2f4e538 100644 --- a/packages/api/src/app.ts +++ b/packages/api/src/app.ts @@ -3,6 +3,8 @@ import { getTab, isReasoningEffort, NotificationDispatcher, + type UserContentPart, + validateUserContent, } from "@dispatch/core"; import { Hono } from "hono"; import { cors } from "hono/cors"; @@ -37,6 +39,41 @@ function sanitizeAgentModels(raw: unknown): AgentModelEntry[] | undefined { return out; } +/** + * Validate and normalise the optional multimodal `content` array from the + * `/chat` body. Each entry is either a `{ type: "text", text }` part or a + * `{ type: "attachment", mediaType, data, name? }` part (base64 payload). + * Returns `undefined` when the input isn't a non-empty array or contains no + * attachment (so the plain-string path is taken — byte-identical to before). + * Shape only: SIZE/TYPE limits are enforced separately by `validateUserContent`. + */ +function sanitizeUserContent(raw: unknown): UserContentPart[] | undefined { + if (!Array.isArray(raw) || raw.length === 0) return undefined; + const out: UserContentPart[] = []; + let hasAttachment = false; + for (const p of raw) { + if (!p || typeof p !== "object") continue; + const part = p as Record<string, unknown>; + if (part.type === "text") { + if (typeof part.text === "string") out.push({ type: "text", text: part.text }); + continue; + } + if (part.type === "attachment") { + if (typeof part.mediaType !== "string" || typeof part.data !== "string") continue; + hasAttachment = true; + out.push({ + type: "attachment", + mediaType: part.mediaType, + data: part.data, + ...(typeof part.name === "string" ? 
{ name: part.name } : {}), + }); + } + } + // No attachment → let the plain-text path handle it (avoids needlessly + // switching the model message to array content for a text-only turn). + return hasAttachment ? out : undefined; +} + export const permissionManager = new PermissionManager(); export const agentManager = new AgentManager(permissionManager); @@ -94,6 +131,7 @@ app.post("/chat", async (c) => { const body = await c.req.json<{ tabId?: unknown; message?: unknown; + content?: unknown; keyId?: unknown; modelId?: unknown; agentModels?: unknown; @@ -121,6 +159,30 @@ app.post("/chat", async (c) => { ? body.reasoningEffort : undefined; + // Optional multimodal content (image/pdf attachments). When present, the + // attachments are EPHEMERAL — forwarded to the model for this turn only and + // never persisted (the chunk log keeps just `message`, which the frontend + // has already projected to text with `[image]`/`[pdf]` markers). + const content = sanitizeUserContent(body.content); + if (content) { + // Enforce size/type/count ceilings server-side (defence in depth; the + // frontend also enforces them at paste time). Reject the whole request + // so no tokens are spent on an over-limit payload. + const validation = validateUserContent(content); + if (!validation.ok) { + return c.json({ error: "invalid attachments", details: validation.errors }, 400); + } + // Attachments only attach to a FRESH turn. If the tab is mid-turn the + // message would queue (text-only machinery), silently dropping the + // images. Reject clearly instead so the user can retry once idle. + if (agentManager.getTabStatus(tabId) === "running") { + return c.json( + { error: "cannot attach images while the agent is generating; wait for it to finish" }, + 409, + ); + } + } + // Single routing decision (queue if busy, new turn if idle) shared with the // `send_to_tab` tool via `AgentManager.deliverMessage`. Non-blocking — a // started turn runs in the background. 
@@ -131,6 +193,7 @@ app.post("/chat", async (c) => { ...(reasoningEffort ? { reasoningEffort } : {}), ...(workingDirectory !== undefined ? { workingDirectory } : {}), ...(queueId ? { queueId } : {}), + ...(content ? { content } : {}), }); if (outcome.status === "queued") { diff --git a/packages/api/src/routes/models.ts b/packages/api/src/routes/models.ts index eeb6029..a1700b1 100644 --- a/packages/api/src/routes/models.ts +++ b/packages/api/src/routes/models.ts @@ -20,6 +20,7 @@ import { refreshAccountCredentialsAsync, resolveApiKey, resolveContextLimit, + resolveModelCapabilities, selectHaikuModel, setApiKey, validateAccountCredentials, @@ -180,6 +181,23 @@ modelsRoutes.get("/context-limit", async (c) => { return c.json({ contextLimit }); }); +// Resolve a model's image / PDF INPUT capabilities from the models.dev catalog. +// Returns `{ capabilities: { image, pdf } | null }`. `null` means UNKNOWN — the +// provider is unmapped, the model is absent, the catalog predates the +// `modalities` field, or the catalog is offline. The frontend treats `null` as +// "can't verify" (optimistic allow) and a definitive `{ image: false }` as a +// hard block (no tokens spent). 
+modelsRoutes.get("/capabilities", async (c) => { + const provider = c.req.query("provider"); + const modelId = c.req.query("modelId"); + if (!provider || !modelId) { + return c.json({ error: "provider and modelId query parameters are required" }, 400); + } + + const capabilities = await resolveModelCapabilities(provider, modelId); + return c.json({ capabilities }); +}); + // List available Claude accounts with validated credentials modelsRoutes.get("/claude-accounts", async (c) => { const candidates = resolveClaudeAccounts(); diff --git a/packages/api/tests/agent-manager.test.ts b/packages/api/tests/agent-manager.test.ts index 0915d9b..80a8ae5 100644 --- a/packages/api/tests/agent-manager.test.ts +++ b/packages/api/tests/agent-manager.test.ts @@ -537,6 +537,14 @@ vi.mock("@dispatch/core", () => ({ execute: async () => "mock", }; }, + createKeyUsageTool(_callbacks: unknown) { + return { + name: "key_usage", + description: "key usage", + parameters: { _type: "z.ZodObject", shape: {} }, + execute: async () => "mock", + }; + }, createSearchCodeTool(_wd: string) { return { name: "search_code", @@ -1634,6 +1642,28 @@ describe("AgentManager", () => { }); }); + describe("key_usage permission gate", () => { + // The key_usage tool is conditionally useful, so it must be COMPLETELY + // absent from the toolset (and thus the model's context) unless + // perm_key_usage is explicitly allowed. + async function toolsForPerms(tabId: string, perms: Record<string, string>): Promise<string[]> { + for (const [k, v] of Object.entries(perms)) setFakeSetting(k, v); + const manager = new AgentManager(); + await manager.processMessage(tabId, "go"); + return constructedAgents.at(-1)?.toolNames ?? 
[]; + } + + it("registers key_usage when perm_key_usage is allowed", async () => { + const tools = await toolsForPerms("tab-key-usage-on", { perm_key_usage: "allow" }); + expect(tools).toContain("key_usage"); + }); + + it("omits key_usage when perm_key_usage is not allowed", async () => { + const tools = await toolsForPerms("tab-key-usage-off", {}); + expect(tools).not.toContain("key_usage"); + }); + }); + // Regression: granted tab-messaging tools must also be ADVERTISED in the // agent's system prompt. The tools were registered in the API tool payload // but `buildSystemPrompt` filtered its "You have access to the following diff --git a/packages/api/tests/routes.test.ts b/packages/api/tests/routes.test.ts index d6f6087..06dfa13 100644 --- a/packages/api/tests/routes.test.ts +++ b/packages/api/tests/routes.test.ts @@ -219,6 +219,16 @@ vi.mock("@dispatch/core", () => ({ typeof value === "string" && ["none", "low", "medium", "high", "xhigh", "max"].includes(value) ); }, + // Lightweight stand-in for the real validator: accept the supported media + // types, reject everything else. Enough to exercise the /chat attachment + // validation branch (the real validator is unit-tested in core). + validateUserContent(content: Array<{ type: string; mediaType?: string }>) { + const accepted = ["image/png", "image/jpeg", "image/webp", "image/gif", "application/pdf"]; + const errors = content + .filter((p) => p.type === "attachment" && !accepted.includes(p.mediaType ?? 
"")) + .map((p) => ({ code: "unsupported-type", mediaType: p.mediaType })); + return { ok: errors.length === 0, errors }; + }, listOpenTabs() { return [...fakeOpenTabs]; }, @@ -451,6 +461,59 @@ describe("POST /chat", () => { expect(await res.json()).toEqual({ status: "ok" }); }); + it("accepts a valid image attachment and starts a turn", async () => { + const res = await app.request("/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + tabId: "tab-img-ok", + message: "look: [image]", + content: [ + { type: "text", text: "look: " }, + { type: "attachment", mediaType: "image/png", data: "QQ==" }, + ], + }), + }); + expect(res.status).toBe(200); + expect(await res.json()).toEqual({ status: "ok" }); + }); + + it("returns 400 for an unsupported attachment media type", async () => { + const res = await app.request("/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + tabId: "tab-img-bad", + message: "look: [image]", + content: [{ type: "attachment", mediaType: "image/svg+xml", data: "QQ==" }], + }), + }); + expect(res.status).toBe(400); + const body = await res.json(); + expect(body.error).toBe("invalid attachments"); + }); + + it("returns 409 when attaching while the agent is generating", async () => { + // Kick off a turn so the tab is running. 
+ await app.request("/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ tabId: "tab-img-busy", message: "first" }), + }); + await new Promise<void>((r) => setTimeout(r, 20)); + + const res = await app.request("/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + tabId: "tab-img-busy", + message: "second [image]", + content: [{ type: "attachment", mediaType: "image/png", data: "QQ==" }], + }), + }); + expect(res.status).toBe(409); + }); + it("returns 400 with empty message", async () => { const res = await app.request("/chat", { method: "POST", diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts Binary files differ index 4bfa7eb..08b317a 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts diff --git a/packages/core/src/credentials/claude.ts b/packages/core/src/credentials/claude.ts index 7818222..050a0fc 100644 --- a/packages/core/src/credentials/claude.ts +++ b/packages/core/src/credentials/claude.ts @@ -441,6 +441,22 @@ export interface ClaudeUsageReport { orgId?: string; } +/** + * A usage report paired with provenance: whether it came back from a fresh + * live fetch against Anthropic's `/api/oauth/usage` endpoint or was served + * from the local `usage_cache` table after a failed/skipped live fetch. + * + * `source: "cache"` carries `cachedAt` — the epoch-ms timestamp recording when + * that cached payload was last fetched FROM the source (the `usage_cache.cached_at` + * column). `source: "live"` omits `cachedAt` (the data is current as of now). + */ +export interface ClaudeUsageResult { + report: ClaudeUsageReport; + source: "live" | "cache"; + /** Epoch-ms the cached report was last fetched from source. Only on `source: "cache"`.
*/ + cachedAt?: number; +} + // ─── Well-known Anthropic models ────────────────────────────── /** @@ -602,14 +618,23 @@ async function fetchClaudeUsage(accessToken: string): Promise<ClaudeUsageReport } } -function getCachedUsage(keyId: string): ClaudeUsageReport | null { +/** + * Read a cached usage report plus the epoch-ms it was last fetched from source. + * Returns `null` when there is no cached row (or on any DB/parse error). + */ +function getCachedUsageWithMeta( + keyId: string, +): { report: ClaudeUsageReport; cachedAt: number } | null { try { const db = getDatabase(); const row = db - .query("SELECT report_json FROM usage_cache WHERE key_id = $keyId") - .get({ $keyId: keyId }) as { report_json: string } | null; + .query("SELECT report_json, cached_at FROM usage_cache WHERE key_id = $keyId") + .get({ $keyId: keyId }) as { report_json: string; cached_at: number } | null; if (!row) return null; - return JSON.parse(row.report_json) as ClaudeUsageReport; + return { + report: JSON.parse(row.report_json) as ClaudeUsageReport, + cachedAt: row.cached_at, + }; } catch { return null; } @@ -635,13 +660,35 @@ function setCachedUsage(keyId: string, provider: string, report: ClaudeUsageRepo } } -export async function getAccountUsage(account: ClaudeAccount): Promise<ClaudeUsageReport | null> { +/** + * Fetch an account's usage report along with its provenance (live vs cache). + * + * Resolution: refresh credentials and hit the live `/api/oauth/usage` endpoint; + * on success the fresh report is cached and returned as `source: "live"`. If + * credentials cannot be refreshed OR the live fetch returns nothing, fall back + * to the local `usage_cache` row and return it as `source: "cache"` with the + * `cachedAt` timestamp recording when that payload was last fetched from source. + * Returns `null` only when neither a live report nor a cached row is available. 
+ */ +export async function getAccountUsageWithSource( + account: ClaudeAccount, +): Promise<ClaudeUsageResult | null> { const creds = await refreshAccountCredentialsAsync(account); - if (!creds) return getCachedUsage(account.id); - const report = await fetchClaudeUsage(creds.accessToken); - if (report) { - setCachedUsage(account.id, "anthropic", report); - return report; + if (creds) { + const report = await fetchClaudeUsage(creds.accessToken); + if (report) { + setCachedUsage(account.id, "anthropic", report); + return { report, source: "live" }; + } } - return getCachedUsage(account.id); + const cached = getCachedUsageWithMeta(account.id); + if (cached) { + return { report: cached.report, source: "cache", cachedAt: cached.cachedAt }; + } + return null; +} + +export async function getAccountUsage(account: ClaudeAccount): Promise<ClaudeUsageReport | null> { + const result = await getAccountUsageWithSource(account); + return result?.report ?? null; } diff --git a/packages/core/src/credentials/index.ts b/packages/core/src/credentials/index.ts index 5221dc6..131f035 100644 --- a/packages/core/src/credentials/index.ts +++ b/packages/core/src/credentials/index.ts @@ -15,9 +15,11 @@ export { type ClaudeProfile, type ClaudeUsageBucket, type ClaudeUsageReport, + type ClaudeUsageResult, discoverClaudeAccounts, fetchAnthropicModels, getAccountUsage, + getAccountUsageWithSource, getAnthropicBetas, getAnthropicHeaders, getClaudeAccountsFromDB, diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 2789b2c..25cc909 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -98,9 +98,26 @@ export { } from "./lsp/index.js"; // Models export { + ACCEPTED_ATTACHMENT_MEDIA_TYPES, + ACCEPTED_IMAGE_MEDIA_TYPES, + ACCEPTED_PDF_MEDIA_TYPE, + type AttachmentValidationError, + type AttachmentValidationResult, + base64ByteLength, getModelsCatalog, + hasAttachments, + isAcceptedAttachmentMediaType, + isImageMediaType, + isPdfMediaType, + MAX_ATTACHMENTS, 
+ MAX_IMAGE_BYTES, + MAX_PDF_BYTES, + MAX_TOTAL_ATTACHMENT_BYTES, + type ModelInputCapabilities, ModelRegistry, resolveContextLimit, + resolveModelCapabilities, + validateUserContent, } from "./models/index.js"; // Notifications (ntfy.sh) export * from "./notifications/index.js"; @@ -115,6 +132,7 @@ export { } from "./skills/index.js"; export { prefix as bashArityPrefix } from "./tools/bash-arity.js"; // Tools +export { createKeyUsageTool, type KeyUsageCallbacks } from "./tools/key-usage.js"; export { createListFilesTool } from "./tools/list-files.js"; export { createLspTool, type LspToolContext } from "./tools/lsp.js"; export { createReadFileTool } from "./tools/read-file.js"; diff --git a/packages/core/src/models/attachments.ts b/packages/core/src/models/attachments.ts new file mode 100644 index 0000000..5c98db4 --- /dev/null +++ b/packages/core/src/models/attachments.ts @@ -0,0 +1,151 @@ +// Validation + limits for multimodal user attachments (images / PDFs). +// +// Kept dependency-free (no DB / `bun:sqlite` import) so both the API layer +// (`/chat` request validation) and any future caller can share the exact same +// allowlist and size/count ceilings. The limits mirror Anthropic's documented +// vision/PDF API constraints (the only image-capable providers Dispatch maps), +// so a request that passes here won't be rejected by the provider for size. + +import type { UserAttachmentPart, UserContentPart } from "../types/index.js"; + +/** Accepted image media types. */ +export const ACCEPTED_IMAGE_MEDIA_TYPES = [ + "image/png", + "image/jpeg", + "image/webp", + "image/gif", +] as const; + +/** Accepted document media types. */ +export const ACCEPTED_PDF_MEDIA_TYPE = "application/pdf"; + +/** Every media type we accept as an attachment. */ +export const ACCEPTED_ATTACHMENT_MEDIA_TYPES = [ + ...ACCEPTED_IMAGE_MEDIA_TYPES, + ACCEPTED_PDF_MEDIA_TYPE, +] as const; + +/** Per-image byte ceiling (Anthropic: 5 MB/image). 
*/ +export const MAX_IMAGE_BYTES = 5 * 1024 * 1024; + +/** Per-PDF byte ceiling (Anthropic: 32 MB/PDF). */ +export const MAX_PDF_BYTES = 32 * 1024 * 1024; + +/** Max attachments per message (Anthropic: 20 images/request). */ +export const MAX_ATTACHMENTS = 20; + +/** + * Total attachment payload ceiling for a single request (decoded bytes). Bounds + * the overall request size even when each individual file is within its limit. + */ +export const MAX_TOTAL_ATTACHMENT_BYTES = 32 * 1024 * 1024; + +/** Whether a media type is an accepted image type. */ +export function isImageMediaType(mediaType: string): boolean { + return (ACCEPTED_IMAGE_MEDIA_TYPES as readonly string[]).includes(mediaType); +} + +/** Whether a media type is the accepted PDF type. */ +export function isPdfMediaType(mediaType: string): boolean { + return mediaType === ACCEPTED_PDF_MEDIA_TYPE; +} + +/** Whether a media type is an accepted attachment type at all. */ +export function isAcceptedAttachmentMediaType(mediaType: string): boolean { + return (ACCEPTED_ATTACHMENT_MEDIA_TYPES as readonly string[]).includes(mediaType); +} + +/** + * Decoded byte length of a base64 string, computed WITHOUT allocating the + * decoded buffer. Tolerates an optional `data:<mediaType>;base64,` prefix and + * any embedded whitespace/newlines. Returns 0 for an empty/whitespace string. + */ +export function base64ByteLength(b64: string): number { + // Strip a data-URI prefix if present. + const comma = b64.indexOf(","); + const body = b64.startsWith("data:") && comma !== -1 ? b64.slice(comma + 1) : b64; + let len = 0; + let pad = 0; + for (let i = 0; i < body.length; i++) { + const ch = body.charCodeAt(i); + // Skip whitespace (space, \t, \n, \r). + if (ch === 32 || ch === 9 || ch === 10 || ch === 13) continue; + len++; + if (body[i] === "=") pad++; + } + if (len === 0) return 0; + // 4 base64 chars → 3 bytes, minus padding. 
+ return Math.floor((len * 3) / 4) - pad; +} + +export type AttachmentValidationError = + | { code: "unsupported-type"; mediaType: string } + | { code: "image-too-large"; mediaType: string; bytes: number; limit: number } + | { code: "pdf-too-large"; bytes: number; limit: number } + | { code: "too-many"; count: number; limit: number } + | { code: "total-too-large"; bytes: number; limit: number } + | { code: "empty"; mediaType: string }; + +export interface AttachmentValidationResult { + ok: boolean; + errors: AttachmentValidationError[]; +} + +/** Extract just the attachment parts from a mixed content list. */ +function attachmentsOf(content: UserContentPart[]): UserAttachmentPart[] { + return content.filter((p): p is UserAttachmentPart => p.type === "attachment"); +} + +/** + * Validate the attachments in a multimodal user content list against the + * media-type allowlist and the size/count ceilings. Pure: never throws, + * collects every violation so the caller can report them all at once. + * + * Text parts are ignored (always valid). An empty content list is valid (it's + * just a text-only message expressed as parts). 
+ */ +export function validateUserContent(content: UserContentPart[]): AttachmentValidationResult { + const errors: AttachmentValidationError[] = []; + const attachments = attachmentsOf(content); + + if (attachments.length > MAX_ATTACHMENTS) { + errors.push({ code: "too-many", count: attachments.length, limit: MAX_ATTACHMENTS }); + } + + let total = 0; + for (const att of attachments) { + if (!isAcceptedAttachmentMediaType(att.mediaType)) { + errors.push({ code: "unsupported-type", mediaType: att.mediaType }); + continue; + } + const bytes = base64ByteLength(att.data); + total += bytes; + if (bytes === 0) { + errors.push({ code: "empty", mediaType: att.mediaType }); + continue; + } + if (isPdfMediaType(att.mediaType)) { + if (bytes > MAX_PDF_BYTES) { + errors.push({ code: "pdf-too-large", bytes, limit: MAX_PDF_BYTES }); + } + } else if (bytes > MAX_IMAGE_BYTES) { + errors.push({ + code: "image-too-large", + mediaType: att.mediaType, + bytes, + limit: MAX_IMAGE_BYTES, + }); + } + } + + if (total > MAX_TOTAL_ATTACHMENT_BYTES) { + errors.push({ code: "total-too-large", bytes: total, limit: MAX_TOTAL_ATTACHMENT_BYTES }); + } + + return { ok: errors.length === 0, errors }; +} + +/** Convenience: does the content list contain at least one attachment? */ +export function hasAttachments(content: UserContentPart[] | undefined | null): boolean { + return !!content && content.some((p) => p.type === "attachment"); +} diff --git a/packages/core/src/models/catalog.ts b/packages/core/src/models/catalog.ts index dea4647..ac310b1 100644 --- a/packages/core/src/models/catalog.ts +++ b/packages/core/src/models/catalog.ts @@ -18,6 +18,15 @@ interface ModelsDevModel { context?: number; output?: number; }; + /** + * Input/output modalities the model accepts. We read `input` to decide + * whether the model can take image / pdf attachments. Absent on older + * catalog entries — treated as "unknown" (capability resolves to `null`). 
+ */ + modalities?: { + input?: string[]; + output?: string[]; + }; } interface ModelsDevProvider { @@ -172,6 +181,47 @@ export async function resolveContextLimit( return null; } +/** + * Image / PDF input capabilities for a model, resolved from the models.dev + * catalog's `modalities.input` list. + */ +export interface ModelInputCapabilities { + /** Model accepts image input (vision). */ + image: boolean; + /** Model accepts PDF/document input. */ + pdf: boolean; +} + +/** + * Resolve whether a model accepts image / pdf input for the given Dispatch + * provider + model id. Returns `null` when the capability is UNKNOWN — i.e. the + * provider is unsupported/unmapped, the model is absent from the catalog, the + * entry predates the `modalities` field, or the catalog is unavailable. Callers + * should treat `null` as "can't verify" (optimistic allow) rather than a + * definitive "no", so a temporary catalog outage never disables a known-good + * vision model. + * + * A non-null result means the catalog DID describe the model's input modalities + * — `{ image, pdf }` then reflects exactly what it advertises (a definitive + * yes/no for each). + */ +export async function resolveModelCapabilities( + provider: string, + modelId: string, +): Promise<ModelInputCapabilities | null> { + const candidates = PROVIDER_MAP[provider]; + if (!candidates || !modelId) return null; + + const catalog = await getModelsCatalog(); + for (const providerId of candidates) { + const input = catalog[providerId]?.models?.[modelId]?.modalities?.input; + if (Array.isArray(input)) { + return { image: input.includes("image"), pdf: input.includes("pdf") }; + } + } + return null; +} + /** Test-only: reset the in-process memo so a test can re-exercise loading. 
*/ export function __resetCatalogCacheForTests(): void { cached = null; diff --git a/packages/core/src/models/index.ts b/packages/core/src/models/index.ts index 2fcd657..15d1ee2 100644 --- a/packages/core/src/models/index.ts +++ b/packages/core/src/models/index.ts @@ -1,5 +1,24 @@ export { + ACCEPTED_ATTACHMENT_MEDIA_TYPES, + ACCEPTED_IMAGE_MEDIA_TYPES, + ACCEPTED_PDF_MEDIA_TYPE, + type AttachmentValidationError, + type AttachmentValidationResult, + base64ByteLength, + hasAttachments, + isAcceptedAttachmentMediaType, + isImageMediaType, + isPdfMediaType, + MAX_ATTACHMENTS, + MAX_IMAGE_BYTES, + MAX_PDF_BYTES, + MAX_TOTAL_ATTACHMENT_BYTES, + validateUserContent, +} from "./attachments.js"; +export { getModelsCatalog, + type ModelInputCapabilities, resolveContextLimit, + resolveModelCapabilities, } from "./catalog.js"; export { ModelRegistry } from "./registry.js"; diff --git a/packages/core/src/tools/key-usage.ts b/packages/core/src/tools/key-usage.ts new file mode 100644 index 0000000..0655ad7 --- /dev/null +++ b/packages/core/src/tools/key-usage.ts @@ -0,0 +1,322 @@ +import { z } from "zod"; +import type { ClaudeAccount, ClaudeUsageReport, ClaudeUsageResult } from "../credentials/claude.js"; +import { getAccountUsageWithSource } from "../credentials/claude.js"; +import type { OpencodeUsageReport } from "../credentials/opencode.js"; +import { fetchOpencodeUsage as defaultFetchOpencodeUsage } from "../credentials/opencode.js"; +import type { KeyState, ToolDefinition } from "../types/index.js"; + +/** + * Collaborators the `key_usage` tool needs from the API layer (which owns the + * live `ModelRegistry` and the discovered Claude accounts). The two `fetch*` + * hooks default to the real credential fetchers but are injectable so tests can + * exercise the tool without network or DB access. + */ +export interface KeyUsageCallbacks { + /** Current key states from the model registry (definition + active/exhausted status). 
*/ + listKeys(): KeyState[]; + /** Discovered Claude accounts, used to resolve `anthropic` keys to credentials. */ + listClaudeAccounts(): ClaudeAccount[]; + /** + * Fetch an anthropic account's usage with provenance (live vs cache). + * Defaults to `getAccountUsageWithSource`. + */ + fetchAnthropicUsage?: (account: ClaudeAccount) => Promise<ClaudeUsageResult | null>; + /** + * Fetch an opencode-go key's usage (always a live scrape — OpenCode keeps no + * local cache). Defaults to `fetchOpencodeUsage`. + */ + fetchOpencodeUsage?: (keyId: string) => Promise<OpencodeUsageReport | null>; +} + +/** A single normalized usage window (5-hour / week / month). */ +interface UsageWindow { + label: string; + /** Remaining headroom as a 0–100 percentage. Omitted when the source gives no utilization. */ + remainingPercent?: number; + /** Epoch-ms the window resets. Omitted when the source gives no reset time. */ + resetsAt?: number; +} + +/** Fully normalized per-key usage, ready for rendering. */ +interface KeyUsageEntry { + keyId: string; + provider: string; + status: "active" | "exhausted"; + lastError?: string; + exhaustedAt?: number; + /** Provenance of the usage figures: a fresh live fetch or a cached payload. */ + dataSource?: "live" | "cache"; + /** Epoch-ms the cached payload was last fetched from source (only on `dataSource: "cache"`). */ + cachedAt?: number; + windows: UsageWindow[]; + /** Set when no usage figures could be obtained for an otherwise-supported key. */ + unavailableReason?: string; + /** Set when the provider has no usage-reporting support. */ + unsupported?: boolean; +} + +function clampPercent(value: number): number { + if (value < 0) return 0; + if (value > 100) return 100; + return value; +} + +/** Convert a raw `{ utilization, resetsAt }` bucket into a normalized window. 
*/ +function toWindow( + label: string, + bucket?: { utilization?: number; resetsAt?: number }, +): UsageWindow | null { + if (!bucket) return null; + const hasUtil = typeof bucket.utilization === "number"; + const hasReset = typeof bucket.resetsAt === "number"; + if (!hasUtil && !hasReset) return null; + return { + label, + ...(hasUtil + ? { remainingPercent: clampPercent(Math.round((1 - (bucket.utilization as number)) * 100)) } + : {}), + ...(hasReset ? { resetsAt: bucket.resetsAt } : {}), + }; +} + +function anthropicWindows(report: ClaudeUsageReport): UsageWindow[] { + const windows: UsageWindow[] = []; + const fiveHour = toWindow("5-hour", report.fiveHour); + if (fiveHour) windows.push(fiveHour); + const week = toWindow("week", report.sevenDay); + if (week) windows.push(week); + return windows; +} + +function opencodeWindows(report: OpencodeUsageReport): UsageWindow[] { + const windows: UsageWindow[] = []; + const fiveHour = toWindow("5-hour", report.fiveHour); + if (fiveHour) windows.push(fiveHour); + const week = toWindow("week", report.weekly); + if (week) windows.push(week); + const month = toWindow("month", report.monthly); + if (month) windows.push(month); + return windows; +} + +/** + * Resolve which Claude account backs an `anthropic` key. Matches by key id or by + * the account's source file (the key's `credentials_file`), falling back to the + * first available account — mirrors the existing `/models/key-usage` route. + */ +function matchAnthropicAccount( + accounts: ClaudeAccount[], + keyId: string, + credFile?: string, +): ClaudeAccount | undefined { + const matched = accounts.find( + (a) => a.id === keyId || (credFile != null && a.source === credFile), + ); + return matched ?? accounts[0]; +} + +function iso(ms: number): string { + return new Date(ms).toISOString(); +} + +/** Human-readable coarse duration, e.g. "3h 12m", "5d 8h", "0m". 
*/ +function formatDuration(ms: number): string { + const totalSec = Math.round(Math.abs(ms) / 1000); + const days = Math.floor(totalSec / 86400); + const hours = Math.floor((totalSec % 86400) / 3600); + const minutes = Math.floor((totalSec % 3600) / 60); + const parts: string[] = []; + if (days > 0) parts.push(`${days}d`); + if (hours > 0) parts.push(`${hours}h`); + if (minutes > 0 || parts.length === 0) parts.push(`${minutes}m`); + return parts.join(" "); +} + +function formatRelative(targetMs: number, nowMs: number): string { + const delta = targetMs - nowMs; + return delta >= 0 ? `in ${formatDuration(delta)}` : `${formatDuration(delta)} ago`; +} + +function formatWindow(window: UsageWindow, now: number): string { + const parts: string[] = []; + if (typeof window.remainingPercent === "number") { + parts.push(`${window.remainingPercent}% remaining`); + } + if (typeof window.resetsAt === "number") { + parts.push(`resets ${iso(window.resetsAt)} (${formatRelative(window.resetsAt, now)})`); + } + return `${window.label}: ${parts.join(", ")}`; +} + +/** + * Render normalized usage entries into an AI-friendly text block. Pure — `now` + * is injected so relative timestamps are deterministic under test. + */ +export function formatKeyUsage(entries: KeyUsageEntry[], now: number): string { + if (entries.length === 0) return "No API keys matched."; + + const lines: string[] = []; + lines.push(`API key usage — ${entries.length} key${entries.length === 1 ? "" : "s"}:`); + + for (const entry of entries) { + lines.push(""); + lines.push(`[${entry.keyId}] provider: ${entry.provider}`); + + if (entry.status === "exhausted") { + const since = + typeof entry.exhaustedAt === "number" + ? 
` (since ${iso(entry.exhaustedAt)}, ${formatRelative(entry.exhaustedAt, now)})` + : ""; + lines.push(`status: EXHAUSTED${since}`); + if (entry.lastError) lines.push(`last error: ${entry.lastError}`); + } else { + lines.push("status: active"); + } + + if (entry.unsupported) { + lines.push( + `usage: not supported for provider "${entry.provider}" (only anthropic and opencode-go report usage)`, + ); + continue; + } + + if (entry.dataSource === "live") { + lines.push("data: live (fetched just now)"); + } else if (entry.dataSource === "cache") { + lines.push( + typeof entry.cachedAt === "number" + ? `data: cached — last fetched from source ${iso(entry.cachedAt)} (${formatRelative(entry.cachedAt, now)})` + : "data: cached (source timestamp unknown)", + ); + } + + for (const window of entry.windows) { + lines.push(formatWindow(window, now)); + } + + if (entry.unavailableReason) { + lines.push(`usage: unavailable — ${entry.unavailableReason}`); + } + } + + return lines.join("\n"); +} + +async function buildEntry( + key: KeyState, + accounts: ClaudeAccount[], + fetchAnthropic: (account: ClaudeAccount) => Promise<ClaudeUsageResult | null>, + fetchOpencode: (keyId: string) => Promise<OpencodeUsageReport | null>, +): Promise<KeyUsageEntry> { + const def = key.definition; + const entry: KeyUsageEntry = { + keyId: def.id, + provider: def.provider, + status: key.status, + windows: [], + ...(key.lastError ? { lastError: key.lastError } : {}), + ...(typeof key.exhaustedAt === "number" ? 
{ exhaustedAt: key.exhaustedAt } : {}), + }; + + if (def.provider === "anthropic") { + const account = matchAnthropicAccount(accounts, def.id, def.credentials_file); + if (!account) { + entry.unavailableReason = "no Claude account credentials available for this key"; + return entry; + } + let result: ClaudeUsageResult | null = null; + try { + result = await fetchAnthropic(account); + } catch { + result = null; + } + if (!result) { + entry.unavailableReason = "no live usage data and no cached usage available"; + return entry; + } + entry.dataSource = result.source; + if (typeof result.cachedAt === "number") entry.cachedAt = result.cachedAt; + entry.windows = anthropicWindows(result.report); + if (entry.windows.length === 0) { + entry.unavailableReason = "usage endpoint returned no window data"; + } + return entry; + } + + if (def.provider === "opencode-go") { + let report: OpencodeUsageReport | null = null; + try { + report = await fetchOpencode(def.id); + } catch { + report = null; + } + if (!report) { + entry.unavailableReason = + "live usage unavailable (requires OPENCODE_COOKIE and a workspace id, or the source returned no data; OpenCode keeps no local cache)"; + return entry; + } + entry.dataSource = "live"; + entry.windows = opencodeWindows(report); + if (entry.windows.length === 0) { + entry.unavailableReason = "usage source returned no window data"; + } + return entry; + } + + entry.unsupported = true; + return entry; +} + +export function createKeyUsageTool(callbacks: KeyUsageCallbacks): ToolDefinition { + const fetchAnthropic = callbacks.fetchAnthropicUsage ?? getAccountUsageWithSource; + const fetchOpencode = callbacks.fetchOpencodeUsage ?? 
defaultFetchOpencodeUsage; + + return { + name: "key_usage", + description: [ + "Report current usage levels for configured API keys so you can pick a key with", + "headroom, warn before hitting a rate limit, or diagnose an exhausted-key failure.", + "", + "For each key it returns: provider, active/exhausted status (with the last error when", + "exhausted), remaining rate-limit headroom per window (5-hour, weekly, and monthly where", + "the provider exposes it), each window's reset timestamp, and whether the figures are", + "live or served from cache (with the cache's last-fetched time).", + "", + "Pass a key_id to inspect one key; omit it to report all keys. Usage reporting is", + "supported for anthropic and opencode-go keys.", + ].join("\n"), + parameters: z.object({ + key_id: z + .string() + .optional() + .describe( + 'The id of a single key to report (as configured in dispatch.toml, e.g. "claude-max"). Omit to report all configured keys.', + ), + }), + execute: async (args: Record<string, unknown>): Promise<string> => { + const requestedKeyId = (args.key_id as string | undefined)?.trim() || undefined; + + const allKeys = callbacks.listKeys(); + if (allKeys.length === 0) { + return "No API keys are configured."; + } + + let keys = allKeys; + if (requestedKeyId) { + keys = allKeys.filter((k) => k.definition.id === requestedKeyId); + if (keys.length === 0) { + const available = allKeys.map((k) => k.definition.id).join(", "); + return `Error: no key found with id "${requestedKeyId}". 
Available keys: ${available}.`; + } + } + + const accounts = callbacks.listClaudeAccounts(); + const entries: KeyUsageEntry[] = []; + for (const key of keys) { + entries.push(await buildEntry(key, accounts, fetchAnthropic, fetchOpencode)); + } + + return formatKeyUsage(entries, Date.now()); + }, + }; +} diff --git a/packages/core/src/tools/summon.ts b/packages/core/src/tools/summon.ts index b941152..2a076e6 100644 --- a/packages/core/src/tools/summon.ts +++ b/packages/core/src/tools/summon.ts @@ -287,6 +287,7 @@ export function createSummonTool( "write_file", "run_shell", "search_code", + "key_usage", "todo", "summon", "retrieve", diff --git a/packages/core/src/types/index.ts b/packages/core/src/types/index.ts index f7944c9..4e3fa0b 100644 --- a/packages/core/src/types/index.ts +++ b/packages/core/src/types/index.ts @@ -76,8 +76,57 @@ export interface SystemChunk { export interface ChatMessage { role: MessageRole; chunks: Chunk[]; + /** + * Ephemeral ORDERED multimodal content for a user turn (interleaved text + + * image/pdf attachments). Set ONLY transiently on the in-flight user message + * so `toModelMessages` can emit multimodal `ImagePart`/`FilePart` content to + * the provider. Never persisted (the chunk log stores only the text, with + * `[image]`/`[pdf]` markers), so it's absent on history-rebuilt messages. + * When absent, the message is plain text built from its `chunks`. + */ + content?: UserContentPart[]; } +// ─── Multimodal user content (image / PDF attachments) ─────────── +// +// When a user pastes one or more images/PDFs into the chat input, the turn's +// user message carries an ORDERED list of content parts instead of a plain +// string. The ordering is meaningful — the user can interleave text and +// attachments ("here is image A: <A>, here is image B: <B>") and the model +// sees them in exactly that sequence. 
+// +// These parts are EPHEMERAL: they are forwarded to the model for the turn that +// produced them but are NOT persisted as raw bytes in the chunk log. History +// stores only the user's text (with `[image]` / `[pdf]` markers in place of +// each attachment), so a later reload re-renders the text but never re-sends +// the binary payload. This keeps the persisted log small and avoids re-billing +// image tokens on every subsequent turn. + +/** A plain-text segment of a multimodal user message. */ +export interface UserTextPart { + type: "text"; + text: string; +} + +/** + * A binary attachment (image or PDF) in a multimodal user message. `data` is a + * base64-encoded payload (no `data:` URI prefix); `mediaType` is the IANA media + * type (e.g. `image/png`, `application/pdf`). `name` is an optional original + * filename, used only for PDF `filename` passthrough and diagnostics. + */ +export interface UserAttachmentPart { + type: "attachment"; + /** IANA media type, e.g. `image/png`, `image/jpeg`, `application/pdf`. */ + mediaType: string; + /** Base64-encoded bytes WITHOUT a `data:` URI prefix. */ + data: string; + /** Optional original filename (mainly for PDFs). */ + name?: string; +} + +/** One ordered part of a multimodal user message. 
*/ +export type UserContentPart = UserTextPart | UserAttachmentPart; + // ─── Append-only chunk log (persisted model) ───────────────────── // // The DB stores a conversation as a flat stream of `ChunkRow`s (see diff --git a/packages/core/tests/agent/agent.test.ts b/packages/core/tests/agent/agent.test.ts index d8edec7..f4b33cc 100644 --- a/packages/core/tests/agent/agent.test.ts +++ b/packages/core/tests/agent/agent.test.ts @@ -1544,4 +1544,102 @@ describe("anthropicThinkingProviderOptions — adaptive-thinking model detection effort: "xhigh", }); }); + + describe("multimodal user content", () => { + it("emits ordered text + image parts to the model when content is provided", async () => { + vi.mocked(streamText).mockReturnValue( + makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]), + ); + + const agent = new Agent(makeConfig()); + for await (const _ of agent.run("here is image A: [image]", { + content: [ + { type: "text", text: "here is image A: " }, + { type: "attachment", mediaType: "image/png", data: "QQ==" }, + ], + })) { + // consume + } + + const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; + const messages = callArgs?.messages as Array<{ role: string; content: unknown }>; + const userMsg = messages.find((m) => m.role === "user"); + expect(userMsg).toBeDefined(); + // Multimodal turn → content is an ordered parts array, not a string. 
+ expect(Array.isArray(userMsg?.content)).toBe(true); + const parts = userMsg?.content as Array<Record<string, unknown>>; + expect(parts[0]).toMatchObject({ type: "text", text: "here is image A: " }); + expect(parts[1]).toMatchObject({ type: "image", mediaType: "image/png" }); + expect(String(parts[1]?.image)).toBe("data:image/png;base64,QQ=="); + }); + + it("emits a FilePart for a PDF attachment with its filename", async () => { + vi.mocked(streamText).mockReturnValue( + makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]), + ); + + const agent = new Agent(makeConfig()); + for await (const _ of agent.run("see [pdf]", { + content: [ + { type: "text", text: "see " }, + { type: "attachment", mediaType: "application/pdf", data: "QQ==", name: "doc.pdf" }, + ], + })) { + // consume + } + + const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; + const messages = callArgs?.messages as Array<{ role: string; content: unknown }>; + const userMsg = messages.find((m) => m.role === "user"); + const parts = userMsg?.content as Array<Record<string, unknown>>; + const filePart = parts.find((p) => p.type === "file"); + expect(filePart).toMatchObject({ + type: "file", + mediaType: "application/pdf", + filename: "doc.pdf", + }); + expect(String(filePart?.data)).toBe("data:application/pdf;base64,QQ=="); + }); + + it("persists the user turn as text only (no content) for history", async () => { + vi.mocked(streamText).mockReturnValue( + makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]), + ); + + const agent = new Agent(makeConfig()); + for await (const _ of agent.run("look: [image]", { + content: [ + { type: "text", text: "look: " }, + { type: "attachment", mediaType: "image/png", data: "QQ==" }, + ], + })) { + // consume + } + + // The in-memory user message keeps the text chunk for the render/persist + // path; the ephemeral `content` rides alongside it but isn't a chunk. 
+ const userMsg = agent.messages.find((m) => m.role === "user"); + expect(userMsg?.chunks).toEqual([{ type: "text", text: "look: [image]" }]); + }); + + it("falls back to a plain string when content has no attachment", async () => { + vi.mocked(streamText).mockReturnValue( + makeMockStreamResult([{ type: "text-delta", id: "t0", text: "ok" }, finishStop]), + ); + + const agent = new Agent(makeConfig()); + for await (const _ of agent.run("plain text", { + content: [{ type: "text", text: "plain text" }], + })) { + // consume + } + + const callArgs = vi.mocked(streamText).mock.calls.at(-1)?.[0]; + const messages = callArgs?.messages as Array<{ role: string; content: unknown }>; + const userMsg = messages.find((m) => m.role === "user"); + // No attachment → plain string content (byte-identical to text-only path). + expect(typeof userMsg?.content).toBe("string"); + expect(userMsg?.content).toBe("plain text"); + }); + }); }); diff --git a/packages/core/tests/models/attachments.test.ts b/packages/core/tests/models/attachments.test.ts new file mode 100644 index 0000000..11a9f82 --- /dev/null +++ b/packages/core/tests/models/attachments.test.ts @@ -0,0 +1,136 @@ +import { describe, expect, it } from "vitest"; +import { + base64ByteLength, + isAcceptedAttachmentMediaType, + isImageMediaType, + isPdfMediaType, + MAX_ATTACHMENTS, + MAX_IMAGE_BYTES, + MAX_PDF_BYTES, + MAX_TOTAL_ATTACHMENT_BYTES, + validateUserContent, +} from "../../src/models/attachments.js"; +import type { UserContentPart } from "../../src/types/index.js"; + +/** A base64 string that decodes to exactly `bytes` bytes (no padding chars). */ +function base64OfBytes(bytes: number): string { + // 4 base64 chars → 3 bytes. Use a multiple of 3 for clean (unpadded) output. 
+ const groups = Math.ceil(bytes / 3); + return "A".repeat(groups * 4); +} + +function imagePart(data: string, mediaType = "image/png"): UserContentPart { + return { type: "attachment", mediaType, data }; +} + +describe("media-type predicates", () => { + it("classifies image types", () => { + expect(isImageMediaType("image/png")).toBe(true); + expect(isImageMediaType("image/jpeg")).toBe(true); + expect(isImageMediaType("image/webp")).toBe(true); + expect(isImageMediaType("image/gif")).toBe(true); + expect(isImageMediaType("application/pdf")).toBe(false); + expect(isImageMediaType("image/svg+xml")).toBe(false); + }); + + it("classifies pdf + accepted types", () => { + expect(isPdfMediaType("application/pdf")).toBe(true); + expect(isPdfMediaType("image/png")).toBe(false); + expect(isAcceptedAttachmentMediaType("image/gif")).toBe(true); + expect(isAcceptedAttachmentMediaType("application/pdf")).toBe(true); + expect(isAcceptedAttachmentMediaType("text/plain")).toBe(false); + }); +}); + +describe("base64ByteLength", () => { + it("computes decoded length without padding", () => { + // "AAAA" → 3 bytes. + expect(base64ByteLength("AAAA")).toBe(3); + }); + + it("accounts for padding", () => { + // "QQ==" → 1 byte ("A"). + expect(base64ByteLength("QQ==")).toBe(1); + // "QUI=" → 2 bytes ("AB"). 
+ expect(base64ByteLength("QUI=")).toBe(2); + }); + + it("tolerates a data: URI prefix and whitespace", () => { + expect(base64ByteLength("data:image/png;base64,AAAA")).toBe(3); + expect(base64ByteLength("AA\nAA")).toBe(3); + }); + + it("returns 0 for empty input", () => { + expect(base64ByteLength("")).toBe(0); + expect(base64ByteLength(" ")).toBe(0); + }); +}); + +describe("validateUserContent", () => { + it("accepts a small image and ignores text parts", () => { + const content: UserContentPart[] = [ + { type: "text", text: "hi" }, + imagePart(base64OfBytes(1024)), + ]; + expect(validateUserContent(content)).toEqual({ ok: true, errors: [] }); + }); + + it("accepts an empty / text-only content list", () => { + expect(validateUserContent([]).ok).toBe(true); + expect(validateUserContent([{ type: "text", text: "no files" }]).ok).toBe(true); + }); + + it("rejects an unsupported media type", () => { + const res = validateUserContent([imagePart(base64OfBytes(10), "image/svg+xml")]); + expect(res.ok).toBe(false); + expect(res.errors[0]).toMatchObject({ code: "unsupported-type", mediaType: "image/svg+xml" }); + }); + + it("rejects an oversized image but allows a PDF of the same size", () => { + const big = base64OfBytes(MAX_IMAGE_BYTES + 3); + const imgRes = validateUserContent([imagePart(big, "image/png")]); + expect(imgRes.ok).toBe(false); + expect(imgRes.errors.some((e) => e.code === "image-too-large")).toBe(true); + + // Same byte size as a PDF is fine (PDF limit is much higher). 
+ const pdfRes = validateUserContent([imagePart(big, "application/pdf")]); + expect(pdfRes.ok).toBe(true); + }); + + it("rejects an oversized PDF", () => { + const res = validateUserContent([ + imagePart(base64OfBytes(MAX_PDF_BYTES + 3), "application/pdf"), + ]); + expect(res.ok).toBe(false); + expect(res.errors.some((e) => e.code === "pdf-too-large")).toBe(true); + }); + + it("rejects an empty attachment payload", () => { + const res = validateUserContent([imagePart("", "image/png")]); + expect(res.ok).toBe(false); + expect(res.errors.some((e) => e.code === "empty")).toBe(true); + }); + + it("rejects too many attachments", () => { + const content: UserContentPart[] = Array.from({ length: MAX_ATTACHMENTS + 1 }, () => + imagePart(base64OfBytes(8)), + ); + const res = validateUserContent(content); + expect(res.ok).toBe(false); + expect(res.errors.some((e) => e.code === "too-many")).toBe(true); + }); + + it("rejects when the total payload exceeds the request ceiling", () => { + // Several individually-legal PDFs that together exceed the total cap. 
+ const each = Math.floor(MAX_TOTAL_ATTACHMENT_BYTES / 3); + const content: UserContentPart[] = [ + imagePart(base64OfBytes(each), "application/pdf"), + imagePart(base64OfBytes(each), "application/pdf"), + imagePart(base64OfBytes(each), "application/pdf"), + imagePart(base64OfBytes(each), "application/pdf"), + ]; + const res = validateUserContent(content); + expect(res.ok).toBe(false); + expect(res.errors.some((e) => e.code === "total-too-large")).toBe(true); + }); +}); diff --git a/packages/core/tests/models/catalog.test.ts b/packages/core/tests/models/catalog.test.ts index 51043e6..f4bddc2 100644 --- a/packages/core/tests/models/catalog.test.ts +++ b/packages/core/tests/models/catalog.test.ts @@ -4,6 +4,7 @@ import { __resetCatalogCacheForTests, getModelsCatalog, resolveContextLimit, + resolveModelCapabilities, } from "../../src/models/catalog.js"; const CACHE_PATH = "/tmp/dispatch/models-dev.json"; @@ -13,14 +14,30 @@ const CATALOG = { anthropic: { id: "anthropic", models: { - "claude-sonnet-4-5": { limit: { context: 200000, output: 64000 } }, - "claude-sonnet-4-6": { limit: { context: 1000000, output: 64000 } }, + "claude-sonnet-4-5": { + limit: { context: 200000, output: 64000 }, + modalities: { input: ["text", "image", "pdf"], output: ["text"] }, + }, + "claude-sonnet-4-6": { + limit: { context: 1000000, output: 64000 }, + modalities: { input: ["text", "image", "pdf"], output: ["text"] }, + }, + // A text-only model: definitively no image/pdf input. + "text-only-model": { + limit: { context: 100000, output: 8192 }, + modalities: { input: ["text"], output: ["text"] }, + }, + // An entry predating the modalities field → capability unknown. 
+ "legacy-model": { limit: { context: 100000, output: 8192 } }, }, }, opencode: { id: "opencode", models: { - "glm-4-6": { limit: { context: 131072, output: 8192 } }, + "glm-4-6": { + limit: { context: 131072, output: 8192 }, + modalities: { input: ["text", "image"], output: ["text"] }, + }, }, }, }; @@ -156,3 +173,55 @@ describe("getModelsCatalog caching", () => { warn.mockRestore(); }); }); + +describe("resolveModelCapabilities", () => { + it("reports image + pdf for a vision model", async () => { + mockFetchOnce(CATALOG); + expect(await resolveModelCapabilities("anthropic", "claude-sonnet-4-5")).toEqual({ + image: true, + pdf: true, + }); + }); + + it("reports image-only for a model whose modalities omit pdf", async () => { + mockFetchOnce(CATALOG); + // glm-4-6 lists image but not pdf (resolved via the opencode fallback). + expect(await resolveModelCapabilities("opencode-anthropic", "glm-4-6")).toEqual({ + image: true, + pdf: false, + }); + }); + + it("reports a definitive no for a text-only model", async () => { + mockFetchOnce(CATALOG); + expect(await resolveModelCapabilities("anthropic", "text-only-model")).toEqual({ + image: false, + pdf: false, + }); + }); + + it("returns null (unknown) for an entry without modalities", async () => { + mockFetchOnce(CATALOG); + expect(await resolveModelCapabilities("anthropic", "legacy-model")).toBeNull(); + }); + + it("returns null (unknown) for an unknown model id", async () => { + mockFetchOnce(CATALOG); + expect(await resolveModelCapabilities("anthropic", "no-such-model")).toBeNull(); + }); + + it("returns null for an unsupported provider without hitting the network", async () => { + const fetchFn = mockFetchOnce(CATALOG); + expect(await resolveModelCapabilities("google", "gemini-2.5-pro")).toBeNull(); + expect(await resolveModelCapabilities("anthropic", "")).toBeNull(); + expect(fetchFn).not.toHaveBeenCalled(); + }); + + it("returns null (unknown) when the catalog is offline with no cache", async () => { + const 
fetchFn = vi.fn(() => Promise.reject(new Error("offline"))); + vi.stubGlobal("fetch", fetchFn); + const warn = vi.spyOn(console, "warn").mockImplementation(() => {}); + expect(await resolveModelCapabilities("anthropic", "claude-sonnet-4-5")).toBeNull(); + warn.mockRestore(); + }); +}); diff --git a/packages/core/tests/tools/key-usage.test.ts b/packages/core/tests/tools/key-usage.test.ts new file mode 100644 index 0000000..643e30e --- /dev/null +++ b/packages/core/tests/tools/key-usage.test.ts @@ -0,0 +1,317 @@ +import { describe, expect, it, vi } from "vitest"; + +// The tool imports `getAccountUsageWithSource` from `claude.ts`, which +// transitively imports `db/index.js` (top-level `import { Database } from +// "bun:sqlite"`) — unresolvable under vitest's Node runtime. These tests inject +// stub fetchers and never hit the real fetchers/DB, so stubbing the db module +// is enough to let the import chain resolve. +vi.mock("../../src/db/index.js", () => ({ + getDatabase: vi.fn(() => { + throw new Error("db not available in this test"); + }), +})); + +import type { ClaudeAccount, ClaudeUsageResult } from "../../src/credentials/claude.js"; +import type { OpencodeUsageReport } from "../../src/credentials/opencode.js"; +import { + createKeyUsageTool, + formatKeyUsage, + type KeyUsageCallbacks, +} from "../../src/tools/key-usage.js"; +import type { KeyDefinition, KeyState } from "../../src/types/index.js"; + +// ─── Builders ───────────────────────────────────────────────── + +function keyState( + def: Partial<KeyDefinition> & { id: string; provider: string }, + overrides: Partial<Omit<KeyState, "definition">> = {}, +): KeyState { + return { + definition: { base_url: "https://example.test", ...def }, + status: "active", + ...overrides, + }; +} + +function account(id: string, source = `/creds/${id}.json`): ClaudeAccount { + return { + id, + label: id, + source, + credentials: { accessToken: "tok", refreshToken: "ref", expiresAt: Date.now() + 3_600_000 }, + }; +} + +/** 
Build the tool with explicit stub fetchers — no network, no DB. */ +function buildTool(opts: { + keys: KeyState[]; + accounts?: ClaudeAccount[]; + anthropic?: (a: ClaudeAccount) => Promise<ClaudeUsageResult | null>; + opencode?: (keyId: string) => Promise<OpencodeUsageReport | null>; +}) { + const callbacks: KeyUsageCallbacks = { + listKeys: () => opts.keys, + listClaudeAccounts: () => opts.accounts ?? [], + fetchAnthropicUsage: opts.anthropic ?? (async () => null), + fetchOpencodeUsage: opts.opencode ?? (async () => null), + }; + return createKeyUsageTool(callbacks); +} + +const HOUR = 3_600_000; + +describe("key_usage tool", () => { + it("reports all keys when no key_id is given", async () => { + const reset5h = Date.now() + 2 * HOUR; + const tool = buildTool({ + keys: [ + keyState({ id: "claude-max", provider: "anthropic", credentials_file: "/creds/max.json" }), + keyState({ id: "opencode-1", provider: "opencode-go" }), + ], + accounts: [account("claude-max", "/creds/max.json")], + anthropic: async () => ({ + source: "live", + report: { + fiveHour: { utilization: 0.25, resetsAt: reset5h }, + sevenDay: { utilization: 0.6 }, + }, + }), + opencode: async () => ({ + fiveHour: { utilization: 0.1 }, + weekly: { utilization: 0.4 }, + monthly: { utilization: 0.7 }, + }), + }); + + const out = await tool.execute({}); + + // Both keys present with providers. + expect(out).toContain("[claude-max] provider: anthropic"); + expect(out).toContain("[opencode-1] provider: opencode-go"); + // Remaining = (1 - utilization) * 100. 
+ expect(out).toContain("5-hour: 75% remaining"); + expect(out).toContain("week: 40% remaining"); + expect(out).toContain("5-hour: 90% remaining"); + expect(out).toContain("week: 60% remaining"); + expect(out).toContain("month: 30% remaining"); + expect(out).toContain("data: live (fetched just now)"); + }); + + it("filters to a single key when key_id is given and does not fetch others", async () => { + const opencodeFetch = vi.fn(async () => ({ fiveHour: { utilization: 0.5 } })); + const tool = buildTool({ + keys: [ + keyState({ id: "claude-max", provider: "anthropic" }), + keyState({ id: "opencode-1", provider: "opencode-go" }), + ], + accounts: [account("claude-max")], + anthropic: async () => ({ + source: "live", + report: { fiveHour: { utilization: 0.2 } }, + }), + opencode: opencodeFetch, + }); + + const out = await tool.execute({ key_id: "claude-max" }); + + expect(out).toContain("[claude-max] provider: anthropic"); + expect(out).not.toContain("opencode-1"); + expect(opencodeFetch).not.toHaveBeenCalled(); + }); + + it("returns a helpful error for an unknown key_id", async () => { + const tool = buildTool({ + keys: [ + keyState({ id: "claude-max", provider: "anthropic" }), + keyState({ id: "opencode-1", provider: "opencode-go" }), + ], + }); + + const out = await tool.execute({ key_id: "nope" }); + + expect(out).toContain('no key found with id "nope"'); + expect(out).toContain("claude-max"); + expect(out).toContain("opencode-1"); + }); + + it("reports cached data with the source's last-fetched timestamp", async () => { + const cachedAt = Date.UTC(2025, 0, 2, 3, 4, 5); + const tool = buildTool({ + keys: [keyState({ id: "claude-max", provider: "anthropic" })], + accounts: [account("claude-max")], + anthropic: async () => ({ + source: "cache", + cachedAt, + report: { fiveHour: { utilization: 0.5 } }, + }), + }); + + const out = await tool.execute({}); + + expect(out).toContain("data: cached — last fetched from source 2025-01-02T03:04:05.000Z"); + 
expect(out).toContain("5-hour: 50% remaining"); + }); + + it("omits the month window for anthropic (no monthly bucket)", async () => { + const tool = buildTool({ + keys: [keyState({ id: "claude-max", provider: "anthropic" })], + accounts: [account("claude-max")], + anthropic: async () => ({ + source: "live", + report: { fiveHour: { utilization: 0.1 }, sevenDay: { utilization: 0.2 } }, + }), + }); + + const out = await tool.execute({}); + + expect(out).toContain("5-hour:"); + expect(out).toContain("week:"); + expect(out).not.toContain("month:"); + }); + + it("includes the month window for opencode-go", async () => { + const tool = buildTool({ + keys: [keyState({ id: "opencode-1", provider: "opencode-go" })], + opencode: async () => ({ + fiveHour: { utilization: 0.1 }, + weekly: { utilization: 0.2 }, + monthly: { utilization: 0.3 }, + }), + }); + + const out = await tool.execute({}); + + expect(out).toContain("month: 70% remaining"); + }); + + it("surfaces exhausted status with the last error", async () => { + const exhaustedAt = Date.now() - HOUR; + const tool = buildTool({ + keys: [ + keyState( + { id: "opencode-1", provider: "opencode-go" }, + { status: "exhausted", lastError: "429 rate limit exceeded", exhaustedAt }, + ), + ], + opencode: async () => null, + }); + + const out = await tool.execute({}); + + expect(out).toContain("status: EXHAUSTED"); + expect(out).toContain("last error: 429 rate limit exceeded"); + }); + + it("flags providers without usage support", async () => { + const tool = buildTool({ + keys: [keyState({ id: "gem", provider: "google" })], + }); + + const out = await tool.execute({}); + + expect(out).toContain("[gem] provider: google"); + expect(out).toContain("not supported"); + }); + + it("reports unavailable when a supported provider returns no usage", async () => { + const tool = buildTool({ + keys: [keyState({ id: "claude-max", provider: "anthropic" })], + accounts: [account("claude-max")], + anthropic: async () => null, + }); + + const 
out = await tool.execute({}); + + expect(out).toContain("usage: unavailable"); + expect(out).toContain("no cached usage"); + }); + + it("reports unavailable for anthropic keys with no account credentials", async () => { + const tool = buildTool({ + keys: [keyState({ id: "claude-max", provider: "anthropic" })], + accounts: [], + }); + + const out = await tool.execute({}); + + expect(out).toContain("no Claude account credentials available"); + }); + + it("treats a fetcher that throws as unavailable (does not crash)", async () => { + const tool = buildTool({ + keys: [keyState({ id: "opencode-1", provider: "opencode-go" })], + opencode: async () => { + throw new Error("network down"); + }, + }); + + const out = await tool.execute({}); + + expect(out).toContain("usage: unavailable"); + }); + + it("reports when no keys are configured at all", async () => { + const tool = buildTool({ keys: [] }); + const out = await tool.execute({}); + expect(out).toBe("No API keys are configured."); + }); + + it("clamps out-of-range utilization to 0–100%", async () => { + const tool = buildTool({ + keys: [keyState({ id: "opencode-1", provider: "opencode-go" })], + opencode: async () => ({ + fiveHour: { utilization: 1.2 }, // over 100% used → 0% remaining + weekly: { utilization: -0.5 }, // negative → 100% remaining + }), + }); + + const out = await tool.execute({}); + + expect(out).toContain("5-hour: 0% remaining"); + expect(out).toContain("week: 100% remaining"); + }); +}); + +describe("formatKeyUsage (pure)", () => { + const now = Date.UTC(2025, 5, 1, 12, 0, 0); + + it("formats reset timestamps with ISO + relative time", () => { + const out = formatKeyUsage( + [ + { + keyId: "claude-max", + provider: "anthropic", + status: "active", + dataSource: "live", + windows: [{ label: "5-hour", remainingPercent: 80, resetsAt: now + 90 * 60_000 }], + }, + ], + now, + ); + + expect(out).toContain("5-hour: 80% remaining, resets 2025-06-01T13:30:00.000Z (in 1h 30m)"); + }); + + it("renders a past 
// ─── Image / PDF capability lookup ─────────────────────────────
// Ask the API (backed by models.dev) whether the active model accepts image /
// PDF INPUT, so the chat input can refuse to send an unsupported attachment
// before any tokens are spent. `null` means "unknown" (catalog offline /
// unsupported provider) and is treated as an optimistic allow.
let imageSupport = $state<{ image: boolean; pdf: boolean } | null>(null);
const capabilityCache = new Map<string, { image: boolean; pdf: boolean } | null>();

$effect(() => {
	// Provider of the key a tab is bound to, or null without a key / a match.
	const providerOf = (tab: typeof tabStore.activeTab) => {
		const keyId = tab?.keyId;
		return keyId ? (modelsData.keys.find((k) => k.id === keyId)?.provider ?? null) : null;
	};

	const activeTab = tabStore.activeTab;
	const modelId = activeTab?.modelId ?? null;
	const provider = providerOf(activeTab);

	if (!provider || !modelId) {
		imageSupport = null;
		return;
	}

	const cacheKey = `${provider}/${modelId}`;
	const alreadyResolved = capabilityCache.has(cacheKey);
	// On a cache miss this resets to null right away, so a slow or failed fetch
	// can never leave the PREVIOUS model's capability showing for this model.
	imageSupport = alreadyResolved ? (capabilityCache.get(cacheKey) ?? null) : null;
	if (alreadyResolved) return;

	const url = `${config.apiBase}/models/capabilities?provider=${encodeURIComponent(provider)}&modelId=${encodeURIComponent(modelId)}`;

	const loadCapabilities = async (): Promise<void> => {
		try {
			const res = await fetch(url);
			if (!res.ok) return;
			const payload = (await res.json()) as {
				capabilities?: { image: boolean; pdf: boolean } | null;
			};
			const caps = payload.capabilities ?? null;
			capabilityCache.set(cacheKey, caps);
			// Only publish the answer if the user is still on the same
			// provider/model pair the request was made for.
			const current = tabStore.activeTab;
			if (providerOf(current) === provider && current?.modelId === modelId) {
				imageSupport = caps;
			}
		} catch {
			// Network error: imageSupport stays null (unknown → permissive).
		}
	};

	void loadCapabilities();
});
/**
 * Produce a fresh 6-character token id drawn from lowercase letters and
 * digits. Uses `crypto.getRandomValues` when the runtime exposes it and falls
 * back to `Math.random` otherwise.
 */
export function generateTokenId(): string {
	const symbols = "abcdefghijklmnopqrstuvwxyz0123456789";
	const idLength = 6;
	// crypto.getRandomValues exists in browsers and modern Node/Bun; the cast
	// keeps the lookup safe on runtimes where the global is missing.
	const webCrypto = (globalThis as { crypto?: Crypto }).crypto;
	const picks: number[] = webCrypto?.getRandomValues
		? Array.from(webCrypto.getRandomValues(new Uint32Array(idLength)), (word) => word % symbols.length)
		: Array.from({ length: idLength }, () => Math.floor(Math.random() * symbols.length));
	return picks.map((index) => symbols[index]).join("");
}
/**
 * Locate every attachment token in `text`.
 *
 * @param text - Draft text to scan.
 * @returns One entry per token, in order of appearance, carrying the token's
 *   kind, id and its `[start, end)` offsets within `text`.
 */
export function findTokens(text: string): FoundToken[] {
	// Scan with a private copy of the shared pattern so the module-level regex
	// object's `lastIndex` is never read or advanced.
	const scanner = new RegExp(ATTACHMENT_TOKEN_RE.source, "g");
	return Array.from(text.matchAll(scanner), (match) => {
		const start = match.index ?? 0;
		return {
			kind: match[1] as AttachmentKind,
			id: match[2] ?? "",
			start,
			end: start + match[0].length,
		};
	});
}
/**
 * Work out what a Backspace/Delete keystroke should do when it touches an
 * attachment token, so that a token is always removed as a whole and never
 * left half-deleted. Returns `null` when the keystroke touches no token; the
 * caller should then let the browser's default editing behaviour run.
 *
 * Rules:
 * - Range selection (`selStart !== selEnd`): widen the range so it fully
 *   covers every token it overlaps, then delete the widened range. Returns
 *   `null` when the range overlaps no token.
 * - Collapsed caret + Backspace: delete the token that ends at the caret or
 *   has the caret strictly inside it.
 * - Collapsed caret + Delete: delete the token that starts at the caret or
 *   has the caret strictly inside it.
 *
 * @param text - Current draft text.
 * @param selStart - One end of the selection (offsets may come in either order).
 * @param selEnd - The other end of the selection.
 * @param key - Which deletion key was pressed.
 * @returns The edited text, the collapsed caret position after the edit and
 *   the ids of the removed tokens, or `null` when no token is affected.
 */
export function computeTokenDeletion(
	text: string,
	selStart: number,
	selEnd: number,
	key: "Backspace" | "Delete",
): DeletionResult | null {
	const tokens = findTokens(text);
	if (tokens.length === 0) return null;

	if (selStart !== selEnd) {
		const lo = Math.min(selStart, selEnd);
		const hi = Math.max(selStart, selEnd);
		const overlapping = tokens.filter((t) => t.start < hi && t.end > lo);
		if (overlapping.length === 0) return null;
		const delStart = Math.min(lo, ...overlapping.map((t) => t.start));
		const delEnd = Math.max(hi, ...overlapping.map((t) => t.end));
		return {
			text: text.slice(0, delStart) + text.slice(delEnd),
			caret: delStart,
			removedIds: overlapping.map((t) => t.id),
		};
	}

	// Collapsed caret. A caret strictly inside a token counts as touching it:
	// a default one-character delete there would leave a broken token fragment
	// in the draft. Backspace looks at the token behind (or around) the caret,
	// Delete at the token ahead of (or around) it. Tokens never overlap, so at
	// most one token can match.
	const caret = selStart;
	const target = tokens.find((t) =>
		key === "Backspace" ? t.start < caret && caret <= t.end : t.start <= caret && caret < t.end,
	);
	if (!target) return null;
	return {
		text: text.slice(0, target.start) + text.slice(target.end),
		caret: target.start,
		removedIds: [target.id],
	};
}
/**
 * Split a draft (text containing attachment tokens) plus the staged-attachment
 * map into:
 * - `displayText`: the draft with every token swapped for an `[image]` /
 *   `[pdf]` marker, and
 * - `content`: an ordered `UserContentPart[]` interleaving the surrounding
 *   text with the matching attachment parts.
 *
 * A token whose id has no staged attachment is treated as plain text in BOTH
 * outputs: its marker appears in `displayText` and in the neighbouring text
 * part, and it contributes no attachment part. For a token that does resolve,
 * the marker reflects the staged attachment's own kind, so the persisted text
 * always agrees with the part that is actually sent.
 *
 * @param draft - Raw draft text as typed, tokens included.
 * @param attachments - Staged attachments keyed by token id.
 * @returns The display text and the multimodal content; `content` is `null`
 *   when no attachment part was produced.
 */
export function parseDraft(draft: string, attachments: Map<string, StagedAttachment>): ParsedDraft {
	const content: UserContentPart[] = [];
	let displayText = "";
	let pendingText = "";
	let cursor = 0;
	let producedAttachment = false;

	// Emit the accumulated text as one part; empty runs produce no part.
	const flushText = () => {
		if (pendingText.length > 0) {
			content.push({ type: "text", text: pendingText });
			pendingText = "";
		}
	};

	for (const tok of findTokens(draft)) {
		const between = draft.slice(cursor, tok.start);
		cursor = tok.end;
		pendingText += between;
		displayText += between;

		const staged = attachments.get(tok.id);
		if (!staged) {
			// Orphan token → its marker is ordinary text in both outputs.
			const orphanMarker = markerFor(tok.kind);
			displayText += orphanMarker;
			pendingText += orphanMarker;
			continue;
		}

		// Take the marker from the staged attachment, not from the token's
		// spelling: a token edited to `pdf:<id>` may still point at an image.
		displayText += markerFor(staged.kind);
		// The model receives the actual attachment part instead of marker text.
		flushText();
		content.push({
			type: "attachment",
			mediaType: staged.mediaType,
			data: staged.data,
			...(staged.name ? { name: staged.name } : {}),
		});
		producedAttachment = true;
	}

	const tail = draft.slice(cursor);
	pendingText += tail;
	displayText += tail;
	flushText();

	return { displayText, content: producedAttachment ? content : null };
}
""); // switching tabs saves the current draft and restores the target tab's text // automatically — drafts are never lost or clobbered by tab switching. const inputValue = $derived(tabStore.activeTab?.draft ?? ""); +const attachments = $derived(tabStore.activeTab?.attachments ?? []); const cacheStats = $derived(tabStore.activeTab?.cacheStats ?? null); const isRunning = $derived(agentStatus === "running"); @@ -25,9 +54,42 @@ const compactLocked = $derived( (tabStore.activeTab?.compactionError ?? null) !== null, ); const hasText = $derived(inputValue.trim().length > 0); +const hasAttachments = $derived(attachments.length > 0); // While generating with an empty box, the primary action is "stop". With text // in the box, it stays "send" (the message is queued behind the live turn). -const showStop = $derived(isRunning && !hasText); +const showStop = $derived(isRunning && !hasText && !hasAttachments); + +// ─── Attachment capability gating ────────────────────────────── +// A definitive "no" from the catalog (imageSupport.image === false with an +// image staged, or .pdf === false with a pdf staged) blocks the send so no +// tokens are spent. Unknown capability (imageSupport === null) is permissive. +const hasImageAttachment = $derived(attachments.some((a) => a.kind === "image")); +const hasPdfAttachment = $derived(attachments.some((a) => a.kind === "pdf")); +const imageBlocked = $derived( + hasImageAttachment && imageSupport !== null && imageSupport.image === false, +); +const pdfBlocked = $derived( + hasPdfAttachment && imageSupport !== null && imageSupport.pdf === false, +); +// Attachments require a fresh turn — they can't ride the queue path (which is +// text-only), so block sending an attachment while the agent is generating. 
/**
 * Classify a media type as an attachment kind: `"image"` when
 * `isImageMediaType` accepts it, otherwise `"pdf"` when `isPdfMediaType`
 * accepts it, otherwise `null` (not attachable).
 */
function kindForMediaType(mediaType: string): AttachmentKind | null {
	// Image check runs first; the PDF check only runs when it fails.
	return isImageMediaType(mediaType) ? "image" : isPdfMediaType(mediaType) ? "pdf" : null;
}
/**
 * Splice `insert` into the active draft over the textarea's current selection
 * and store the result as the tab's new draft.
 *
 * @param insert - Text to place at the caret (replaces any selected range).
 * @returns The offset just past the inserted text, i.e. where the caret
 *   should be placed afterwards.
 */
function insertAtCaret(insert: string): number {
	const draft = inputValue;
	// With no textarea bound (or no selection reported) append at the end.
	const from = inputEl?.selectionStart ?? draft.length;
	const to = inputEl?.selectionEnd ?? draft.length;
	if (tabId) {
		tabStore.setDraft(tabId, `${draft.slice(0, from)}${insert}${draft.slice(to)}`);
	}
	return from + insert.length;
}
/**
 * Keydown handler for the chat textarea.
 *
 * - Enter (without Shift) submits the draft instead of inserting a newline.
 * - Backspace/Delete next to, inside, or with a selection overlapping an
 *   attachment token removes the whole token in one stroke; every other
 *   deletion is left to the browser.
 * - Keystrokes that belong to an active IME composition are ignored entirely.
 */
function handleKeydown(e: KeyboardEvent) {
	// While an IME composition is in progress, Enter confirms the candidate and
	// Backspace edits the composition string — neither may send the message or
	// rewrite the draft. `keyCode === 229` covers browsers that fire the
	// confirming keydown only after `compositionend`, when `isComposing` is
	// already false.
	if (e.isComposing || e.keyCode === 229) return;

	if (e.key === "Enter" && !e.shiftKey) {
		e.preventDefault();
		submit();
		return;
	}

	if ((e.key === "Backspace" || e.key === "Delete") && inputEl && tabId) {
		// Atomic token delete: `computeTokenDeletion` returns null when the
		// keystroke touches no token, in which case default editing proceeds.
		const result = computeTokenDeletion(
			inputValue,
			inputEl.selectionStart ?? 0,
			inputEl.selectionEnd ?? 0,
			e.key,
		);
		if (result) {
			e.preventDefault();
			tabStore.setDraft(tabId, result.text);
			// Place the caret once the textarea has re-rendered with the new value.
			requestAnimationFrame(() => {
				const el = inputEl;
				if (el) {
					el.focus();
					el.setSelectionRange(result.caret, result.caret);
				}
			});
		}
	}
}
+ if (sendBlocked) return; + const text = trimmed || displayText; + tabStore.setDraft(tabId, ""); + void tabStore.sendMessage(text, content ?? undefined); } function primaryAction() { @@ -112,26 +307,39 @@ function primaryAction() { </script> <div class="flex flex-col"> + {#if attachmentWarning} + <div class="px-3 pt-2 text-xs text-warning flex items-start gap-1"> + <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="w-3.5 h-3.5 mt-0.5 shrink-0" aria-hidden="true"> + <path d="M10.29 3.86 1.82 18a2 2 0 0 0 1.71 3h16.94a2 2 0 0 0 1.71-3L13.71 3.86a2 2 0 0 0-3.42 0z"></path> + <line x1="12" y1="9" x2="12" y2="13"></line> + <line x1="12" y1="17" x2="12.01" y2="17"></line> + </svg> + <span>{attachmentWarning}</span> + </div> + {/if} <!-- Top bar: expanding textarea + send/stop action --> <div class="flex items-end gap-2 px-3 pt-3 pb-2"> <textarea bind:this={inputEl} value={inputValue} rows="1" - placeholder={compactLocked ? "Compaction in progress…" : "Type a message..."} + placeholder={compactLocked + ? "Compaction in progress…" + : "Type a message... (paste an image or PDF to attach)"} disabled={compactLocked} class="textarea textarea-ghost flex-1 resize-none leading-normal !min-h-0 h-auto" onkeydown={handleKeydown} oninput={handleInput} + onpaste={handlePaste} ></textarea> <!-- Single fixed-width button across all states so the layout never shifts when it morphs between Send and Stop. --> <button type="button" class="btn w-20 shrink-0 {showStop ? 'btn-error btn-outline' : 'btn-primary'}" - disabled={compactLocked || (!showStop && !hasText)} + disabled={compactLocked || (!showStop && !hasText && !hasAttachments) || sendBlocked} onclick={primaryAction} - title={showStop ? "Stop generation" : "Send message"} + title={showStop ? "Stop generation" : sendBlocked ? (attachmentWarning ?? 
"Cannot send") : "Send message"} > {#if showStop} <span class="loading loading-spinner loading-sm"></span> diff --git a/packages/frontend/src/lib/components/TabBar.svelte b/packages/frontend/src/lib/components/TabBar.svelte index 354260c..7371f7b 100644 --- a/packages/frontend/src/lib/components/TabBar.svelte +++ b/packages/frontend/src/lib/components/TabBar.svelte @@ -1,5 +1,6 @@ <script lang="ts"> import { tick } from "svelte"; +import type { Tab } from "../tabs.svelte.js"; import { tabStore } from "../tabs.svelte.js"; function statusColor(status: string): string { @@ -8,6 +9,21 @@ function statusColor(status: string): string { return "bg-success"; } +/** + * A tab "needs attention" — and should ping to grab the user's eye — when the + * agent has stopped and is likely waiting on the user: + * (a) the turn ended (idle) but the task list still has incomplete tasks + * (pending / in_progress) — the agent probably expects a response; or + * (b) the turn stopped due to an error of any kind. 
/**
 * Decide whether a tab's status dot should ping to draw the user's eye.
 *
 * True when the agent status is `"error"`, or when it is `"idle"` while at
 * least one task is still `pending` or `in_progress`. Every other status
 * yields false.
 */
function needsAttention(tab: Tab): boolean {
	switch (tab.agentStatus) {
		case "error":
			return true;
		case "idle":
			// Turn ended but work remains on the task list.
			return tab.tasks.some(({ status }) => status === "pending" || status === "in_progress");
		default:
			return false;
	}
}
by this handle">{tabStore.shortHandleFor(tab.id)}</span> <span class="max-w-28 truncate text-xs">{tab.title}</span> </span> diff --git a/packages/frontend/src/lib/components/ToolPermissions.svelte b/packages/frontend/src/lib/components/ToolPermissions.svelte index 6b09a07..4298724 100644 --- a/packages/frontend/src/lib/components/ToolPermissions.svelte +++ b/packages/frontend/src/lib/components/ToolPermissions.svelte @@ -53,6 +53,12 @@ const toolPermissions: ToolPermission[] = [ description: "Allow the AI to search the codebase with the cs ranked code-search engine", }, { + id: "key_usage", + label: "Key usage", + description: + "Allow the AI to read current API-key usage levels, rate-limit headroom, and reset times", + }, + { id: "lsp", label: "LSP queries", description: diff --git a/packages/frontend/src/lib/settings.svelte.ts b/packages/frontend/src/lib/settings.svelte.ts index 0da4e45..1b93804 100644 --- a/packages/frontend/src/lib/settings.svelte.ts +++ b/packages/frontend/src/lib/settings.svelte.ts @@ -15,6 +15,7 @@ let toolPerms = $state<Record<string, boolean>>({ web_search: false, youtube_transcribe: false, search_code: false, + key_usage: false, lsp: false, }); let savedToolPerms = $state<Record<string, boolean>>({ @@ -29,6 +30,7 @@ let savedToolPerms = $state<Record<string, boolean>>({ web_search: false, youtube_transcribe: false, search_code: false, + key_usage: false, lsp: false, }); let skillChecks = $state<Record<string, boolean>>({}); diff --git a/packages/frontend/src/lib/tabs.svelte.ts b/packages/frontend/src/lib/tabs.svelte.ts index 3edd1e3..90e1cee 100644 --- a/packages/frontend/src/lib/tabs.svelte.ts +++ b/packages/frontend/src/lib/tabs.svelte.ts @@ -11,13 +11,14 @@ import { // DB-free; safe in the browser bundle. The flat chunk log is the frontend's // source of truth for HISTORY; `groupRowsToMessages` derives render bubbles. 
import { groupRowsToMessages, type MessageRow } from "@dispatch/core/src/chunks/transform.js"; -import type { ChunkRow } from "@dispatch/core/src/types/index.js"; +import type { ChunkRow, UserContentPart } from "@dispatch/core/src/types/index.js"; import { type AgentModelEntry, DEFAULT_REASONING_EFFORT, isReasoningEffort, type ReasoningEffort, } from "@dispatch/core/src/types/index.js"; +import { intactTokenIds, type StagedAttachment } from "./attachment-tokens.js"; import { config } from "./config.js"; import { appSettings } from "./settings.svelte.js"; import type { @@ -183,6 +184,13 @@ export interface Tab { */ draft: string; /** + * Staged image/PDF attachments for THIS tab's unsent draft (in-memory only — + * never persisted). Each corresponds to an inline `【image:…】`/`【pdf:…】` + * token in `draft`; removing the token detaches the attachment (reconciled on + * every keystroke). Ephemeral: sent to the model for one turn, then cleared. + */ + attachments: StagedAttachment[]; + /** * True once the user has manually renamed this tab (double-click rename). * Suppresses the first-message auto-title so a chosen name is never * clobbered. In-memory only — a renamed tab is no longer "New Tab" on @@ -322,6 +330,7 @@ export function createTabStore() { queuedMessages: [], chunkLimit: appSettings.chunkLimit, draft: "", + attachments: [], manualTitle: false, oldestLoadedSeq: null, totalChunks: 0, @@ -402,6 +411,7 @@ export function createTabStore() { queuedMessages: [], chunkLimit: appSettings.chunkLimit, draft: "", + attachments: [], manualTitle: false, oldestLoadedSeq: win.oldestSeq, totalChunks: win.total, @@ -506,8 +516,31 @@ export function createTabStore() { * target tab shows its own text. No-op if the tab is gone. 
*/ function setDraft(id: string, text: string): void { - if (!getTabById(id)) return; - updateTab(id, { draft: text }); + const tab = getTabById(id); + if (!tab) return; + // Detach any staged attachment whose inline token is no longer intact in + // the new draft text (covers atomic-delete, manual mid-token edits, cut, + // select-all-delete, etc.). The token in the textarea is the ONLY handle + // on an attachment, so reconciling here keeps the two in lockstep. + const intact = intactTokenIds(text); + const keep = tab.attachments.filter((a) => intact.has(a.id)); + if (keep.length !== tab.attachments.length) { + updateTab(id, { draft: text, attachments: keep }); + } else { + updateTab(id, { draft: text }); + } + } + + /** + * Stage a pasted attachment on a tab. The caller is responsible for also + * inserting the matching `【image:…】`/`【pdf:…】` token into the draft (the + * token is what keeps the attachment alive through reconciliation). No-op if + * the tab is gone. + */ + function addAttachment(id: string, attachment: StagedAttachment): void { + const tab = getTabById(id); + if (!tab) return; + updateTab(id, { attachments: [...tab.attachments, attachment] }); } /** @@ -942,6 +975,7 @@ export function createTabStore() { queuedMessages: [], chunkLimit: appSettings.chunkLimit, draft: "", + attachments: [], manualTitle: false, oldestLoadedSeq: win.oldestSeq, totalChunks: win.total, @@ -1011,6 +1045,7 @@ export function createTabStore() { manualTitle: true, oldestLoadedSeq: null, totalChunks: 0, + attachments: [], compactingSource: sourceTabId, isCompacting: false, compactionError: null, @@ -1084,6 +1119,7 @@ export function createTabStore() { manualTitle: true, oldestLoadedSeq: win.oldestSeq, totalChunks: win.total, + attachments: [], compactingSource: null, isCompacting: false, compactionError: null, @@ -1436,6 +1472,7 @@ export function createTabStore() { queuedMessages: [], chunkLimit: appSettings.chunkLimit, draft: "", + attachments: [], manualTitle: false, 
oldestLoadedSeq: null, totalChunks: 0, @@ -1798,7 +1835,7 @@ export function createTabStore() { } } - async function sendMessage(text: string): Promise<void> { + async function sendMessage(text: string, content?: UserContentPart[]): Promise<void> { let tab = getActiveTab(); if (!tab) return; @@ -1809,8 +1846,11 @@ export function createTabStore() { if (!tab) return; } - // Fetch content for checked skills and build the message to send - let messageToSend = text; + // Fetch content for checked skills and build the message to send. + // `skillPrefix` (when non-empty) is prepended to BOTH the text projection + // that gets persisted/rendered AND the multimodal content array, so an + // image turn still carries the activated skills to the model. + let skillPrefix = ""; const checkedKeys = Object.entries(appSettings.skillChecks) .filter(([, v]) => v) .map(([k]) => k); @@ -1821,13 +1861,13 @@ export function createTabStore() { const [scope, ...nameParts] = key.split(":"); const name = nameParts.join(":"); if (!scope || !name) continue; - const content = await fetchSkillContent(scope, name); - if (content) { - skillSections.push(`<skill name="${name}">\n${content}\n</skill>`); + const skillContent = await fetchSkillContent(scope, name); + if (skillContent) { + skillSections.push(`<skill name="${name}">\n${skillContent}\n</skill>`); } } if (skillSections.length > 0) { - messageToSend = `[The following skills have been activated for this message]\n\n${skillSections.join("\n\n")}\n\n---\n\n${text}`; + skillPrefix = `[The following skills have been activated for this message]\n\n${skillSections.join("\n\n")}\n\n---\n\n`; } // Track injected skills on the tab @@ -1838,6 +1878,12 @@ export function createTabStore() { appSettings.skillChecks = {}; } + const messageToSend = `${skillPrefix}${text}`; + // Prepend the skill prefix to the multimodal content as a leading text + // part so the model sees the activated skills before the attachments. 
+ const contentToSend = + content && skillPrefix ? [{ type: "text" as const, text: skillPrefix }, ...content] : content; + const userMsg: ChatMessage = { id: generateId(), role: "user", @@ -1914,6 +1960,7 @@ export function createTabStore() { body: JSON.stringify({ tabId: tab.id, message: messageToSend, + ...(contentToSend ? { content: contentToSend } : {}), ...(tab.keyId ? { keyId: tab.keyId } : {}), ...(tab.modelId ? { modelId: tab.modelId } : {}), ...(tab.agentModels ? { agentModels: tab.agentModels } : {}), @@ -2312,6 +2359,7 @@ export function createTabStore() { renameTab, reorderTabs, setDraft, + addAttachment, sendMessage, cancelQueuedMessage, stopGeneration, diff --git a/packages/frontend/tests/attachment-tokens.test.ts b/packages/frontend/tests/attachment-tokens.test.ts new file mode 100644 index 0000000..7208cf3 --- /dev/null +++ b/packages/frontend/tests/attachment-tokens.test.ts @@ -0,0 +1,130 @@ +import { describe, expect, it } from "vitest"; +import { + computeTokenDeletion, + findTokens, + generateTokenId, + intactTokenIds, + makeAttachmentToken, + markerFor, + parseDraft, + type StagedAttachment, +} from "../src/lib/attachment-tokens.js"; + +function img(id: string): StagedAttachment { + return { id, kind: "image", mediaType: "image/png", data: "QQ==" }; +} +function pdf(id: string): StagedAttachment { + return { id, kind: "pdf", mediaType: "application/pdf", data: "QQ==", name: "doc.pdf" }; +} + +describe("token helpers", () => { + it("round-trips make/find", () => { + const tok = makeAttachmentToken("image", "abc123"); + expect(tok).toBe("【image:abc123】"); + const found = findTokens(`x ${tok} y`); + expect(found).toHaveLength(1); + expect(found[0]).toMatchObject({ id: "abc123", kind: "image", start: 2, end: 2 + tok.length }); + }); + + it("generates 6-char lowercase-alnum ids", () => { + for (let i = 0; i < 20; i++) { + expect(generateTokenId()).toMatch(/^[a-z0-9]{6}$/); + } + }); + + it("finds multiple tokens in order and reports intact ids", () 
=> { + const text = `a ${makeAttachmentToken("image", "aaaaaa")} b ${makeAttachmentToken("pdf", "bbbbbb")}`; + const found = findTokens(text); + expect(found.map((t) => t.id)).toEqual(["aaaaaa", "bbbbbb"]); + expect(intactTokenIds(text)).toEqual(new Set(["aaaaaa", "bbbbbb"])); + }); + + it("does not treat a partially-broken token as intact", () => { + // Missing closing bracket → not a valid token. + expect(intactTokenIds("【image:aaaaaa").size).toBe(0); + }); +}); + +describe("computeTokenDeletion", () => { + const tok = makeAttachmentToken("image", "abcabc"); + const text = `hi ${tok}!`; // token spans indices 3..3+len + const tokStart = 3; + const tokEnd = 3 + tok.length; + + it("returns null when no tokens exist", () => { + expect(computeTokenDeletion("plain", 2, 2, "Backspace")).toBeNull(); + }); + + it("Backspace just after a token removes the whole token atomically", () => { + const res = computeTokenDeletion(text, tokEnd, tokEnd, "Backspace"); + expect(res).not.toBeNull(); + expect(res?.text).toBe("hi !"); + expect(res?.caret).toBe(tokStart); + expect(res?.removedIds).toEqual(["abcabc"]); + }); + + it("Delete just before a token removes the whole token atomically", () => { + const res = computeTokenDeletion(text, tokStart, tokStart, "Delete"); + expect(res?.text).toBe("hi !"); + expect(res?.caret).toBe(tokStart); + expect(res?.removedIds).toEqual(["abcabc"]); + }); + + it("Backspace NOT adjacent to a token returns null (default editing)", () => { + // Caret at index 2 (after "hi"), token is further along. + expect(computeTokenDeletion(text, 2, 2, "Backspace")).toBeNull(); + }); + + it("a selection overlapping a token expands to cover the whole token", () => { + // Select from inside "hi " through the middle of the token. + const res = computeTokenDeletion(text, 1, tokStart + 3, "Backspace"); + expect(res).not.toBeNull(); + // Deletion starts at min(selStart, tokStart)=1 and ends at tokEnd. 
+ expect(res?.text).toBe("h!"); + expect(res?.removedIds).toEqual(["abcabc"]); + }); + + it("a range selection touching no token returns null", () => { + expect(computeTokenDeletion(text, 0, 2, "Backspace")).toBeNull(); + }); +}); + +describe("parseDraft", () => { + it("returns plain text + null content when there are no attachments", () => { + const res = parseDraft("just text", new Map()); + expect(res.displayText).toBe("just text"); + expect(res.content).toBeNull(); + }); + + it("interleaves text and attachment parts in order", () => { + const a = img("aaaaaa"); + const b = pdf("bbbbbb"); + const map = new Map([ + [a.id, a], + [b.id, b], + ]); + const draft = `A: ${makeAttachmentToken("image", a.id)} B: ${makeAttachmentToken("pdf", b.id)} end`; + const res = parseDraft(draft, map); + + // displayText swaps tokens for markers. + expect(res.displayText).toBe(`A: ${markerFor("image")} B: ${markerFor("pdf")} end`); + + // content interleaves the surrounding text with the attachment parts. + expect(res.content).toEqual([ + { type: "text", text: "A: " }, + { type: "attachment", mediaType: "image/png", data: "QQ==" }, + { type: "text", text: " B: " }, + { type: "attachment", mediaType: "application/pdf", data: "QQ==", name: "doc.pdf" }, + { type: "text", text: " end" }, + ]); + }); + + it("treats an orphan token (no staged attachment) as plain text", () => { + // Token present in text but not in the attachments map. + const draft = `x ${makeAttachmentToken("image", "zzzzzz")} y`; + const res = parseDraft(draft, new Map()); + expect(res.displayText).toBe(`x ${markerFor("image")} y`); + // No real attachment → null content (plain-text send). 
+ expect(res.content).toBeNull(); + }); +}); diff --git a/packages/frontend/tests/chat-store.test.ts b/packages/frontend/tests/chat-store.test.ts index a0d4ead..8639bff 100644 --- a/packages/frontend/tests/chat-store.test.ts +++ b/packages/frontend/tests/chat-store.test.ts @@ -2126,3 +2126,78 @@ describe("tabStore — per-tab chat input draft", () => { expect(store.tabs.every((t) => t.draft === "")).toBe(true); }); }); + +describe("tabStore — image/pdf attachments", () => { + function imgAttachment(id: string) { + return { id, kind: "image" as const, mediaType: "image/png", data: "QQ==" }; + } + + it("stages attachments and reconciles them against intact draft tokens", async () => { + vi.stubGlobal( + "fetch", + vi.fn(() => Promise.resolve({ ok: true, json: () => Promise.resolve({}) })), + ); + const store = createTabStore(); + const a = await store.createNewTab(); + store.switchTab(a.id); + + store.addAttachment(a.id, imgAttachment("aaaaaa")); + // Draft carries the token → attachment survives. + store.setDraft(a.id, "look 【image:aaaaaa】"); + expect(store.activeTab?.attachments.map((x) => x.id)).toEqual(["aaaaaa"]); + + // Remove the token from the draft → attachment is detached. 
+ store.setDraft(a.id, "look "); + expect(store.activeTab?.attachments).toHaveLength(0); + }); + + it("sendMessage posts ordered multimodal content and clears the draft", async () => { + const fetchMock = vi.fn((url: string) => { + if (typeof url === "string" && url.endsWith("/chat")) { + return Promise.resolve({ ok: true, json: () => Promise.resolve({ status: "ok" }) }); + } + return Promise.resolve({ ok: true, json: () => Promise.resolve({}) }); + }); + vi.stubGlobal("fetch", fetchMock); + + const store = createTabStore(); + const a = await store.createNewTab(); + store.switchTab(a.id); + + await store.sendMessage("here is A: [image]", [ + { type: "text", text: "here is A: " }, + { type: "attachment", mediaType: "image/png", data: "QQ==" }, + ]); + + const chatCall = fetchMock.mock.calls.find( + (c) => typeof c[0] === "string" && (c[0] as string).endsWith("/chat"), + ); + expect(chatCall).toBeDefined(); + const body = JSON.parse((chatCall?.[1] as { body: string }).body); + expect(body.message).toBe("here is A: [image]"); + expect(body.content).toEqual([ + { type: "text", text: "here is A: " }, + { type: "attachment", mediaType: "image/png", data: "QQ==" }, + ]); + }); + + it("sendMessage omits content for a plain-text message", async () => { + const fetchMock = vi.fn((url: string) => { + if (typeof url === "string" && url.endsWith("/chat")) { + return Promise.resolve({ ok: true, json: () => Promise.resolve({ status: "ok" }) }); + } + return Promise.resolve({ ok: true, json: () => Promise.resolve({}) }); + }); + vi.stubGlobal("fetch", fetchMock); + + const store = createTabStore(); + await store.createNewTab(); + await store.sendMessage("just text"); + + const chatCall = fetchMock.mock.calls.find( + (c) => typeof c[0] === "string" && (c[0] as string).endsWith("/chat"), + ); + const body = JSON.parse((chatCall?.[1] as { body: string }).body); + expect(body.content).toBeUndefined(); + }); +}); |
