summaryrefslogtreecommitdiffhomepage
path: root/packages/api/src/agent-manager.ts
diff options
context:
space:
mode:
author Adam Malczewski <[email protected]> 2026-06-02 22:50:11 +0900
committer Adam Malczewski <[email protected]> 2026-06-02 22:50:11 +0900
commit 66e5d3b105bfd2b34c6f35876bf33dbb3cb9dcae (patch)
tree c3e039e09c89231f84dfd16f7bbbf8aedcc2dc7d /packages/api/src/agent-manager.ts
parent 4b45d33c256cf580a53054078be6fd7148fa6302 (diff)
download dispatch-66e5d3b105bfd2b34c6f35876bf33dbb3cb9dcae.tar.gz
dispatch-66e5d3b105bfd2b34c6f35876bf33dbb3cb9dcae.zip
feat(chat): paste-to-attach images/PDFs with model capability check
Add multimodal image/PDF input to the chat box via clipboard paste, gated by a graceful per-model capability check. UX: a pasted image/PDF inserts an inline token (【image:…】 / 【pdf:…】) into the draft, so attachments have ORDER relative to typed text and can be referenced positionally. The token is the only handle — deleting it (atomic Backspace/Delete, or selection overlap) detaches the file; an input-reconciliation safety net detaches any attachment whose token is no longer intact. No preview strip. Capability check: resolveModelCapabilities reads models.dev modalities.input (new GET /models/capabilities, mirrors /context-limit). The input blocks Send (no tokens spent) only on a definitive 'no'; unknown capability (catalog offline / unmapped provider) stays permissive. Attachments require a fresh turn — Send is blocked while generating and /chat rejects content mid-turn (409). Attachments are EPHEMERAL: forwarded to the model for the turn via ordered AI SDK ImagePart/FilePart content, but never persisted (history keeps the text with [image]/[pdf] markers). Text-only turns serialize byte-identically to before. Limits (Anthropic-aligned, enforced at paste + re-validated server-side): PNG/JPEG/WebP/GIF/PDF; image ≤5MB, PDF ≤32MB, ≤20 attachments, ≤32MB total. core: UserContentPart types, models/attachments validator, capability resolver, agent.run+toModelMessages thread ordered content. api: /chat content validation + passthrough. frontend: attachment-tokens helper, ChatInput paste/token/gating, per-tab staged attachments, App.svelte capability fetch. +44 tests.
Diffstat (limited to 'packages/api/src/agent-manager.ts')
-rw-r--r-- packages/api/src/agent-manager.ts 11
1 files changed, 11 insertions, 0 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts
index 2532efa..3b12a80 100644
--- a/packages/api/src/agent-manager.ts
+++ b/packages/api/src/agent-manager.ts
@@ -65,6 +65,7 @@ import {
toAvailableUserAgents,
type UsageData,
type UsageStats,
+ type UserContentPart,
validateConfig,
} from "@dispatch/core";
import type { PermissionManager } from "./permission-manager.js";
@@ -1536,6 +1537,13 @@ export class AgentManager {
workingDirectory?: string;
queueId?: string;
/**
+ * Ephemeral ordered multimodal content (image/pdf attachments) for a
+ * FRESH human turn. Forwarded to `processMessage` → `agent.run` only
+ * when the tab is idle (a started turn); never carried into the queue
+ * path (attachments require a fresh turn — the caller guards that).
+ */
+ content?: UserContentPart[];
+ /**
* Who is sending this message. `"human"` (default) is unrestricted
* and REFILLS the target's agent-to-agent auto-wake budget. `"agent"`
* (from the `send_to_tab` tool) is governed by that budget: an
@@ -1606,6 +1614,7 @@ export class AgentManager {
opts.reasoningEffort,
opts.workingDirectory,
agentModels,
+ opts.content,
).catch((err) => {
console.error(`[dispatch] deliverMessage processMessage error for tab ${tabId}:`, err);
});
@@ -1620,6 +1629,7 @@ export class AgentManager {
reasoningEffort?: ReasoningEffort,
workingDirectory?: string,
agentModels?: AgentModelEntry[],
+ content?: UserContentPart[],
): Promise<void> {
const tabAgent = this._getOrCreateTabAgent(tabId);
@@ -1731,6 +1741,7 @@ export class AgentManager {
for await (const event of agent.run(message, {
...(effortForEntry ? { reasoningEffort: effortForEntry } : {}),
abortSignal: tabAgent.abortController?.signal,
+ ...(content ? { content } : {}),
})) {
// Stop processing if the tab was aborted (closed/stopped).
// stopTab() already injected a `cancelled` system chunk into