summaryrefslogtreecommitdiffhomepage
path: root/packages/frontend/src/lib
diff options
context:
space:
mode:
authorAdam Malczewski <[email protected]>2026-06-02 22:50:11 +0900
committerAdam Malczewski <[email protected]>2026-06-02 22:50:11 +0900
commit66e5d3b105bfd2b34c6f35876bf33dbb3cb9dcae (patch)
treec3e039e09c89231f84dfd16f7bbbf8aedcc2dc7d /packages/frontend/src/lib
parent4b45d33c256cf580a53054078be6fd7148fa6302 (diff)
downloaddispatch-66e5d3b105bfd2b34c6f35876bf33dbb3cb9dcae.tar.gz
dispatch-66e5d3b105bfd2b34c6f35876bf33dbb3cb9dcae.zip
feat(chat): paste-to-attach images/PDFs with model capability check
Add multimodal image/PDF input to the chat box via clipboard paste, gated by a graceful per-model capability check. UX: a pasted image/PDF inserts an inline token (【image:…】 / 【pdf:…】) into the draft, so attachments have ORDER relative to typed text and can be referenced positionally. The token is the only handle — deleting it (atomic Backspace/Delete, or selection overlap) detaches the file; an input-reconciliation safety net detaches any attachment whose token is no longer intact. No preview strip. Capability check: resolveModelCapabilities reads models.dev modalities.input (new GET /models/capabilities, mirrors /context-limit). The input blocks Send (no tokens spent) only on a definitive 'no'; unknown capability (catalog offline / unmapped provider) stays permissive. Attachments require a fresh turn — Send is blocked while generating and /chat rejects content mid-turn (409). Attachments are EPHEMERAL: forwarded to the model for the turn via ordered AI SDK ImagePart/FilePart content, but never persisted (history keeps the text with [image]/[pdf] markers). Text-only turns serialize byte-identically to before. Limits (Anthropic-aligned, enforced at paste + re-validated server-side): PNG/JPEG/WebP/GIF/PDF; image ≤5MB, PDF ≤32MB, ≤20 attachments, ≤32MB total. core: UserContentPart types, models/attachments validator, capability resolver, agent.run+toModelMessages thread ordered content. api: /chat content validation + passthrough. frontend: attachment-tokens helper, ChatInput paste/token/gating, per-tab staged attachments, App.svelte capability fetch. +44 tests.
Diffstat (limited to 'packages/frontend/src/lib')
-rw-r--r--packages/frontend/src/lib/attachment-tokens.ts234
-rw-r--r--packages/frontend/src/lib/components/ChatInput.svelte223
-rw-r--r--packages/frontend/src/lib/tabs.svelte.ts66
3 files changed, 504 insertions, 19 deletions
diff --git a/packages/frontend/src/lib/attachment-tokens.ts b/packages/frontend/src/lib/attachment-tokens.ts
new file mode 100644
index 0000000..79d4cbc
--- /dev/null
+++ b/packages/frontend/src/lib/attachment-tokens.ts
@@ -0,0 +1,234 @@
+// Inline attachment tokens for the chat input.
+//
+// A pasted image/PDF is represented in the textarea draft as an inline TOKEN
+// (e.g. `【image:a1b2c3】`). The token is ordinary text living inside the draft,
+// so attachments have ORDER relative to typed text and to each other, and the
+// user can reference them positionally ("here is image A: 【image:…】"). The
+// token is also the ONLY handle on an attachment — deleting it (atomic delete,
+// below) detaches the underlying file. There is no separate preview strip.
+//
+// This module is pure (no DOM, no Svelte) so it can be unit-tested directly.
+
+import type { UserContentPart } from "@dispatch/core/src/types/index.js";
+
+export type AttachmentKind = "image" | "pdf";
+
+/**
+ * A staged attachment, keyed by its short token id. The `id` here is the same
+ * id that appears inside the attachment's inline token in the draft.
+ */
+export interface StagedAttachment {
+ /** Token id — the `<id>` part of `【<kind>:<id>】`. */
+ id: string;
+ /** Token kind — the `<kind>` part of `【<kind>:<id>】`. */
+ kind: AttachmentKind;
+ /** IANA media type, e.g. `image/png`, `application/pdf`. */
+ mediaType: string;
+ /** Base64 payload WITHOUT a `data:` URI prefix. */
+ data: string;
+ /** Optional original filename (used for PDFs). */
+ name?: string;
+}
+
+/**
+ * Token grammar: `【<kind>:<id>】` where kind ∈ {image,pdf} and id is 6
+ * lowercase alphanumerics. The CJK corner brackets (U+3010/U+3011) are used as
+ * delimiters because they're visually distinct and virtually never typed by
+ * hand, so a token won't collide with normal prose.
+ *
+ * Capture group 1 is the kind and capture group 2 is the id. The pattern
+ * carries the `g` flag, so this shared instance keeps `lastIndex` state
+ * between `exec`/`test` calls — build a fresh `RegExp` from `.source` when
+ * scanning text rather than matching against this object directly.
+ */
+export const ATTACHMENT_TOKEN_RE = /【(image|pdf):([a-z0-9]{6})】/g;
+
+/**
+ * Render the inline draft token for an attachment.
+ *
+ * @param kind - Attachment kind written before the colon.
+ * @param id - Short token id written after the colon.
+ * @returns The token text, wrapped in the CJK corner brackets.
+ */
+export function makeAttachmentToken(kind: AttachmentKind, id: string): string {
+  const open = "【";
+  const close = "】";
+  return open + kind + ":" + id + close;
+}
+
+/**
+ * Produce a fresh token id: six characters drawn from `a-z0-9`.
+ *
+ * Uses `crypto.getRandomValues` when the runtime exposes it (browsers, modern
+ * Node/Bun) and falls back to `Math.random` otherwise.
+ */
+export function generateTokenId(): string {
+  const alphabet = "abcdefghijklmnopqrstuvwxyz0123456789";
+  const idLength = 6;
+  const cryptoObj = (globalThis as { crypto?: Crypto }).crypto;
+
+  let picks: number[];
+  if (cryptoObj?.getRandomValues) {
+    // One random 32-bit word per character, reduced onto the alphabet.
+    const words = new Uint32Array(idLength);
+    cryptoObj.getRandomValues(words);
+    picks = Array.from(words, (word) => word % alphabet.length);
+  } else {
+    picks = Array.from({ length: idLength }, () => Math.floor(Math.random() * alphabet.length));
+  }
+  return picks.map((pick) => alphabet[pick]).join("");
+}
+
+/** One attachment token located in a piece of text, as reported by `findTokens`. */
+export interface FoundToken {
+ /** The id part of the token (second capture group of the token pattern). */
+ id: string;
+ /** The kind part of the token (first capture group of the token pattern). */
+ kind: AttachmentKind;
+ /** Inclusive start index of the token within the text. */
+ start: number;
+ /** Exclusive end index of the token within the text. */
+ end: number;
+}
+
+/**
+ * Scan `text` for attachment tokens.
+ *
+ * @param text - Draft text to scan.
+ * @returns Every token found, in order of appearance, with its kind, id and
+ *   `[start, end)` span.
+ */
+export function findTokens(text: string): FoundToken[] {
+  // A private regex instance per call: the exported pattern is global, and its
+  // `lastIndex` must never leak from one scan into the next.
+  const scanner = new RegExp(ATTACHMENT_TOKEN_RE.source, "g");
+  const found: FoundToken[] = [];
+  for (const match of text.matchAll(scanner)) {
+    const start = match.index ?? 0;
+    found.push({
+      kind: match[1] as AttachmentKind,
+      id: match[2] ?? "",
+      start,
+      end: start + match[0].length,
+    });
+  }
+  return found;
+}
+
+/**
+ * Collect the ids of every attachment token that still appears, whole, in
+ * `text`.
+ */
+export function intactTokenIds(text: string): Set<string> {
+  const ids = new Set<string>();
+  for (const token of findTokens(text)) {
+    ids.add(token.id);
+  }
+  return ids;
+}
+
+/** Outcome of a token-aware deletion, as returned by `computeTokenDeletion`. */
+export interface DeletionResult {
+ /** Text after the deletion. */
+ text: string;
+ /** New caret position (collapsed) after the deletion. */
+ caret: number;
+ /** Ids of attachments whose tokens were removed by this deletion. */
+ removedIds: string[];
+}
+
+/**
+ * Work out what a Backspace/Delete keystroke should do when it touches an
+ * attachment token, so that a token is always removed ATOMICALLY (the whole
+ * `【…】` in one keystroke, never a lone bracket or id character).
+ *
+ * Rules:
+ * - Range selection (`selStart !== selEnd`): widen the range so it fully
+ *   covers every token it overlaps, then delete the widened range.
+ * - Collapsed caret strictly INSIDE a token: delete that token, whichever key
+ *   was pressed. Letting the browser remove a single character here would
+ *   leave a broken token fragment behind in the draft.
+ * - Collapsed + Backspace: delete the token that ends exactly at the caret.
+ * - Collapsed + Delete: delete the token that starts exactly at the caret.
+ *
+ * @param text - Current draft text.
+ * @param selStart - Selection start offset (may be greater than `selEnd`).
+ * @param selEnd - Selection end offset; equal to `selStart` for a collapsed caret.
+ * @param key - Which deletion key was pressed.
+ * @returns The edited text, the collapsed caret position and the ids of the
+ *   removed tokens — or `null` when the keystroke touches no token, in which
+ *   case the caller should let the browser's default editing behaviour run.
+ */
+export function computeTokenDeletion(
+  text: string,
+  selStart: number,
+  selEnd: number,
+  key: "Backspace" | "Delete",
+): DeletionResult | null {
+  const tokens = findTokens(text);
+  if (tokens.length === 0) return null;
+
+  const lo = Math.min(selStart, selEnd);
+  const hi = Math.max(selStart, selEnd);
+
+  // Tokens never overlap one another, so a collapsed caret matches at most one
+  // token; a range selection may overlap several.
+  const hit =
+    lo !== hi
+      ? tokens.filter((t) => t.start < hi && t.end > lo)
+      : tokens.filter(
+          (t) =>
+            (t.start < lo && lo < t.end) ||
+            (key === "Backspace" ? t.end === lo : t.start === lo),
+        );
+  if (hit.length === 0) return null;
+
+  // Widen the deleted span to whole-token boundaries. For a collapsed caret
+  // this is exactly the matched token's own span.
+  const delStart = Math.min(lo, ...hit.map((t) => t.start));
+  const delEnd = Math.max(hi, ...hit.map((t) => t.end));
+  return {
+    text: text.slice(0, delStart) + text.slice(delEnd),
+    caret: delStart,
+    removedIds: hit.map((t) => t.id),
+  };
+}
+
+/**
+ * The plain-text stand-in for a token in persisted/display text: `[pdf]` for
+ * PDFs, `[image]` for everything else.
+ */
+export function markerFor(kind: AttachmentKind): string {
+  if (kind === "pdf") {
+    return "[pdf]";
+  }
+  return "[image]";
+}
+
+/** The two projections of a draft produced by `parseDraft`. */
+export interface ParsedDraft {
+ /**
+ * Text-only projection of the draft with each attachment token replaced by a
+ * `[image]` / `[pdf]` marker. This is what gets persisted and rendered in the
+ * chat history (the raw bytes are never stored).
+ */
+ displayText: string;
+ /**
+ * Ordered multimodal content (interleaved text + attachment parts) to send to
+ * the model, or `null` when the draft has no intact attachment token (the
+ * caller then sends plain text).
+ */
+ content: UserContentPart[] | null;
+}
+
+/**
+ * Split a draft (text containing attachment tokens) plus the staged-attachment
+ * map into:
+ * - `displayText`: tokens swapped for `[image]`/`[pdf]` markers, and
+ * - `content`: an ordered `UserContentPart[]` interleaving the surrounding text
+ *   with the matching attachment parts.
+ *
+ * Details:
+ * - The marker for a staged attachment is chosen from the ATTACHMENT's kind,
+ *   not from the kind written in the token text, so a token whose text
+ *   disagrees with its staged file never mislabels that file.
+ * - Each staged attachment is emitted at most once, at its first token. A
+ *   repeated token for the same id contributes only its marker text, so the
+ *   payload is never sent twice and the per-message count/size limits checked
+ *   when the file was staged still hold for the content that is sent.
+ * - A token whose id has no staged attachment (e.g. a stray paste of token
+ *   text) is plain text in BOTH outputs: its marker appears, but it adds no
+ *   attachment part.
+ *
+ * @param draft - Raw draft text, tokens included.
+ * @param attachments - Staged attachments keyed by token id.
+ * @returns `displayText` plus `content`; `content` is `null` when no
+ *   attachment part was produced (the caller then sends plain text).
+ */
+export function parseDraft(draft: string, attachments: Map<string, StagedAttachment>): ParsedDraft {
+  const content: UserContentPart[] = [];
+  const emittedIds = new Set<string>();
+  let displayText = "";
+  let textBuf = "";
+  let cursor = 0;
+
+  // Push the pending run of text (if any) as one text part.
+  const flushText = () => {
+    if (textBuf.length > 0) {
+      content.push({ type: "text", text: textBuf });
+      textBuf = "";
+    }
+  };
+
+  for (const tok of findTokens(draft)) {
+    const between = draft.slice(cursor, tok.start);
+    textBuf += between;
+    displayText += between;
+    cursor = tok.end;
+
+    const att = attachments.get(tok.id);
+    const marker = markerFor(att?.kind ?? tok.kind);
+    // displayText (persisted/rendered) always gets the marker.
+    displayText += marker;
+
+    if (att && !emittedIds.has(tok.id)) {
+      // The multimodal content gets the ACTUAL attachment part instead of
+      // marker text — the part itself represents the file to the model.
+      flushText();
+      content.push({
+        type: "attachment",
+        mediaType: att.mediaType,
+        data: att.data,
+        ...(att.name ? { name: att.name } : {}),
+      });
+      emittedIds.add(tok.id);
+    } else {
+      // Orphan or repeated token: marker text only, no attachment part.
+      textBuf += marker;
+    }
+  }
+
+  const tail = draft.slice(cursor);
+  textBuf += tail;
+  displayText += tail;
+  flushText();
+
+  return { displayText, content: emittedIds.size > 0 ? content : null };
+}
diff --git a/packages/frontend/src/lib/components/ChatInput.svelte b/packages/frontend/src/lib/components/ChatInput.svelte
index 079ef4a..4067b78 100644
--- a/packages/frontend/src/lib/components/ChatInput.svelte
+++ b/packages/frontend/src/lib/components/ChatInput.svelte
@@ -1,12 +1,40 @@
<script lang="ts">
+import {
+ ACCEPTED_PDF_MEDIA_TYPE,
+ isImageMediaType,
+ isPdfMediaType,
+ MAX_ATTACHMENTS,
+ MAX_IMAGE_BYTES,
+ MAX_PDF_BYTES,
+} from "@dispatch/core/src/models/attachments.js";
+import {
+ type AttachmentKind,
+ computeTokenDeletion,
+ generateTokenId,
+ makeAttachmentToken,
+ parseDraft,
+ type StagedAttachment,
+} from "../attachment-tokens.js";
import { computeContextUsage } from "../context-window.js";
import { tabStore } from "../tabs.svelte.js";
-const { contextLimit = null }: { contextLimit?: number | null } = $props();
+const {
+ contextLimit = null,
+ imageSupport = null,
+}: {
+ contextLimit?: number | null;
+ // Image/PDF INPUT capability for the active model, or `null` when unknown
+ // (catalog offline / unsupported provider) — null means "can't verify"
+ // (optimistic allow), not a hard no.
+ imageSupport?: { image: boolean; pdf: boolean } | null;
+} = $props();
const MAX_LINES = 7;
let inputEl: HTMLTextAreaElement | undefined;
+// Transient error shown when a paste is rejected (bad type / too large / too
+// many). Cleared on the next successful paste or any keystroke.
+let pasteError = $state<string | null>(null);
const agentStatus = $derived(tabStore.activeTab?.agentStatus ?? "idle");
const tabId = $derived(tabStore.activeTab?.id ?? "");
@@ -14,13 +42,47 @@ const tabId = $derived(tabStore.activeTab?.id ?? "");
// switching tabs saves the current draft and restores the target tab's text
// automatically — drafts are never lost or clobbered by tab switching.
const inputValue = $derived(tabStore.activeTab?.draft ?? "");
+const attachments = $derived(tabStore.activeTab?.attachments ?? []);
const cacheStats = $derived(tabStore.activeTab?.cacheStats ?? null);
const isRunning = $derived(agentStatus === "running");
const hasText = $derived(inputValue.trim().length > 0);
+const hasAttachments = $derived(attachments.length > 0);
// While generating with an empty box, the primary action is "stop". With text
// in the box, it stays "send" (the message is queued behind the live turn).
-const showStop = $derived(isRunning && !hasText);
+const showStop = $derived(isRunning && !hasText && !hasAttachments);
+
+// ─── Attachment capability gating ──────────────────────────────
+// A definitive "no" from the catalog (imageSupport.image === false with an
+// image staged, or .pdf === false with a pdf staged) blocks the send so no
+// tokens are spent. Unknown capability (imageSupport === null) is permissive.
+const hasImageAttachment = $derived(attachments.some((a) => a.kind === "image"));
+const hasPdfAttachment = $derived(attachments.some((a) => a.kind === "pdf"));
+const imageBlocked = $derived(
+ hasImageAttachment && imageSupport !== null && imageSupport.image === false,
+);
+const pdfBlocked = $derived(
+ hasPdfAttachment && imageSupport !== null && imageSupport.pdf === false,
+);
+// Attachments require a fresh turn — they can't ride the queue path (which is
+// text-only), so block sending an attachment while the agent is generating.
+const attachmentsWhileRunning = $derived(hasAttachments && isRunning);
+
+const attachmentWarning = $derived.by(() => {
+ if (pasteError) return pasteError;
+ if (attachmentsWhileRunning)
+ return "Wait for the current response to finish before sending images.";
+ if (imageBlocked && pdfBlocked)
+ return "The selected model doesn't support image or PDF input. Remove the attachments to send.";
+ if (imageBlocked)
+ return "The selected model doesn't support image input. Remove the image to send.";
+ if (pdfBlocked) return "The selected model doesn't support PDF input. Remove the PDF to send.";
+ return null;
+});
+
+// Send is blocked (but not the box) when an attachment is definitively
+// unsupported or when attachments are staged mid-generation.
+const sendBlocked = $derived(imageBlocked || pdfBlocked || attachmentsWhileRunning);
const usage = $derived(computeContextUsage(cacheStats, contextLimit));
const hasUsage = $derived((cacheStats?.last ?? null) !== null);
@@ -77,21 +139,153 @@ $effect(() => {
function handleInput(e: Event) {
if (!tabId) return;
+ pasteError = null;
+ // setDraft also reconciles staged attachments against the surviving tokens,
+ // so deleting a token (by any means) detaches its attachment.
tabStore.setDraft(tabId, (e.currentTarget as HTMLTextAreaElement).value);
}
+/**
+ * Map a media type onto an attachment kind: image types first, then PDF.
+ * Returns `null` for anything that is neither.
+ */
+function kindForMediaType(mediaType: string): AttachmentKind | null {
+  return isImageMediaType(mediaType) ? "image" : isPdfMediaType(mediaType) ? "pdf" : null;
+}
+
+/**
+ * Read `file` through a `FileReader` data URL and resolve with the bare base64
+ * payload. Rejects if the read fails or the reader yields a non-string result.
+ */
+function readAsBase64(file: File): Promise<string> {
+  return new Promise<string>((resolve, reject) => {
+    const reader = new FileReader();
+    reader.onerror = () => reject(reader.error ?? new Error("read failed"));
+    reader.onload = () => {
+      const dataUrl = reader.result;
+      if (typeof dataUrl === "string") {
+        // Drop the `data:<mediaType>;base64,` prefix. When there is no comma,
+        // `indexOf` gives -1 and `slice(0)` keeps the whole string.
+        resolve(dataUrl.slice(dataUrl.indexOf(",") + 1));
+      } else {
+        reject(new Error("unexpected reader result"));
+      }
+    };
+    reader.readAsDataURL(file);
+  });
+}
+
+/**
+ * Replace the textarea's current selection with `insert` (or append it when
+ * the textarea reports no selection) and store the result as the active
+ * tab's draft.
+ *
+ * @returns The caret offset just past the inserted text.
+ */
+function insertAtCaret(insert: string): number {
+  const draft = inputValue;
+  const selStart = inputEl?.selectionStart ?? draft.length;
+  const selEnd = inputEl?.selectionEnd ?? draft.length;
+  if (tabId) {
+    tabStore.setDraft(tabId, `${draft.slice(0, selStart)}${insert}${draft.slice(selEnd)}`);
+  }
+  return selStart + insert.length;
+}
+
+/**
+ * Paste handler for the chat textarea. When the clipboard carries files, each
+ * acceptable image/PDF is read as base64, staged on the active tab, and
+ * represented in the draft by an inline token inserted at the caret. Pastes
+ * without files are left to the browser's default text paste.
+ *
+ * Rejections are reported through `pasteError`; a later rejection in the same
+ * paste overwrites an earlier one, so only the last message is kept.
+ */
+async function handlePaste(e: ClipboardEvent) {
+ if (!tabId) return;
+ const items = e.clipboardData?.items;
+ if (!items) return;
+ // Collect the files synchronously, before any `await` below.
+ const files: File[] = [];
+ for (const item of items) {
+ if (item.kind === "file") {
+ const file = item.getAsFile();
+ if (file) files.push(file);
+ }
+ }
+ // No files in the clipboard → let the default text paste happen.
+ if (files.length === 0) return;
+ // We're handling at least one file; stop the browser from also pasting a
+ // filename / image fallback into the textarea.
+ e.preventDefault();
+ pasteError = null;
+
+ // Files are processed one at a time, so each count check below sees the
+ // attachments staged by earlier iterations.
+ // NOTE(review): only the per-file size and the attachment count are checked
+ // here; no combined-size check is visible in this handler — confirm whether
+ // a total-size limit is enforced elsewhere.
+ for (const file of files) {
+ const kind = kindForMediaType(file.type);
+ if (!kind) {
+ // Unsupported type: skip this file but keep processing the rest.
+ pasteError = `Unsupported file type: ${file.type || "unknown"}. Allowed: PNG, JPEG, WebP, GIF, PDF.`;
+ continue;
+ }
+ const current = tabStore.activeTab?.attachments ?? [];
+ if (current.length >= MAX_ATTACHMENTS) {
+ // At the cap: nothing further can be staged, so stop the whole loop.
+ pasteError = `You can attach at most ${MAX_ATTACHMENTS} files per message.`;
+ break;
+ }
+ const limit = kind === "pdf" ? MAX_PDF_BYTES : MAX_IMAGE_BYTES;
+ if (file.size > limit) {
+ // Too large: skip this file only.
+ const mb = Math.round(limit / (1024 * 1024));
+ pasteError = `${kind === "pdf" ? "PDF" : "Image"} is too large (max ${mb} MB).`;
+ continue;
+ }
+ try {
+ const data = await readAsBase64(file);
+ const id = generateTokenId();
+ // PDFs are always staged under the accepted PDF media type; images keep
+ // the type reported by the file.
+ const mediaType = kind === "pdf" ? ACCEPTED_PDF_MEDIA_TYPE : file.type;
+ const staged: StagedAttachment = {
+ id,
+ kind,
+ mediaType,
+ data,
+ ...(file.name ? { name: file.name } : {}),
+ };
+ // Stage first, then insert the token — `setDraft` reconciles against
+ // staged attachments, so the attachment must exist before its token
+ // appears in the draft.
+ tabStore.addAttachment(tabId, staged);
+ const caret = insertAtCaret(makeAttachmentToken(kind, id));
+ // Restore the caret after the value updates.
+ requestAnimationFrame(() => {
+ const el = inputEl;
+ if (el) {
+ el.focus();
+ el.setSelectionRange(caret, caret);
+ }
+ });
+ } catch {
+ // Any failure inside the `try` above is reported as a read failure.
+ pasteError = "Failed to read the pasted file.";
+ }
+ }
+}
+
function handleKeydown(e: KeyboardEvent) {
if (e.key === "Enter" && !e.shiftKey) {
e.preventDefault();
submit();
+ return;
+ }
+ if ((e.key === "Backspace" || e.key === "Delete") && inputEl && tabId) {
+ // Atomic token delete: a single Backspace/Delete next to (or a selection
+ // overlapping) a `【…】` token removes the whole token in one stroke.
+ const result = computeTokenDeletion(
+ inputValue,
+ inputEl.selectionStart ?? 0,
+ inputEl.selectionEnd ?? 0,
+ e.key,
+ );
+ if (result) {
+ e.preventDefault();
+ tabStore.setDraft(tabId, result.text);
+ requestAnimationFrame(() => {
+ const el = inputEl;
+ if (el) {
+ el.focus();
+ el.setSelectionRange(result.caret, result.caret);
+ }
+ });
+ }
}
}
function submit() {
- const text = inputValue.trim();
- if (!text) return;
- if (tabId) tabStore.setDraft(tabId, "");
- tabStore.sendMessage(text);
+ if (!tabId) return;
+ const map = new Map(attachments.map((a) => [a.id, a] as const));
+ const { displayText, content } = parseDraft(inputValue, map);
+ const trimmed = displayText.trim();
+ // Nothing to send (no text and no usable attachment).
+ if (!trimmed && !content) return;
+ // Don't send when a staged attachment is unsupported / mid-generation.
+ if (sendBlocked) return;
+ const text = trimmed || displayText;
+ tabStore.setDraft(tabId, "");
+ void tabStore.sendMessage(text, content ?? undefined);
}
function primaryAction() {
@@ -104,25 +298,36 @@ function primaryAction() {
</script>
<div class="flex flex-col">
+ {#if attachmentWarning}
+ <div class="px-3 pt-2 text-xs text-warning flex items-start gap-1">
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="w-3.5 h-3.5 mt-0.5 shrink-0" aria-hidden="true">
+ <path d="M10.29 3.86 1.82 18a2 2 0 0 0 1.71 3h16.94a2 2 0 0 0 1.71-3L13.71 3.86a2 2 0 0 0-3.42 0z"></path>
+ <line x1="12" y1="9" x2="12" y2="13"></line>
+ <line x1="12" y1="17" x2="12.01" y2="17"></line>
+ </svg>
+ <span>{attachmentWarning}</span>
+ </div>
+ {/if}
<!-- Top bar: expanding textarea + send/stop action -->
<div class="flex items-end gap-2 px-3 pt-3 pb-2">
<textarea
bind:this={inputEl}
value={inputValue}
rows="1"
- placeholder="Type a message..."
+ placeholder="Type a message... (paste an image or PDF to attach)"
class="textarea textarea-ghost flex-1 resize-none leading-normal !min-h-0 h-auto"
onkeydown={handleKeydown}
oninput={handleInput}
+ onpaste={handlePaste}
></textarea>
<!-- Single fixed-width button across all states so the layout never
shifts when it morphs between Send and Stop. -->
<button
type="button"
class="btn w-20 shrink-0 {showStop ? 'btn-error btn-outline' : 'btn-primary'}"
- disabled={!showStop && !hasText}
+ disabled={!showStop && !hasText && !hasAttachments || sendBlocked}
onclick={primaryAction}
- title={showStop ? "Stop generation" : "Send message"}
+ title={showStop ? "Stop generation" : sendBlocked ? (attachmentWarning ?? "Cannot send") : "Send message"}
>
{#if showStop}
<span class="loading loading-spinner loading-sm"></span>
diff --git a/packages/frontend/src/lib/tabs.svelte.ts b/packages/frontend/src/lib/tabs.svelte.ts
index 9975d7b..e33a0e9 100644
--- a/packages/frontend/src/lib/tabs.svelte.ts
+++ b/packages/frontend/src/lib/tabs.svelte.ts
@@ -11,13 +11,14 @@ import {
// DB-free; safe in the browser bundle. The flat chunk log is the frontend's
// source of truth for HISTORY; `groupRowsToMessages` derives render bubbles.
import { groupRowsToMessages, type MessageRow } from "@dispatch/core/src/chunks/transform.js";
-import type { ChunkRow } from "@dispatch/core/src/types/index.js";
+import type { ChunkRow, UserContentPart } from "@dispatch/core/src/types/index.js";
import {
type AgentModelEntry,
DEFAULT_REASONING_EFFORT,
isReasoningEffort,
type ReasoningEffort,
} from "@dispatch/core/src/types/index.js";
+import { intactTokenIds, type StagedAttachment } from "./attachment-tokens.js";
import { config } from "./config.js";
import { appSettings } from "./settings.svelte.js";
import type {
@@ -183,6 +184,13 @@ export interface Tab {
*/
draft: string;
/**
+ * Staged image/PDF attachments for THIS tab's unsent draft (in-memory only —
+ * never persisted). Each corresponds to an inline `【image:…】`/`【pdf:…】`
+ * token in `draft`; removing the token detaches the attachment (reconciled on
+ * every keystroke). Ephemeral: sent to the model for one turn, then cleared.
+ */
+ attachments: StagedAttachment[];
+ /**
* True once the user has manually renamed this tab (double-click rename).
* Suppresses the first-message auto-title so a chosen name is never
* clobbered. In-memory only — a renamed tab is no longer "New Tab" on
@@ -312,6 +320,7 @@ export function createTabStore() {
queuedMessages: [],
chunkLimit: appSettings.chunkLimit,
draft: "",
+ attachments: [],
manualTitle: false,
oldestLoadedSeq: null,
totalChunks: 0,
@@ -389,6 +398,7 @@ export function createTabStore() {
queuedMessages: [],
chunkLimit: appSettings.chunkLimit,
draft: "",
+ attachments: [],
manualTitle: false,
oldestLoadedSeq: win.oldestSeq,
totalChunks: win.total,
@@ -493,8 +503,31 @@ export function createTabStore() {
* target tab shows its own text. No-op if the tab is gone.
*/
function setDraft(id: string, text: string): void {
- if (!getTabById(id)) return;
- updateTab(id, { draft: text });
+ const tab = getTabById(id);
+ if (!tab) return;
+ // Detach any staged attachment whose inline token is no longer intact in
+ // the new draft text (covers atomic-delete, manual mid-token edits, cut,
+ // select-all-delete, etc.). The token in the textarea is the ONLY handle
+ // on an attachment, so reconciling here keeps the two in lockstep.
+ const intact = intactTokenIds(text);
+ const keep = tab.attachments.filter((a) => intact.has(a.id));
+ if (keep.length !== tab.attachments.length) {
+ updateTab(id, { draft: text, attachments: keep });
+ } else {
+ updateTab(id, { draft: text });
+ }
+ }
+
+ /**
+  * Append a pasted attachment to a tab's staged list. Inserting the matching
+  * `【image:…】`/`【pdf:…】` token into the draft is the CALLER's job — that
+  * token is what keeps the attachment alive through draft reconciliation.
+  * Does nothing when the tab no longer exists.
+  */
+ function addAttachment(id: string, attachment: StagedAttachment): void {
+   const owner = getTabById(id);
+   if (owner) {
+     const staged = [...owner.attachments, attachment];
+     updateTab(id, { attachments: staged });
+   }
+ }
/**
@@ -929,6 +962,7 @@ export function createTabStore() {
queuedMessages: [],
chunkLimit: appSettings.chunkLimit,
draft: "",
+ attachments: [],
manualTitle: false,
oldestLoadedSeq: win.oldestSeq,
totalChunks: win.total,
@@ -1284,6 +1318,7 @@ export function createTabStore() {
queuedMessages: [],
chunkLimit: appSettings.chunkLimit,
draft: "",
+ attachments: [],
manualTitle: false,
oldestLoadedSeq: null,
totalChunks: 0,
@@ -1604,7 +1639,7 @@ export function createTabStore() {
}
}
- async function sendMessage(text: string): Promise<void> {
+ async function sendMessage(text: string, content?: UserContentPart[]): Promise<void> {
let tab = getActiveTab();
if (!tab) return;
@@ -1615,8 +1650,11 @@ export function createTabStore() {
if (!tab) return;
}
- // Fetch content for checked skills and build the message to send
- let messageToSend = text;
+ // Fetch content for checked skills and build the message to send.
+ // `skillPrefix` (when non-empty) is prepended to BOTH the text projection
+ // that gets persisted/rendered AND the multimodal content array, so an
+ // image turn still carries the activated skills to the model.
+ let skillPrefix = "";
const checkedKeys = Object.entries(appSettings.skillChecks)
.filter(([, v]) => v)
.map(([k]) => k);
@@ -1627,13 +1665,13 @@ export function createTabStore() {
const [scope, ...nameParts] = key.split(":");
const name = nameParts.join(":");
if (!scope || !name) continue;
- const content = await fetchSkillContent(scope, name);
- if (content) {
- skillSections.push(`<skill name="${name}">\n${content}\n</skill>`);
+ const skillContent = await fetchSkillContent(scope, name);
+ if (skillContent) {
+ skillSections.push(`<skill name="${name}">\n${skillContent}\n</skill>`);
}
}
if (skillSections.length > 0) {
- messageToSend = `[The following skills have been activated for this message]\n\n${skillSections.join("\n\n")}\n\n---\n\n${text}`;
+ skillPrefix = `[The following skills have been activated for this message]\n\n${skillSections.join("\n\n")}\n\n---\n\n`;
}
// Track injected skills on the tab
@@ -1644,6 +1682,12 @@ export function createTabStore() {
appSettings.skillChecks = {};
}
+ const messageToSend = `${skillPrefix}${text}`;
+ // Prepend the skill prefix to the multimodal content as a leading text
+ // part so the model sees the activated skills before the attachments.
+ const contentToSend =
+ content && skillPrefix ? [{ type: "text" as const, text: skillPrefix }, ...content] : content;
+
const userMsg: ChatMessage = {
id: generateId(),
role: "user",
@@ -1720,6 +1764,7 @@ export function createTabStore() {
body: JSON.stringify({
tabId: tab.id,
message: messageToSend,
+ ...(contentToSend ? { content: contentToSend } : {}),
...(tab.keyId ? { keyId: tab.keyId } : {}),
...(tab.modelId ? { modelId: tab.modelId } : {}),
...(tab.agentModels ? { agentModels: tab.agentModels } : {}),
@@ -2118,6 +2163,7 @@ export function createTabStore() {
renameTab,
reorderTabs,
setDraft,
+ addAttachment,
sendMessage,
cancelQueuedMessage,
stopGeneration,