summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Adam Malczewski <[email protected]> 2026-06-27 20:06:47 +0900
committer: Adam Malczewski <[email protected]> 2026-06-27 20:06:47 +0900
commit: 2e741d1c1ac309327aff4fed0e248bc5baa342d4 (patch)
tree: fb84708f55e07572e4c69447884365d9b457755c
parent: 2c91dc63802a386b1612ea0ed8c1e96b6f4421db (diff)
download: dispatch-2e741d1c1ac309327aff4fed0e248bc5baa342d4.tar.gz
dispatch-2e741d1c1ac309327aff4fed0e248bc5baa342d4.zip
feat(vision): store images in tmp dir instead of SQLite — compact URLs + purge on compaction/close
-rw-r--r-- packages/session-orchestrator/src/orchestrator.ts 30
-rw-r--r-- packages/transport-http/src/app.ts 31
-rw-r--r-- packages/vision-handoff/src/extension.ts 88
-rw-r--r-- packages/vision-handoff/src/service.ts 135
4 files changed, 279 insertions, 5 deletions
diff --git a/packages/session-orchestrator/src/orchestrator.ts b/packages/session-orchestrator/src/orchestrator.ts
index c0493f3..045b88d 100644
--- a/packages/session-orchestrator/src/orchestrator.ts
+++ b/packages/session-orchestrator/src/orchestrator.ts
@@ -49,6 +49,20 @@ import type { ToolAssembly } from "./tools-filter.js";
* call `consult_vision`) and the images are registered for tool access.
*/
export interface VisionHandoffService {
+ /**
+ * Store images to tmp files and return compact URLs. Each input image's data
+ * URL is saved to a tmp file and replaced with a compact HTTP path so the
+ * persisted conversation store holds a tiny string, not megabytes of base64.
+ * When `saveImageToTmp` is not configured, data URLs pass through unchanged.
+ */
+ readonly storeImages: (
+ conversationId: string,
+ images: readonly ImageInput[],
+ ) => Promise<readonly ImageInput[]>;
+
+ /** Delete all tmp images for a conversation (on close). Best-effort. */
+ readonly purgeConversationImages: (conversationId: string) => Promise<void>;
+
readonly prepareForProvider: (
messages: readonly ChatMessage[],
currentModelName: string | undefined,
@@ -625,7 +639,18 @@ export function createSessionOrchestrator(
const effectiveModelName = resolveModelName(modelName, storedModel);
const history = await deps.conversationStore.load(conversationId);
- const userMsg = buildUserMessage(text, images);
+
+ // Store images to tmp files (compact URLs) BEFORE building the user
+ // message so the persisted chunks hold tiny URL references, not
+ // megabytes of base64 data URLs. When the vision-handoff service isn't
+ // loaded, images pass through unchanged (backward compatible).
+ const visionHandoffForStore = deps.resolveVisionHandoff?.();
+ const storedImages =
+ visionHandoffForStore !== undefined && images !== undefined
+ ? await visionHandoffForStore.storeImages(conversationId, images)
+ : images;
+
+ const userMsg = buildUserMessage(text, storedImages);
// Workspace assignment for new conversations happens BEFORE
// effective-cwd resolution (see workspaceSetupPromise above) so
@@ -988,6 +1013,9 @@ export function createSessionOrchestrator(
});
});
void deps.conversationStore.setConversationStatus(conversationId, "closed");
+ // Purge tmp images for this conversation (best-effort, fire-and-forget).
+ const vh = deps.resolveVisionHandoff?.();
+ if (vh !== undefined) void vh.purgeConversationImages(conversationId);
return { abortedTurn };
},
diff --git a/packages/transport-http/src/app.ts b/packages/transport-http/src/app.ts
index ea216e1..16c4167 100644
--- a/packages/transport-http/src/app.ts
+++ b/packages/transport-http/src/app.ts
@@ -201,6 +201,37 @@ export function createApp(opts: CreateServerOptions): Hono {
app.get("/health", (c) => c.json({ ok: true }));
+ // ── Tmp image serving (vision handoff) ──────────────────────────────────────
+ // GET /images/:conversationId/:imageId — serve one tmp image file from
+ // DISPATCH_IMAGE_DIR (default "/tmp/dispatch/images"). Responds 400 when a
+ // path segment is unsafe and 404 when the file cannot be read.
+ app.get("/images/:conversationId/:imageId", async (c) => {
+   const conversationId = c.req.param("conversationId");
+   const imageId = c.req.param("imageId");
+   // Both params are caller-controlled and become filesystem path components,
+   // so each must be a single plain segment: non-empty, no separators, no
+   // "." / "..", no NUL byte.
+   const isUnsafeSegment = (segment: string): boolean =>
+     segment === "" ||
+     segment === "." ||
+     segment.includes("..") ||
+     segment.includes("/") ||
+     segment.includes("\\") ||
+     segment.includes("\0");
+   if (isUnsafeSegment(conversationId)) {
+     return c.json({ error: "Invalid conversation ID" }, 400);
+   }
+   if (isUnsafeSegment(imageId)) {
+     return c.json({ error: "Invalid image ID" }, 400);
+   }
+   const imageDir = process.env.DISPATCH_IMAGE_DIR ?? "/tmp/dispatch/images";
+   const { resolve, sep } = await import("node:path");
+   const { readFile: fsReadFile } = await import("node:fs/promises");
+   const rootDir = resolve(imageDir);
+   const rootPrefix = rootDir.endsWith(sep) ? rootDir : rootDir + sep;
+   const filePath = resolve(rootDir, conversationId, imageId);
+   // Defense in depth: refuse anything that resolves outside the image root.
+   if (!filePath.startsWith(rootPrefix)) {
+     return c.json({ error: "Invalid image ID" }, 400);
+   }
+   try {
+     const buf = await fsReadFile(filePath);
+     // Content type is chosen from the file-name suffix; first match wins.
+     const mimeBySuffix: ReadonlyArray<readonly [string, string]> = [
+       [".png", "image/png"],
+       [".jpg", "image/jpeg"],
+       [".jpeg", "image/jpeg"],
+       [".webp", "image/webp"],
+       [".gif", "image/gif"],
+       [".bmp", "image/bmp"],
+     ];
+     const lowerId = imageId.toLowerCase();
+     const mime =
+       mimeBySuffix.find(([suffix]) => lowerId.endsWith(suffix))?.[1] ??
+       "application/octet-stream";
+     return new Response(buf, { headers: { "Content-Type": mime, "Cache-Control": "no-cache" } });
+   } catch {
+     // Any read failure (missing file, directory, permissions) is reported as 404.
+     return c.json({ error: "Image not found" }, 404);
+   }
+ });
+
app.get("/conversations/:id/metrics", async (c) => {
const conversationId = c.req.param("id");
diff --git a/packages/vision-handoff/src/extension.ts b/packages/vision-handoff/src/extension.ts
index af646aa..faf4621 100644
--- a/packages/vision-handoff/src/extension.ts
+++ b/packages/vision-handoff/src/extension.ts
@@ -9,13 +9,18 @@
* image + the model's specific question, and returns the conversation ID + the
* vision model's answer. Follow-ups go through the dispatch CLI.
*
+ * Images are saved to a tmp directory (`/tmp/dispatch/images/<convId>/`) so the
+ * conversation store (SQLite) only holds a compact URL reference — not
+ * megabytes of base64. Tmp files are purged on reboot (ephemeral dir), after
+ * compaction (the transcription replaces the image), and on conversation close.
+ *
* Effects (filesystem, orchestrator) live here in the shell, injected into the
* service. The pure decisions live in `pure.ts`. No `console.*`; logging via
* `host.logger`.
*/
-import { readFile } from "node:fs/promises";
-import { extname, isAbsolute, resolve as pathResolve } from "node:path";
+import { mkdir, readFile, rm, unlink, writeFile } from "node:fs/promises";
+import { extname, isAbsolute, join, resolve as pathResolve } from "node:path";
import { conversationStoreHandle } from "@dispatch/conversation-store";
import type { CredentialStore } from "@dispatch/credential-store";
import { credentialStoreHandle } from "@dispatch/credential-store";
@@ -38,6 +43,8 @@ export const manifest: Manifest = {
contributes: { services: ["vision-handoff/service"], tools: ["consult_vision"] },
};
+/**
+ * Root directory for tmp image files: the `DISPATCH_IMAGE_DIR` environment
+ * variable when set, otherwise `/tmp/dispatch/images`. Read once at module load.
+ */
+const IMAGE_DIR = process.env.DISPATCH_IMAGE_DIR ?? "/tmp/dispatch/images";
+
/** MIME types for recognized image extensions. */
const MIME_BY_EXT: Readonly<Record<string, string>> = {
".png": "image/png",
@@ -48,6 +55,15 @@ const MIME_BY_EXT: Readonly<Record<string, string>> = {
".bmp": "image/bmp",
};
+/**
+ * Reverse lookup: MIME type → file extension (leading dot included), used to
+ * name tmp image files. `image/jpeg` maps to `.jpg`.
+ */
+const EXT_BY_MIME: Readonly<Record<string, string>> = {
+ "image/png": ".png",
+ "image/jpeg": ".jpg",
+ "image/webp": ".webp",
+ "image/gif": ".gif",
+ "image/bmp": ".bmp",
+};
+
/**
* Read an image file from disk as a base64 data URL. Resolves relative paths
* against the cwd (the conversation's working directory). Throws on missing
@@ -61,6 +77,70 @@ async function readFileAsDataUrl(path: string, cwd?: string): Promise<string> {
return `data:${mime};base64,${buf.toString("base64")}`;
}
+/**
+ * Save a data URL image to a tmp file and return a compact HTTP path.
+ * The compact URL (`/images/<conversationId>/<uuid>.<ext>`) is what gets
+ * persisted in the conversation store — a tiny string, not megabytes of base64.
+ *
+ * The file extension comes from `mimeType` when given, otherwise from the MIME
+ * type declared in the data URL header (`data:<mime>;base64,...`), otherwise
+ * `.png`. The payload after the first comma is decoded as base64; a data URL
+ * without a comma produces an empty file.
+ *
+ * @param conversationId - Used as the directory name under IMAGE_DIR.
+ * @param dataUrl - The `data:` URL whose payload is written to disk.
+ * @param mimeType - Optional MIME type overriding the one in the data URL.
+ * @returns The compact path `/images/<conversationId>/<uuid>.<ext>`.
+ * @throws Error when `conversationId` is not a single safe path segment, or
+ *   when creating the directory / writing the file fails.
+ */
+async function saveImageToTmp(
+  conversationId: string,
+  dataUrl: string,
+  mimeType?: string,
+): Promise<string> {
+  // The ID becomes a directory name; refuse anything that could escape IMAGE_DIR.
+  if (
+    conversationId === "" ||
+    conversationId === "." ||
+    conversationId.includes("..") ||
+    conversationId.includes("/") ||
+    conversationId.includes("\\") ||
+    conversationId.includes("\0")
+  ) {
+    throw new Error("Invalid conversation ID for tmp image storage");
+  }
+  // Split once at the first comma: "data:image/jpeg;base64" | "<payload>".
+  const commaAt = dataUrl.indexOf(",");
+  const header = commaAt === -1 ? "" : dataUrl.slice(0, commaAt);
+  const base64 = commaAt === -1 ? "" : dataUrl.slice(commaAt + 1);
+  const declaredMime = /^data:([^;,]+)/i.exec(header)?.[1];
+  const mime = (mimeType ?? declaredMime ?? "image/png").toLowerCase();
+  const ext = EXT_BY_MIME[mime] ?? ".png";
+  const imageId = `${crypto.randomUUID()}${ext}`;
+  const dir = join(IMAGE_DIR, conversationId);
+  await mkdir(dir, { recursive: true });
+  await writeFile(join(dir, imageId), Buffer.from(base64, "base64"));
+  return `/images/${conversationId}/${imageId}`;
+}
+
+/**
+ * Resolve a compact URL (`/images/<convId>/<imageId>`) back to a data URL by
+ * reading the tmp file. Anything that is not a well-formed compact URL — data
+ * URLs, HTTP URLs, or a `/images/` path with missing, extra, empty, or
+ * dot-only segments — is returned unchanged, so a crafted URL cannot make
+ * this read outside `<IMAGE_DIR>/<convId>/`.
+ *
+ * @param url - The image URL stored on a chunk.
+ * @returns A `data:<mime>;base64,...` URL for compact URLs, else `url` itself.
+ * @throws When the tmp file for a well-formed compact URL cannot be read.
+ */
+async function resolveImageUrl(url: string): Promise<string> {
+  // data: and http(s): URLs never start with "/images/", so this one guard
+  // covers every pass-through case.
+  if (!url.startsWith("/images/")) return url;
+  const parts = url.split("/"); // ["", "images", convId, imageId]
+  if (parts.length !== 4) return url;
+  const convId = parts[2];
+  const imageId = parts[3];
+  if (convId === undefined || imageId === undefined) return url;
+  // Splitting on "/" rules out forward slashes; still reject the segments
+  // that would climb out of, or stay at, the parent directory.
+  const isUnsafe = (segment: string): boolean =>
+    segment === "" ||
+    segment === "." ||
+    segment.includes("..") ||
+    segment.includes("\\") ||
+    segment.includes("\0");
+  if (isUnsafe(convId) || isUnsafe(imageId)) return url;
+  const buf = await readFile(join(IMAGE_DIR, convId, imageId));
+  const mime = MIME_BY_EXT[extname(imageId).toLowerCase()] ?? "image/png";
+  return `data:${mime};base64,${buf.toString("base64")}`;
+}
+
+/**
+ * Delete a single tmp image file (after compaction — best-effort).
+ *
+ * Only well-formed compact URLs (`/images/<convId>/<imageId>`) are acted on;
+ * any other string, including one with empty, dot-only, or extra segments, is
+ * ignored so the unlink can never land outside `<IMAGE_DIR>/<convId>/`.
+ * Never throws: a failed unlink is swallowed.
+ */
+async function deleteTmpImage(compactUrl: string): Promise<void> {
+  if (!compactUrl.startsWith("/images/")) return;
+  const parts = compactUrl.split("/"); // ["", "images", convId, imageId]
+  if (parts.length !== 4) return;
+  const convId = parts[2];
+  const imageId = parts[3];
+  if (convId === undefined || imageId === undefined) return;
+  const isUnsafe = (segment: string): boolean =>
+    segment === "" ||
+    segment === "." ||
+    segment.includes("..") ||
+    segment.includes("\\") ||
+    segment.includes("\0");
+  if (isUnsafe(convId) || isUnsafe(imageId)) return;
+  try {
+    await unlink(join(IMAGE_DIR, convId, imageId));
+  } catch {
+    // Best-effort — file may already be deleted.
+  }
+}
+
+/**
+ * Delete all tmp images for a conversation (on close — best-effort).
+ *
+ * Recursively removes `<IMAGE_DIR>/<conversationId>`. The ID is used as a
+ * path component, so an empty, dot-only, or separator-bearing value is
+ * ignored: `join(IMAGE_DIR, "")` is IMAGE_DIR itself and `..` climbs above
+ * it, and a recursive forced `rm` on either would remove far more than one
+ * conversation's files. Never throws.
+ */
+async function deleteConversationImages(conversationId: string): Promise<void> {
+  if (
+    conversationId === "" ||
+    conversationId === "." ||
+    conversationId.includes("..") ||
+    conversationId.includes("/") ||
+    conversationId.includes("\\") ||
+    conversationId.includes("\0")
+  ) {
+    return;
+  }
+  try {
+    await rm(join(IMAGE_DIR, conversationId), { recursive: true, force: true });
+  } catch {
+    // Best-effort.
+  }
+}
+
export async function activate(host: HostAPI): Promise<void> {
const credentialStore = host.getService(credentialStoreHandle) as CredentialStore | undefined;
if (credentialStore === undefined) {
@@ -82,6 +162,10 @@ export async function activate(host: HostAPI): Promise<void> {
credentialStore,
resolveModel,
readFileAsDataUrl,
+ saveImageToTmp,
+ resolveImageUrl,
+ deleteTmpImage,
+ deleteConversationImages,
resolveOrchestrator: () => {
const loaded = host.getExtensions().some((m) => m.id === "session-orchestrator");
if (!loaded) return undefined;
diff --git a/packages/vision-handoff/src/service.ts b/packages/vision-handoff/src/service.ts
index 7403c21..cc13d93 100644
--- a/packages/vision-handoff/src/service.ts
+++ b/packages/vision-handoff/src/service.ts
@@ -115,6 +115,37 @@ export interface VisionHandoffDeps {
imageUrl: string,
transcription: string,
) => Promise<void>;
+ /**
+ * Save an image data URL to a tmp file and return a compact URL
+ * (`/images/<conversationId>/<imageId>.<ext>`) that can be persisted in the
+ * conversation store instead of the full data URL (which would be megabytes).
+ * The frontend serves the image via `GET /images/...`; the provider resolves
+ * it back to a data URL via {@link resolveImageUrl} at runtime. When `undefined`,
+ * data URLs pass through unchanged (images persist in SQLite — the large-DB
+ * path, for environments without tmp file support).
+ */
+ readonly saveImageToTmp?: (
+ conversationId: string,
+ dataUrl: string,
+ mimeType?: string,
+ ) => Promise<string>;
+ /**
+ * Resolve a compact URL (`/images/...`) back to a data URL by reading the tmp
+ * file. Data URLs and HTTP URLs pass through unchanged. Paired with
+ * {@link saveImageToTmp}.
+ */
+ readonly resolveImageUrl?: (url: string) => Promise<string>;
+ /**
+ * Delete a tmp image file (after it has been compacted to text — the
+ * transcription is cached, the raw image is no longer needed). Best-effort:
+ * the caller swallows any error, so a failed delete never breaks the turn.
+ */
+ readonly deleteTmpImage?: (compactUrl: string) => Promise<void>;
+ /**
+ * Delete all tmp images for a conversation (on conversation close).
+ * Best-effort.
+ */
+ readonly deleteConversationImages?: (conversationId: string) => Promise<void>;
/** Generate a new conversation ID for a consultation. Defaults to crypto.randomUUID. */
readonly generateId?: () => string;
readonly logger?: Logger;
@@ -128,6 +159,24 @@ export interface VisionHandoffService {
readonly isVisionCapable: (modelName: string | undefined) => Promise<boolean>;
/**
+ * Store images to tmp files and return compact URLs. Each input image's data
+ * URL is saved to `/tmp/dispatch/images/<conversationId>/<uuid>.<ext>` and
+ * replaced with a compact HTTP path (`/images/<conversationId>/<uuid>.<ext>`)
+ * so the persisted conversation store holds a tiny string, not megabytes of
+ * base64. When `saveImageToTmp` is not configured, data URLs pass through
+ * unchanged (backward compatible).
+ */
+ readonly storeImages: (
+ conversationId: string,
+ images: readonly ImageInput[],
+ ) => Promise<readonly ImageInput[]>;
+
+ /**
+ * Delete all tmp images for a conversation (on close). Best-effort.
+ */
+ readonly purgeConversationImages: (conversationId: string) => Promise<void>;
+
+ /**
* Resolve a vision-capable model from the catalog (any provider). Returns
* `undefined` when none is available.
*/
@@ -306,6 +355,15 @@ export function createVisionHandoffService(deps: VisionHandoffDeps): VisionHando
if (convId !== undefined && deps.setImageTranscription !== undefined) {
await deps.setImageTranscription(convId, entry.url, text);
}
+ // The image has been transcribed to text — delete the tmp file
+ // (the transcription is cached, the raw image is no longer needed).
+ if (deps.deleteTmpImage !== undefined) {
+ try {
+ await deps.deleteTmpImage(entry.url);
+ } catch {
+ // Best-effort — don't let cleanup failure break the turn.
+ }
+ }
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
log?.warn("vision-handoff: image compaction transcription failed", { error: msg });
@@ -340,6 +398,42 @@ export function createVisionHandoffService(deps: VisionHandoffDeps): VisionHando
return result;
}
+ /**
+  * Return `messages` with every image chunk's URL passed through
+  * `deps.resolveImageUrl` (compact `/images/...` path → data URL).
+  *
+  * Returns the input array itself when no resolver is configured or when no
+  * message contains an image chunk. Messages without images are reused as-is;
+  * messages with images are rebuilt from `role` and the new chunk list.
+  *
+  * A resolver failure for one image (e.g. its tmp file no longer exists) is
+  * logged and that chunk is kept with its original URL, so a single missing
+  * file does not reject the whole call.
+  *
+  * NOTE(review): downstream code that keys on `chunk.url` now sees the resolved
+  * data URL rather than the compact path — confirm that tmp-file deletion and
+  * transcription caching after compaction still receive the URL they expect.
+  */
+ async function resolveImageUrlsInMessages(
+   messages: readonly ChatMessage[],
+ ): Promise<readonly ChatMessage[]> {
+   const resolveUrl = deps.resolveImageUrl;
+   if (resolveUrl === undefined) return messages;
+   const hasImage = messages.some((m) => m.chunks.some((c) => c.type === "image"));
+   if (!hasImage) return messages;
+   const result: ChatMessage[] = [];
+   for (const msg of messages) {
+     if (!msg.chunks.some((c) => c.type === "image")) {
+       result.push(msg);
+       continue;
+     }
+     const newChunks: Chunk[] = [];
+     for (const chunk of msg.chunks) {
+       if (chunk.type !== "image") {
+         newChunks.push(chunk);
+         continue;
+       }
+       try {
+         const dataUrl = await resolveUrl(chunk.url);
+         newChunks.push({
+           type: "image",
+           url: dataUrl,
+           ...(chunk.mimeType !== undefined ? { mimeType: chunk.mimeType } : {}),
+         });
+       } catch (err) {
+         // Keep the chunk untouched; truncate the URL so a large value never
+         // floods the log.
+         log?.warn("vision-handoff: failed to resolve image URL", {
+           url: chunk.url.slice(0, 128),
+           error: err instanceof Error ? err.message : String(err),
+         });
+         newChunks.push(chunk);
+       }
+     }
+     result.push({ role: msg.role, chunks: newChunks });
+   }
+   return result;
+ }
+
const service: VisionHandoffService = {
async isVisionCapable(modelName: string | undefined): Promise<boolean> {
if (modelName === undefined) return false;
@@ -347,6 +441,38 @@ export function createVisionHandoffService(deps: VisionHandoffDeps): VisionHando
return isVisionCapable(modelName, info);
},
+ // Swap each data-URL image for the compact URL returned by
+ // deps.saveImageToTmp; other images are passed along untouched. Without a
+ // configured saver the input array is returned as-is.
+ async storeImages(
+   conversationId: string,
+   images: readonly ImageInput[],
+ ): Promise<readonly ImageInput[]> {
+   if (deps.saveImageToTmp === undefined) return images;
+   const stored: ImageInput[] = [];
+   // Saved one at a time, in input order.
+   for (const image of images) {
+     if (!image.url.startsWith("data:")) {
+       stored.push(image);
+       continue;
+     }
+     const url = await deps.saveImageToTmp(conversationId, image.url, image.mimeType);
+     // Carry mimeType over only when the input had one.
+     stored.push(image.mimeType === undefined ? { url } : { url, mimeType: image.mimeType });
+   }
+   return stored;
+ },
+
+ // Remove every tmp image of a conversation via deps.deleteConversationImages.
+ // A no-op when that dep is absent; a failure is logged as a warning and
+ // never rethrown.
+ async purgeConversationImages(conversationId: string): Promise<void> {
+   if (deps.deleteConversationImages !== undefined) {
+     try {
+       await deps.deleteConversationImages(conversationId);
+     } catch (err) {
+       const error = err instanceof Error ? err.message : String(err);
+       log?.warn("vision-handoff: failed to purge conversation images", {
+         conversationId,
+         error,
+       });
+     }
+   }
+ },
+
resolveVisionModel,
async prepareForProvider(
@@ -362,6 +488,11 @@ export function createVisionHandoffService(deps: VisionHandoffDeps): VisionHando
// Fast path: no images anywhere → nothing to do.
if (!hasImageChunks(messages)) return messages;
+ // Resolve compact URLs (/images/...) → data URLs for the provider.
+ // The persisted chunks store compact URLs (tiny strings); the provider
+ // needs data URLs (read from tmp files at runtime).
+ const resolved = await resolveImageUrlsInMessages(messages);
+
const isCapable =
currentModelName !== undefined &&
(await isVisionCapable(currentModelName, await getInfo(currentModelName)));
@@ -371,7 +502,7 @@ export function createVisionHandoffService(deps: VisionHandoffDeps): VisionHando
// are transcribed to text (one-time, cached) and stripped from the
// provider messages. Recent images (within the limit) stay native.
if (isCapable) {
- return compactImagesForVisionModel(messages, opts, currentModelName);
+ return compactImagesForVisionModel(resolved, opts, currentModelName);
}
// ── Non-vision model: placeholders + consult_vision ──────────────────
@@ -388,7 +519,7 @@ export function createVisionHandoffService(deps: VisionHandoffDeps): VisionHando
// per-conversation registry so the consult_vision tool can look it up.
let seqId = 0;
const result: ChatMessage[] = [];
- for (const msg of messages) {
+ for (const msg of resolved) {
if (!msg.chunks.some((c) => c.type === "image")) {
result.push(msg);
continue;