// packages/api/src/app.ts

import {
	type AgentModelEntry,
	getTab,
	isReasoningEffort,
	NotificationDispatcher,
	type UserContentPart,
	validateUserContent,
} from "@dispatch/core";
import { Hono } from "hono";
import { cors } from "hono/cors";
import { AgentManager } from "./agent-manager.js";
import { PermissionManager } from "./permission-manager.js";
import { agentsRoutes } from "./routes/agents.js";
import { configRoutes } from "./routes/config.js";
import { modelsRoutes, startWakeScheduler } from "./routes/models.js";
import { notificationsRoutes } from "./routes/notifications.js";
import { skillsRoutes } from "./routes/skills.js";
import { tabsRoutes } from "./routes/tabs.js";

/**
 * Normalise the untrusted `agentModels` fallback chain sent by the frontend.
 *
 * Anything that is not an array yields `undefined`. Inside an array, every
 * element that is not an object with string `key_id` AND string `model_id`
 * is silently skipped. `effort` survives only when `isReasoningEffort`
 * accepts it; otherwise the field is omitted so the per-tab / default effort
 * applies.
 *
 * @param raw - The unvalidated `agentModels` value from the request body.
 * @returns The cleaned entries (possibly empty), or `undefined` for
 *   non-array input.
 */
function sanitizeAgentModels(raw: unknown): AgentModelEntry[] | undefined {
	if (!Array.isArray(raw)) {
		return undefined;
	}
	// flatMap lets a rejected candidate contribute zero entries.
	return raw.flatMap((candidate: unknown): AgentModelEntry[] => {
		if (typeof candidate !== "object" || candidate === null) {
			return [];
		}
		const { key_id, model_id, effort } = candidate as Record<string, unknown>;
		if (typeof key_id !== "string" || typeof model_id !== "string") {
			return [];
		}
		return [
			{
				key_id,
				model_id,
				...(isReasoningEffort(effort) ? { effort } : {}),
			},
		];
	});
}

/**
 * Shape-check the optional multimodal `content` array of a `/chat` request.
 *
 * Accepted parts are `{ type: "text", text }` and
 * `{ type: "attachment", mediaType, data, name? }`; anything else (wrong
 * type tag, non-string fields, non-object elements) is skipped. A non-string
 * `name` is dropped while the attachment itself is kept.
 *
 * The result is `undefined` unless at least one attachment survives, so a
 * text-only turn keeps using the plain-string path. Only the SHAPE is checked
 * here — size/type limits are enforced separately by `validateUserContent`.
 *
 * @param raw - The unvalidated `content` value from the request body.
 * @returns The cleaned parts when an attachment is present, else `undefined`.
 */
function sanitizeUserContent(raw: unknown): UserContentPart[] | undefined {
	if (!Array.isArray(raw) || raw.length === 0) {
		return undefined;
	}
	const parts: UserContentPart[] = [];
	for (const candidate of raw as unknown[]) {
		if (typeof candidate !== "object" || candidate === null) continue;
		const { type, text, mediaType, data, name } = candidate as Record<string, unknown>;
		switch (type) {
			case "text":
				if (typeof text === "string") {
					parts.push({ type: "text", text });
				}
				break;
			case "attachment":
				if (typeof mediaType === "string" && typeof data === "string") {
					parts.push(
						typeof name === "string"
							? { type: "attachment", mediaType, data, name }
							: { type: "attachment", mediaType, data },
					);
				}
				break;
		}
	}
	// Text-only input stays on the plain-string path: only hand back the array
	// when it actually carries an attachment.
	const carriesAttachment = parts.some((part) => part.type === "attachment");
	return carriesAttachment ? parts : undefined;
}

// Process-wide singletons: the agent manager is constructed around the
// permission manager.
export const permissionManager = new PermissionManager();
export const agentManager = new AgentManager(permissionManager);

// Push notifications via ntfy.sh. Configuration is read from the `settings`
// table at send time, so edits take effect on the next notification with no
// restart or re-attach.
export const notificationDispatcher = new NotificationDispatcher({
	// Tab title for the notification; `null` when the tab is missing, has no
	// title, or the lookup throws.
	getTabTitle(tabId) {
		try {
			const tab = getTab(tabId);
			return tab?.title ?? null;
		} catch {
			return null;
		}
	},
	// Parent tab id. A failed lookup (tab not found / DB unavailable) reports
	// `undefined` so the dispatcher treats the tab as top-level instead of
	// silently dropping the notification.
	getTabParentId(tabId) {
		try {
			const tab = getTab(tabId);
			if (!tab) {
				return undefined;
			}
			return tab.parentTabId;
		} catch {
			return undefined;
		}
	},
});

// Subscribe the dispatcher to both event sources.
notificationDispatcher.attachToAgentManager(agentManager);
notificationDispatcher.attachToPermissionManager(permissionManager);

// The HTTP application; every route below is registered on this instance.
export const app = new Hono();

// CORS for all paths. The request's own Origin is echoed back (falling back to
// "*" when the Origin is empty) and credentials are allowed.
// NOTE(review): echoing an arbitrary Origin together with `credentials: true`
// lets any site make credentialed requests — confirm this is intended (e.g.
// local-only deployment) or restrict to an allowlist.
app.use(
	"*",
	cors({
		origin: (requestOrigin) => (requestOrigin ? requestOrigin : "*"),
		credentials: true,
		allowHeaders: ["Content-Type", "Authorization"],
		allowMethods: ["GET", "POST", "PATCH", "PUT", "DELETE", "OPTIONS"],
	}),
);

// GET /health — liveness probe; always answers `{ ok: true }`.
app.get("/health", (c) => c.json({ ok: true }));

// GET /status — snapshot of the agent manager: its overall status, message
// count, and the full status collection from `getAllStatuses()`.
app.get("/status", (c) => {
	const status = agentManager.getStatus();
	const messageCount = agentManager.getMessageCount();
	const statuses = agentManager.getAllStatuses();
	return c.json({ status, messageCount, statuses });
});

// POST /chat — deliver a user message to a tab.
//
// Body: `{ tabId, message, content?, keyId?, modelId?, agentModels?,
// reasoningEffort?, workingDirectory?, queueId? }`. `tabId` and `message` must
// be non-blank strings; every optional field is forwarded only when it has the
// expected type (see the sanitisers above).
//
// Responses:
//   200 `{ status: "ok" }`                — a new turn was started.
//   200 `{ status: "queued", messageId }` — the tab was busy; message queued.
//   400 — body is not a JSON object, `tabId`/`message` invalid, or the
//         attachments failed `validateUserContent`.
//   409 — attachments were sent while the tab is running.
app.post("/chat", async (c) => {
	// `c.req.json()` rejects on malformed JSON and resolves to `null` / a
	// primitive for valid-but-non-object JSON. Either case would otherwise
	// throw out of this handler (a bad request reported as a server error), so
	// both are mapped to a 400 up front.
	const body = await c.req
		.json<{
			tabId?: unknown;
			message?: unknown;
			content?: unknown;
			keyId?: unknown;
			modelId?: unknown;
			agentModels?: unknown;
			reasoningEffort?: unknown;
			workingDirectory?: unknown;
			queueId?: unknown;
		} | null>()
		.catch(() => null);
	if (body === null || typeof body !== "object") {
		return c.json({ error: "request body must be a JSON object" }, 400);
	}
	const { tabId, message } = body;

	if (typeof tabId !== "string" || tabId.trim() === "") {
		return c.json({ error: "tabId must be a non-empty string" }, 400);
	}

	if (typeof message !== "string" || message.trim() === "") {
		return c.json({ error: "message must be a non-empty string" }, 400);
	}

	const keyId = typeof body.keyId === "string" ? body.keyId : undefined;
	const modelId = typeof body.modelId === "string" ? body.modelId : undefined;
	const agentModels = sanitizeAgentModels(body.agentModels);
	const workingDirectory =
		typeof body.workingDirectory === "string" ? body.workingDirectory : undefined;
	const queueId = typeof body.queueId === "string" ? body.queueId : undefined;
	const reasoningEffort = isReasoningEffort(body.reasoningEffort)
		? body.reasoningEffort
		: undefined;

	// Optional multimodal content (image/pdf attachments). When present, the
	// attachments are EPHEMERAL — forwarded to the model for this turn only and
	// never persisted (the chunk log keeps just `message`, which the frontend
	// has already projected to text with `[image]`/`[pdf]` markers).
	const content = sanitizeUserContent(body.content);
	if (content) {
		// Enforce size/type/count ceilings server-side (defence in depth; the
		// frontend also enforces them at paste time). Reject the whole request
		// so no tokens are spent on an over-limit payload.
		const validation = validateUserContent(content);
		if (!validation.ok) {
			return c.json({ error: "invalid attachments", details: validation.errors }, 400);
		}
		// Attachments only attach to a FRESH turn. If the tab is mid-turn the
		// message would queue (text-only machinery), silently dropping the
		// images. Reject clearly instead so the user can retry once idle.
		if (agentManager.getTabStatus(tabId) === "running") {
			return c.json(
				{ error: "cannot attach images while the agent is generating; wait for it to finish" },
				409,
			);
		}
	}

	// Single routing decision (queue if busy, new turn if idle) shared with the
	// `send_to_tab` tool via `AgentManager.deliverMessage`. Non-blocking — a
	// started turn runs in the background.
	// Note the asymmetry: `workingDirectory` is forwarded even when it is an
	// empty string, whereas the other string options are dropped when empty.
	const outcome = agentManager.deliverMessage(tabId, message, {
		...(keyId ? { keyId } : {}),
		...(modelId ? { modelId } : {}),
		...(agentModels ? { agentModels } : {}),
		...(reasoningEffort ? { reasoningEffort } : {}),
		...(workingDirectory !== undefined ? { workingDirectory } : {}),
		...(queueId ? { queueId } : {}),
		...(content ? { content } : {}),
	});

	if (outcome.status === "queued") {
		return c.json({ status: "queued", messageId: outcome.messageId });
	}
	return c.json({ status: "ok" });
});

app.route("/config", configRoutes);

// POST /chat/cancel — remove a still-queued message from a tab's queue.
//
// Body: `{ tabId: string, messageId: string }`.
// Responds `{ success }`, where `success` is whatever
// `agentManager.cancelQueuedMessage` returned. Responds 400 when the body is
// not a JSON object or either id is not a string.
app.post("/chat/cancel", async (c) => {
	// Malformed JSON makes `c.req.json()` reject, and a body of `null` would
	// make the property reads below throw; report both as a client error.
	const body: unknown = await c.req.json().catch(() => null);
	if (body === null || typeof body !== "object") {
		return c.json({ error: "request body must be a JSON object" }, 400);
	}
	const { tabId, messageId } = body as Record<string, unknown>;
	if (typeof tabId !== "string" || typeof messageId !== "string") {
		return c.json({ error: "tabId and messageId are required strings" }, 400);
	}
	const cancelled = agentManager.cancelQueuedMessage(tabId, messageId);
	return c.json({ success: cancelled });
});

// POST /chat/stop — ask the agent manager to stop the given tab.
//
// Body: `{ tabId: string }`. Always answers `{ success: true }` once the stop
// request has been issued; responds 400 when the body is not a JSON object or
// `tabId` is not a string.
app.post("/chat/stop", async (c) => {
	// Malformed JSON makes `c.req.json()` reject, and a body of `null` would
	// make `body.tabId` throw; report both as a client error.
	const body: unknown = await c.req.json().catch(() => null);
	if (body === null || typeof body !== "object") {
		return c.json({ error: "request body must be a JSON object" }, 400);
	}
	const { tabId } = body as Record<string, unknown>;
	if (typeof tabId !== "string") {
		return c.json({ error: "tabId is required" }, 400);
	}
	agentManager.stopTab(tabId);
	return c.json({ success: true });
});

// Prompt-cache WARMING (see AgentManager.warmCacheForTab / Agent.warmCache).
//
// Replays the tab's exact cached prefix + one trivial throwaway turn so the
// provider's ~5-min prompt-cache TTL is refreshed while the tab sits idle.
// The frontend's cache-warming timer drives this every ~4 minutes. The
// warming request is NEVER persisted, NEVER emitted, and NEVER folded into the
// real usage aggregate — we return ONLY its `usage` so the UI can show a
// warming-specific "last request" cache rate without polluting the real
// Cache Rate metric. Returns 409 when the tab is mid-turn (caller also gates),
// 400 when the body is not a JSON object or `tabId` is blank, and 500 for any
// other warming failure.
app.post("/chat/warm", async (c) => {
	// `c.req.json()` rejects on malformed JSON and resolves to `null` / a
	// primitive for valid-but-non-object JSON. Map both to a 400 so a bad
	// request from the timer is not reported as a server fault.
	const body = await c.req
		.json<{
			tabId?: unknown;
			keyId?: unknown;
			modelId?: unknown;
			agentModels?: unknown;
			reasoningEffort?: unknown;
		} | null>()
		.catch(() => null);
	if (body === null || typeof body !== "object") {
		return c.json({ error: "request body must be a JSON object" }, 400);
	}
	const { tabId } = body;
	if (typeof tabId !== "string" || tabId.trim() === "") {
		return c.json({ error: "tabId must be a non-empty string" }, 400);
	}
	const keyId = typeof body.keyId === "string" ? body.keyId : undefined;
	const modelId = typeof body.modelId === "string" ? body.modelId : undefined;
	const agentModels = sanitizeAgentModels(body.agentModels);
	// Same effort the real turn would use — a message-cache key, so warming must
	// match it to refresh the SAME bucket the next real message reads.
	const reasoningEffort = isReasoningEffort(body.reasoningEffort)
		? body.reasoningEffort
		: undefined;

	const result = await agentManager.warmCacheForTab(tabId, {
		...(keyId ? { keyId } : {}),
		...(modelId ? { modelId } : {}),
		...(agentModels ? { agentModels } : {}),
		...(reasoningEffort ? { reasoningEffort } : {}),
	});
	if (!result.ok) {
		// "tab is generating" is an expected race (not a server fault) → 409.
		const status = result.error === "tab is generating" ? 409 : 500;
		return c.json({ error: result.error }, status);
	}
	return c.json({ usage: result.usage });
});

// Mount the feature sub-routers, each under its own URL prefix.
app.route("/skills", skillsRoutes);
app.route("/models", modelsRoutes);
app.route("/tabs", tabsRoutes);
app.route("/agents", agentsRoutes);
app.route("/notifications", notificationsRoutes);

// Start the wake scheduler on boot (restores persisted schedule).
// This is a top-level call, so it runs as a side effect of evaluating this
// module — i.e. as soon as anything imports `app.ts`.
startWakeScheduler();