diff options
| author | Adam Malczewski <[email protected]> | 2026-06-01 01:46:13 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-01 01:46:13 +0900 |
| commit | 8b9533c22a47bbf6f916667e2c25d8e8e419da37 (patch) | |
| tree | 715a6a3d6f43781395e7dc7c8cdb519cef46a870 /packages/api | |
| parent | 1853dd1d40308deb829bc621beb79c5d39b9c57f (diff) | |
| download | dispatch-8b9533c22a47bbf6f916667e2c25d8e8e419da37.tar.gz dispatch-8b9533c22a47bbf6f916667e2c25d8e8e419da37.zip | |
feat(tabs): tab-to-tab agent communication via short handles
Add send_to_tab / read_tab tools so an agent can message another tab, or
read that tab's most recent completed response, by a git-style short handle
(the shortest unique prefix of the tab UUID, minimum 4 characters). The
handle is shown in the tab bar.
- core/db/tabs: resolveTabPrefix + shortestUniquePrefix (open tabs only,
LIKE-sanitized prefix matching)
- new tools read-tab.ts / send-to-tab.ts (+ tests) decoupled from the DB
TabRow via a minimal ResolvedTabRef projection
- agent-manager: unified deliverMessage routing (busy -> queue, idle ->
new turn) shared by POST /chat and send_to_tab; per-tab agent->agent
auto-wake budget (MAX_AGENT_AUTO_WAKES, refilled by any human-originated
message) to bound ping-pong loops
- summon/loader: send_to_tab + read_tab as grantable tools
- frontend: shortHandleFor + handle badge in TabBar; perm toggles
- notes: tab-comm / user-agents / todo-redesign plans
- chore: biome format fixes (debug-logger, summon.test)
Refs notes/plan-tab-comm.md
Diffstat (limited to 'packages/api')
| -rw-r--r-- | packages/api/src/agent-manager.ts | 263 | ||||
| -rw-r--r-- | packages/api/src/app.ts | 25 | ||||
| -rw-r--r-- | packages/api/tests/agent-manager.test.ts | 365 | ||||
| -rw-r--r-- | packages/api/tests/routes.test.ts | 28 |
4 files changed, 663 insertions, 18 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 111237c..517c661 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -15,8 +15,10 @@ import { createListFilesTool, createReadFileSliceTool, createReadFileTool, + createReadTabTool, createRetrieveTool, createRunShellTool, + createSendToTabTool, createSkillsWatcher, createSummonTool, createTaskListTool, @@ -32,6 +34,8 @@ import { getClaudeAccountsFromDB, getMessagesForTab, getSetting, + getTab, + listOpenTabs, loadAgent, loadAgents, loadConfig, @@ -41,8 +45,11 @@ import { refreshAccountCredentials, refreshAccountCredentialsAsync, resolveApiKey, + resolveTabPrefix, type SkillDefinition, type SystemChunkKind, + shortestUniquePrefix, + type TabResolution, type TabStatusSnapshot, TaskList, toAvailableSubagents, @@ -73,6 +80,16 @@ const TOOL_DESCRIPTIONS: Record<string, string> = { "Fetch the transcript/subtitles for a YouTube video. Set background=true to start in the background and get a job_id for later retrieval.", }; +/** + * Maximum number of CONSECUTIVE agent-to-agent auto-wakes a tab will accept + * before it stops auto-responding and waits for a human. Each `send_to_tab` + * that would wake an idle tab consumes one unit; any human-originated message + * (e.g. via `POST /chat`) refills the budget to full. This bounds runaway + * agent ping-pong loops (A wakes B wakes A ...) that would otherwise spend + * tokens unbounded with no human in the loop. See notes/plan-tab-comm.md. + */ +const MAX_AGENT_AUTO_WAKES = 6; + const DEFAULT_SYSTEM_PROMPT = "You are Dispatch, an agent designed to help with any task that the user asks for. Be helpful and concise."; @@ -197,6 +214,14 @@ interface TabAgent { * rows. Set at the start of `processMessage`, cleared when the turn ends. 
*/ currentTurnId: string | null; + /** + * Remaining consecutive agent-to-agent auto-wakes this tab will accept + * before requiring human intervention (see `MAX_AGENT_AUTO_WAKES`). + * Refilled to the max by any human-originated `deliverMessage`; decremented + * each time an agent-originated `send_to_tab` wakes this tab from idle. When + * it hits 0, further agent messages are queued but do NOT start a turn. + */ + autoWakeBudget: number; } export class AgentManager { @@ -343,6 +368,7 @@ export class AgentManager { currentChunks: null, currentAssistantId: null, currentTurnId: null, + autoWakeBudget: MAX_AGENT_AUTO_WAKES, }; this.tabAgents.set(tabId, tabAgent); } @@ -366,10 +392,12 @@ export class AgentManager { const permBash = getSetting("perm_bash") === "allow"; const permSummon = getSetting("perm_summon") === "allow"; const permUserAgent = getSetting("perm_user_agent") === "allow"; + const permSendToTab = getSetting("perm_send_to_tab") === "allow"; + const permReadTab = getSetting("perm_read_tab") === "allow"; const permWebSearch = getSetting("perm_web_search") === "allow"; const permYoutubeTranscribe = getSetting("perm_youtube_transcribe") === "allow"; const sysPrompt = getSetting("system_prompt") ?? ""; - const permKey = `${permRead}:${permEdit}:${permBash}:${permSummon}:${permUserAgent}:${permWebSearch}:${permYoutubeTranscribe}:${sysPrompt}`; + const permKey = `${permRead}:${permEdit}:${permBash}:${permSummon}:${permUserAgent}:${permSendToTab}:${permReadTab}:${permWebSearch}:${permYoutubeTranscribe}:${sysPrompt}`; // If the override differs or permissions changed, invalidate the cached agent if ( @@ -504,6 +532,12 @@ export class AgentManager { }), }); } + // Tab-to-tab communication — gated on the child whitelist. 
+ if (allowed.has("send_to_tab") || allowed.has("read_tab")) { + for (const entry of this.buildTabCommToolEntries(tabId)) { + if (allowed.has(entry.name)) toolEntries.push(entry); + } + } } else { // Parent agent: use permission settings from DB if (permRead) { @@ -581,6 +615,14 @@ export class AgentManager { }), }); } + if (permSendToTab || permReadTab) { + const tabCommAllowed = new Set<string>(); + if (permSendToTab) tabCommAllowed.add("send_to_tab"); + if (permReadTab) tabCommAllowed.add("read_tab"); + for (const entry of this.buildTabCommToolEntries(tabId)) { + if (tabCommAllowed.has(entry.name)) toolEntries.push(entry); + } + } } const tools = toolEntries.map((e) => e.tool); @@ -971,14 +1013,18 @@ export class AgentManager { if (!agentDef) { const allDefs = loadAgents(parentEffectiveDir); if (options.topLevel) { - const userAgents = allDefs.filter((d) => !d.is_subagent).map((d) => `${d.slug} (${d.name})`); + const userAgents = allDefs + .filter((d) => !d.is_subagent) + .map((d) => `${d.slug} (${d.name})`); const hint = userAgents.length > 0 ? ` Available user agents: ${userAgents.join(", ")}.` : " No user agent definitions exist yet."; throw new Error(`Agent definition not found: "${options.agentSlug}".${hint}`); } else { - const subagents = allDefs.filter((d) => d.is_subagent).map((d) => `${d.slug} (${d.name})`); + const subagents = allDefs + .filter((d) => d.is_subagent) + .map((d) => `${d.slug} (${d.name})`); const hint = subagents.length > 0 ? ` Available subagents: ${subagents.join(", ")}.` @@ -1147,7 +1193,8 @@ export class AgentManager { if (tabAgent.status === "running") { return { status: "error", - error: "This is a user agent (top-level tab) and cannot be retrieved. User agents are fire-and-forget.", + error: + "This is a user agent (top-level tab) and cannot be retrieved. 
User agents are fire-and-forget.", }; } return { @@ -1159,6 +1206,214 @@ export class AgentManager { return tabAgent.completionPromise; } + // ─── Tab-to-tab communication ─────────────────────────────────── + // + // `send_to_tab` / `read_tab` let an agent message a peer tab by its short + // handle (a git-style prefix of the tab UUID). Delivery reuses the exact + // running→queue / idle→new-turn routing that `POST /chat` uses (see + // `deliverMessage`), so an agent message behaves identically to a user one. + + /** + * Build the `send_to_tab` + `read_tab` tool entries for `tabId`. Shared by + * both tool-construction paths (child whitelist + permission-gated parent). + * `selfHandle` is computed once so the calling tab can stamp provenance and + * reject self-sends. + */ + private buildTabCommToolEntries( + tabId: string, + ): Array<{ name: string; tool: ReturnType<typeof createSendToTabTool> }> { + const selfHandle = shortestUniquePrefix(tabId); + return [ + { + name: "send_to_tab", + tool: createSendToTabTool({ + resolveShortId: (prefix) => this.resolveTabHandle(prefix), + // origin: "agent" subjects this to the receiver's auto-wake + // budget so agent↔agent loops are bounded (see deliverMessage). + deliver: (targetId, message) => + this.deliverMessage(targetId, message, { origin: "agent" }), + listOpenHandles: () => this.listOpenHandles(tabId), + self: { id: tabId, handle: selfHandle }, + }), + }, + { + name: "read_tab", + tool: createReadTabTool({ + resolveShortId: (prefix) => this.resolveTabHandle(prefix), + getLastResponse: (targetId) => this.getLastTabResponse(targetId), + listOpenHandles: () => this.listOpenHandles(tabId), + }), + }, + ]; + } + + /** + * Project a core `ResolveTabPrefixResult` down to the tool-facing + * `TabResolution` (minimal `{ id, title, handle }` refs). Each match's + * `handle` is recomputed via `shortestUniquePrefix` so the value the tool + * echoes back always matches what the UI currently shows. 
+ */ + private resolveTabHandle(prefix: string): TabResolution { + const res = resolveTabPrefix(prefix); + if (res.status === "none") return { status: "none" }; + if (res.status === "ok") { + return { + status: "ok", + tab: { + id: res.tab.id, + title: res.tab.title, + handle: shortestUniquePrefix(res.tab.id), + }, + }; + } + return { + status: "ambiguous", + matches: res.matches.map((t) => ({ + id: t.id, + title: t.title, + handle: shortestUniquePrefix(t.id), + })), + }; + } + + /** Snapshot of open tabs as `{ handle, title }`, excluding `exceptId` + * (typically the caller's own tab). Drives the "available tabs" hints. */ + private listOpenHandles(exceptId?: string): Array<{ handle: string; title: string }> { + return listOpenTabs() + .filter((t) => t.id !== exceptId) + .map((t) => ({ handle: shortestUniquePrefix(t.id), title: t.title })); + } + + /** + * Return a tab's most recent COMPLETED assistant turn as flat text, plus + * its current status. Reads the persisted chunk log (source of truth) and + * grabs the last `role === "assistant"` group's text chunks. `text` is null + * when no completed assistant turn exists yet. + */ + getLastTabResponse(tabId: string): { text: string | null; status: AgentStatus } { + const status = this.getTabStatus(tabId); + try { + const messages = getMessagesForTab(tabId); + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (!msg || msg.role !== "assistant") continue; + const text = msg.chunks + .filter((c): c is { type: "text"; text: string } => c.type === "text") + .map((c) => c.text) + .join("") + .trim(); + if (text.length > 0) return { text, status }; + } + } catch { + // DB unavailable / tab unknown — fall through to null. + } + return { text: null, status }; + } + + /** + * Deliver `message` to `tabId`, choosing the SAME routing as `POST /chat`: + * - target running → queue it (consumed like a user interrupt). + * - target idle/errored → wake it and start a new turn. 
+ * + * Returns quickly; does NOT block on the turn. Both the HTTP `/chat` path + * and the `send_to_tab` tool call through here so the running/idle decision + * lives in exactly one place. + * + * `opts` carries the per-request knobs `/chat` forwards (key/model, agent + * fallback chain, reasoning effort, working dir, an explicit queue id). The + * `send_to_tab` tool passes none of these — for a cold wake (a tab not in + * `tabAgents`, e.g. after a server restart) the key/model are hydrated from + * the live `TabAgent` if present, else from the persisted tab row. (A cold + * tab keeps its stored key/model but not its full agent-definition fallback + * chain — see plan notes.) + */ + deliverMessage( + tabId: string, + message: string, + opts: { + keyId?: string; + modelId?: string; + agentModels?: Array<{ key_id: string; model_id: string }>; + reasoningEffort?: "none" | "low" | "medium" | "high" | "max"; + workingDirectory?: string; + queueId?: string; + /** + * Who is sending this message. `"human"` (default) is unrestricted + * and REFILLS the target's agent-to-agent auto-wake budget. `"agent"` + * (from the `send_to_tab` tool) is governed by that budget: an + * agent-originated wake of an idle tab consumes one unit, and once the + * budget is exhausted the message is queued WITHOUT starting a turn + * (returned as `suppressed`) so a runaway A↔B loop can't spend tokens + * forever with no human in the loop. + */ + origin?: "human" | "agent"; + } = {}, + ): { status: "queued"; messageId: string } | { status: "started" } | { status: "suppressed" } { + const origin = opts.origin ?? "human"; + + // A human touching the tab clears any accumulated agent-wake throttle: + // the conversation is back under human supervision, so peers get a fresh + // budget of auto-wakes again. 
+ if (origin === "human") { + this._getOrCreateTabAgent(tabId).autoWakeBudget = MAX_AGENT_AUTO_WAKES; + } + + if (this.getTabStatus(tabId) === "running") { + // Busy target → always queue (consumed like a user interrupt), + // regardless of origin. Queuing does not itself start a turn, so it + // can't drive a runaway loop; we don't spend budget here. + const { messageId } = this.queueMessage(tabId, message, opts.queueId); + return { status: "queued", messageId }; + } + + // Idle/errored target → this delivery would WAKE the tab (start a turn). + // For agent-originated wakes, enforce the auto-wake budget first. + if (origin === "agent") { + const target = this._getOrCreateTabAgent(tabId); + if (target.autoWakeBudget <= 0) { + // Budget exhausted: preserve the message (queue it, never drop) + // but do NOT wake the tab. A human message will refill the budget + // and the queued message will be seen on the next human turn. + this.queueMessage(tabId, message, opts.queueId); + const notice = + `Automatic agent-to-agent message limit reached for this tab ` + + `(${MAX_AGENT_AUTO_WAKES} consecutive). Further messages from other tabs ` + + `are held until you send a message here.`; + this.emit({ type: "notice", message: notice }, tabId); + this.routeSystemEventToTab(tabId, "notice", notice); + return { status: "suppressed" }; + } + target.autoWakeBudget -= 1; + } + + // Resolve key/model: explicit opts win, then the live tab agent's, then + // the persisted row's. + const tabAgent = this.tabAgents.get(tabId); + let keyId = opts.keyId ?? tabAgent?.keyId ?? undefined; + let modelId = opts.modelId ?? tabAgent?.modelId ?? undefined; + const agentModels = opts.agentModels ?? tabAgent?.agentModels; + if (!keyId || !modelId) { + const row = getTab(tabId); + if (row) { + keyId = keyId ?? row.keyId ?? undefined; + modelId = modelId ?? row.modelId ?? 
undefined; + } + } + + this.processMessage( + tabId, + message, + keyId, + modelId, + opts.reasoningEffort, + opts.workingDirectory, + agentModels, + ).catch((err) => { + console.error(`[dispatch] deliverMessage processMessage error for tab ${tabId}:`, err); + }); + return { status: "started" }; + } + async processMessage( tabId: string, message: string, diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts index 73d3de5..19cc193 100644 --- a/packages/api/src/app.ts +++ b/packages/api/src/app.ts @@ -56,28 +56,33 @@ app.post("/chat", async (c) => { return c.json({ error: "message must be a non-empty string" }, 400); } - if (agentManager.getTabStatus(tabId) === "running") { - const queueId = typeof body.queueId === "string" ? body.queueId : undefined; - const { messageId } = agentManager.queueMessage(tabId, message, queueId); - return c.json({ status: "queued", messageId }); - } - const keyId = typeof body.keyId === "string" ? body.keyId : undefined; const modelId = typeof body.modelId === "string" ? body.modelId : undefined; const agentModels = Array.isArray(body.agentModels) ? body.agentModels : undefined; const workingDirectory = typeof body.workingDirectory === "string" ? body.workingDirectory : undefined; + const queueId = typeof body.queueId === "string" ? body.queueId : undefined; const validEfforts = ["none", "low", "medium", "high", "max"]; const reasoningEffort = typeof body.reasoningEffort === "string" && validEfforts.includes(body.reasoningEffort) ? (body.reasoningEffort as "none" | "low" | "medium" | "high" | "max") : undefined; - // Non-blocking — let the agent run in the background - agentManager - .processMessage(tabId, message, keyId, modelId, reasoningEffort, workingDirectory, agentModels) - .catch(console.error); + // Single routing decision (queue if busy, new turn if idle) shared with the + // `send_to_tab` tool via `AgentManager.deliverMessage`. Non-blocking — a + // started turn runs in the background. 
+ const outcome = agentManager.deliverMessage(tabId, message, { + ...(keyId ? { keyId } : {}), + ...(modelId ? { modelId } : {}), + ...(agentModels ? { agentModels } : {}), + ...(reasoningEffort ? { reasoningEffort } : {}), + ...(workingDirectory !== undefined ? { workingDirectory } : {}), + ...(queueId ? { queueId } : {}), + }); + if (outcome.status === "queued") { + return c.json({ status: "queued", messageId: outcome.messageId }); + } return c.json({ status: "ok" }); }); diff --git a/packages/api/tests/agent-manager.test.ts b/packages/api/tests/agent-manager.test.ts index 4415bbb..1358eb1 100644 --- a/packages/api/tests/agent-manager.test.ts +++ b/packages/api/tests/agent-manager.test.ts @@ -24,6 +24,39 @@ function resetFakeMessages(): void { function setFakeMessages(tabId: string, rows: FakeMessageRow[]): void { fakeMessagesByTab.set(tabId, rows); } + +// Configurable stub for the tabs DB (getTab / listOpenTabs). Tests can seed +// rows to exercise deliverMessage cold-hydration and handle resolution. +interface FakeTabRow { + id: string; + title: string; + keyId: string | null; + modelId: string | null; + parentTabId: string | null; + status: string; + isOpen: boolean; + position: number; + createdAt: number; + updatedAt: number; +} +const fakeTabs = new Map<string, FakeTabRow>(); +function resetFakeTabs(): void { + fakeTabs.clear(); +} +function setFakeTab(row: Partial<FakeTabRow> & { id: string }): void { + fakeTabs.set(row.id, { + title: "Tab", + keyId: null, + modelId: null, + parentTabId: null, + status: "idle", + isOpen: true, + position: 0, + createdAt: 0, + updatedAt: 0, + ...row, + }); +} function makeRow( tabId: string, seq: number, @@ -42,11 +75,22 @@ function makeRow( // because the production code reassigns `agent.messages = // rows.slice(...)` AFTER `new Agent()` returns — capturing a // reference at construction would yield a stale empty array. 
-const constructedAgents: Array<{ initialMessages: unknown[] }> = []; +const constructedAgents: Array<{ initialMessages: unknown[]; toolNames: string[] }> = []; function resetConstructedAgents(): void { constructedAgents.length = 0; } +// Configurable settings store so tests can toggle tool permissions +// (perm_send_to_tab / perm_read_tab / ...) and assert which tools the +// constructed Agent receives. Defaults to empty (getSetting → null). +const fakeSettings = new Map<string, string>(); +function resetFakeSettings(): void { + fakeSettings.clear(); +} +function setFakeSetting(key: string, value: string): void { + fakeSettings.set(key, value); +} + // Allow tests to swap in a custom `run` generator (e.g. to simulate // a fallback failure mid-stream). Returning to undefined restores // the default. @@ -87,12 +131,19 @@ vi.mock("@dispatch/core", () => ({ Agent: class MockAgent { status = "idle"; messages: unknown[] = []; + toolNames: string[] = []; + constructor(config: { tools?: Array<{ name: string }> }) { + this.toolNames = (config?.tools ?? []).map((t) => t.name); + } async *run(message: string): AsyncGenerator<unknown> { // Snapshot the post-construction pre-populated message list // the first thing `run()` does, before the real `Agent.run` // would push the current user message at line 546. Tests // inspect this to verify history was loaded correctly. - constructedAgents.push({ initialMessages: [...this.messages] }); + constructedAgents.push({ + initialMessages: [...this.messages], + toolNames: [...this.toolNames], + }); if (runImpl) { for await (const ev of runImpl(message)) yield ev; return; @@ -244,6 +295,41 @@ vi.mock("@dispatch/core", () => ({ }; }, createTab() {}, + getTab(id: string) { + return fakeTabs.get(id) ?? null; + }, + listOpenTabs() { + return [...fakeTabs.values()].filter((t) => t.isOpen); + }, + resolveTabPrefix(prefix: string) { + const sanitized = (prefix ?? 
"").toLowerCase().replace(/[^0-9a-f-]/g, ""); + if (sanitized.length < 4) return { status: "none" }; + const matches = [...fakeTabs.values()].filter( + (t) => t.isOpen && t.id.toLowerCase().startsWith(sanitized), + ); + if (matches.length === 0) return { status: "none" }; + if (matches.length === 1) return { status: "ok", tab: matches[0] }; + return { status: "ambiguous", matches }; + }, + shortestUniquePrefix(id: string) { + return (id ?? "").slice(0, 4); + }, + createSendToTabTool(_callbacks: unknown): ToolDefinition { + return { + name: "send_to_tab", + description: "send to tab", + parameters: { _type: "z.ZodObject", shape: {} } as unknown as ToolDefinition["parameters"], + execute: async () => "mock", + }; + }, + createReadTabTool(_callbacks: unknown): ToolDefinition { + return { + name: "read_tab", + description: "read tab", + parameters: { _type: "z.ZodObject", shape: {} } as unknown as ToolDefinition["parameters"], + execute: async () => "mock", + }; + }, getClaudeAccountsFromDB() { return []; }, @@ -256,8 +342,8 @@ vi.mock("@dispatch/core", () => ({ resolveApiKey() { return null; }, - getSetting(_key: string) { - return null; + getSetting(key: string) { + return fakeSettings.get(key) ?? 
null; }, appendChunks() { return []; @@ -316,6 +402,8 @@ describe("AgentManager", () => { beforeEach(() => { resetFakeMessages(); resetConstructedAgents(); + resetFakeTabs(); + resetFakeSettings(); setRunImpl(null); appendEventToChunksSpy.mockClear(); }); @@ -849,4 +937,273 @@ describe("AgentManager", () => { expect(snap["tab-early"]).not.toHaveProperty("currentChunks"); expect(snap["tab-early"]).not.toHaveProperty("currentAssistantId"); }); + + // ─── Tab-to-tab communication ───────────────────────────────── + + describe("deliverMessage", () => { + it("starts a new turn when the target tab is idle", async () => { + const manager = new AgentManager(); + const events: AgentEvent[] = []; + manager.onEvent((e) => events.push(e)); + + const outcome = manager.deliverMessage("tab-idle", "wake up"); + expect(outcome.status).toBe("started"); + + // Let the background turn run to completion. + await new Promise<void>((r) => setTimeout(r, 60)); + expect(events.some((e) => e.type === "text-delta")).toBe(true); + expect(manager.getTabStatus("tab-idle")).toBe("idle"); + }); + + it("queues the message when the target tab is running", () => { + const manager = new AgentManager(); + const inner = manager as unknown as { + tabAgents: Map<string, Record<string, unknown>>; + }; + // Seed a running tab agent directly. + inner.tabAgents.set("tab-busy", { + agent: null, + status: "running", + keyId: null, + modelId: null, + taskList: { onChange: () => {} }, + messageQueue: [], + queueListeners: [], + shellStore: {}, + transcriptStore: {}, + currentChunks: null, + currentAssistantId: null, + currentTurnId: null, + }); + + const outcome = manager.deliverMessage("tab-busy", "queued msg"); + expect(outcome.status).toBe("queued"); + if (outcome.status === "queued") { + expect(typeof outcome.messageId).toBe("string"); + } + // The message landed on the running tab's queue. 
+ const agent = inner.tabAgents.get("tab-busy") as { messageQueue: unknown[] }; + expect(agent.messageQueue).toHaveLength(1); + }); + + it("hydrates key/model from the persisted tab row for a cold wake", () => { + const manager = new AgentManager(); + setFakeTab({ id: "tab-cold", keyId: "persisted-key", modelId: "persisted-model" }); + + // Spy on processMessage to capture the key/model deliverMessage + // forwarded — asserting the hydration decision directly rather than + // downstream tabAgent state (which the mocked ModelRegistry rewrites). + const spy = vi.spyOn(manager, "processMessage").mockResolvedValue(undefined); + + const outcome = manager.deliverMessage("tab-cold", "hello"); + expect(outcome.status).toBe("started"); + + expect(spy).toHaveBeenCalledTimes(1); + const args = spy.mock.calls[0] ?? []; + expect(args[0]).toBe("tab-cold"); // tabId + expect(args[1]).toBe("hello"); // message + expect(args[2]).toBe("persisted-key"); // keyId hydrated from row + expect(args[3]).toBe("persisted-model"); // modelId hydrated from row + }); + + it("prefers explicit opts over the persisted row on a cold wake", () => { + const manager = new AgentManager(); + setFakeTab({ id: "tab-cold2", keyId: "row-key", modelId: "row-model" }); + const spy = vi.spyOn(manager, "processMessage").mockResolvedValue(undefined); + + manager.deliverMessage("tab-cold2", "hello", { + keyId: "explicit-key", + modelId: "explicit-model", + }); + + const args = spy.mock.calls[0] ?? []; + expect(args[2]).toBe("explicit-key"); + expect(args[3]).toBe("explicit-model"); + }); + }); + + describe("deliverMessage — agent auto-wake budget", () => { + it("allows up to 6 consecutive agent wakes, then suppresses further ones", () => { + const manager = new AgentManager(); + const spy = vi.spyOn(manager, "processMessage").mockResolvedValue(undefined); + + // 6 agent-originated wakes of an idle tab should all start turns. 
+ for (let i = 0; i < 6; i++) { + const outcome = manager.deliverMessage("tab-pp", `msg ${i}`, { origin: "agent" }); + expect(outcome.status).toBe("started"); + } + expect(spy).toHaveBeenCalledTimes(6); + + // The 7th is suppressed: no new turn, message preserved on the queue. + const seventh = manager.deliverMessage("tab-pp", "msg 7", { origin: "agent" }); + expect(seventh.status).toBe("suppressed"); + expect(spy).toHaveBeenCalledTimes(6); // unchanged — no wake + + const inner = manager as unknown as { + tabAgents: Map<string, { messageQueue: unknown[]; autoWakeBudget: number }>; + }; + const agent = inner.tabAgents.get("tab-pp"); + expect(agent?.autoWakeBudget).toBe(0); + // Suppressed message is queued, not dropped. + expect(agent?.messageQueue).toHaveLength(1); + }); + + it("a human message refills the budget and re-enables agent wakes", () => { + const manager = new AgentManager(); + vi.spyOn(manager, "processMessage").mockResolvedValue(undefined); + + // Exhaust the budget with agent wakes. + for (let i = 0; i < 6; i++) { + manager.deliverMessage("tab-refill", `a${i}`, { origin: "agent" }); + } + expect(manager.deliverMessage("tab-refill", "blocked", { origin: "agent" }).status).toBe( + "suppressed", + ); + + // A human message refills the budget... + const humanOutcome = manager.deliverMessage("tab-refill", "human here", { + origin: "human", + }); + expect(humanOutcome.status).toBe("started"); + + const inner = manager as unknown as { + tabAgents: Map<string, { autoWakeBudget: number }>; + }; + expect(inner.tabAgents.get("tab-refill")?.autoWakeBudget).toBe(6); + + // ...so an agent can wake it again. 
+ expect(manager.deliverMessage("tab-refill", "again", { origin: "agent" }).status).toBe( + "started", + ); + }); + + it("does not consume budget when the message is merely queued (busy target)", () => { + const manager = new AgentManager(); + const inner = manager as unknown as { + tabAgents: Map<string, Record<string, unknown>>; + }; + inner.tabAgents.set("tab-busy-budget", { + agent: null, + status: "running", + keyId: null, + modelId: null, + taskList: { onChange: () => {} }, + messageQueue: [], + queueListeners: [], + shellStore: {}, + transcriptStore: {}, + currentChunks: null, + currentAssistantId: null, + currentTurnId: null, + autoWakeBudget: 6, + }); + + const outcome = manager.deliverMessage("tab-busy-budget", "queued one", { + origin: "agent", + }); + expect(outcome.status).toBe("queued"); + // Budget untouched — queuing can't drive a runaway loop. + const agent = inner.tabAgents.get("tab-busy-budget") as { autoWakeBudget: number }; + expect(agent.autoWakeBudget).toBe(6); + }); + + it("human-originated wakes are never throttled", () => { + const manager = new AgentManager(); + const spy = vi.spyOn(manager, "processMessage").mockResolvedValue(undefined); + + // Far more than the budget, all human-originated → all start turns. 
+ for (let i = 0; i < 10; i++) { + const outcome = manager.deliverMessage("tab-human", `h${i}`, { origin: "human" }); + expect(outcome.status).toBe("started"); + } + expect(spy).toHaveBeenCalledTimes(10); + }); + + it("defaults origin to human when unspecified (POST /chat path)", () => { + const manager = new AgentManager(); + const spy = vi.spyOn(manager, "processMessage").mockResolvedValue(undefined); + for (let i = 0; i < 8; i++) { + expect(manager.deliverMessage("tab-default", `d${i}`).status).toBe("started"); + } + expect(spy).toHaveBeenCalledTimes(8); + }); + }); + + describe("getLastTabResponse", () => { + it("returns the most recent assistant turn's text and current status", () => { + const manager = new AgentManager(); + setFakeMessages("tab-hist", [ + makeRow("tab-hist", 1, "user", [{ type: "text", text: "hi" }]), + makeRow("tab-hist", 2, "assistant", [{ type: "text", text: "first answer" }]), + makeRow("tab-hist", 3, "user", [{ type: "text", text: "again" }]), + makeRow("tab-hist", 4, "assistant", [ + { type: "text", text: "second " }, + { type: "text", text: "answer" }, + ]), + ]); + + const res = manager.getLastTabResponse("tab-hist"); + expect(res.text).toBe("second answer"); + expect(res.status).toBe("idle"); + }); + + it("returns null text when the tab has no assistant turn yet", () => { + const manager = new AgentManager(); + setFakeMessages("tab-empty", [ + makeRow("tab-empty", 1, "user", [{ type: "text", text: "hi" }]), + ]); + const res = manager.getLastTabResponse("tab-empty"); + expect(res.text).toBeNull(); + }); + + it("skips assistant turns that contain no text chunks", () => { + const manager = new AgentManager(); + setFakeMessages("tab-toolonly", [ + makeRow("tab-toolonly", 1, "assistant", [{ type: "text", text: "real answer" }]), + // A later assistant turn with only non-text chunks should be skipped. 
+ makeRow("tab-toolonly", 2, "assistant", [{ type: "thinking", text: "hmm" }]), + ]); + const res = manager.getLastTabResponse("tab-toolonly"); + expect(res.text).toBe("real answer"); + }); + }); + + describe("send_to_tab / read_tab permission split", () => { + // Drives the real parent-path tool construction in getOrCreateAgentForTab + // by toggling the new split permissions and inspecting which tools the + // constructed Agent received. + async function toolsForPerms(tabId: string, perms: Record<string, string>): Promise<string[]> { + for (const [k, v] of Object.entries(perms)) setFakeSetting(k, v); + const manager = new AgentManager(); + await manager.processMessage(tabId, "go"); + return constructedAgents.at(-1)?.toolNames ?? []; + } + + it("grants only send_to_tab when only perm_send_to_tab is allowed", async () => { + const tools = await toolsForPerms("tab-send-only", { perm_send_to_tab: "allow" }); + expect(tools).toContain("send_to_tab"); + expect(tools).not.toContain("read_tab"); + }); + + it("grants only read_tab when only perm_read_tab is allowed", async () => { + const tools = await toolsForPerms("tab-read-only", { perm_read_tab: "allow" }); + expect(tools).toContain("read_tab"); + expect(tools).not.toContain("send_to_tab"); + }); + + it("grants both when both permissions are allowed", async () => { + const tools = await toolsForPerms("tab-both", { + perm_send_to_tab: "allow", + perm_read_tab: "allow", + }); + expect(tools).toContain("send_to_tab"); + expect(tools).toContain("read_tab"); + }); + + it("grants neither when both permissions are off", async () => { + const tools = await toolsForPerms("tab-neither", {}); + expect(tools).not.toContain("send_to_tab"); + expect(tools).not.toContain("read_tab"); + }); + }); }); diff --git a/packages/api/tests/routes.test.ts b/packages/api/tests/routes.test.ts index 4b8dd40..9ab2afe 100644 --- a/packages/api/tests/routes.test.ts +++ b/packages/api/tests/routes.test.ts @@ -166,6 +166,34 @@ 
vi.mock("@dispatch/core", () => ({ }; }, createTab() {}, + getTab() { + return null; + }, + listOpenTabs() { + return []; + }, + resolveTabPrefix() { + return { status: "none" }; + }, + shortestUniquePrefix(id: string) { + return (id ?? "").slice(0, 4); + }, + createSendToTabTool(_callbacks: unknown) { + return { + name: "send_to_tab", + description: "send to tab", + parameters: { _type: "z.ZodObject", shape: {} }, + execute: async () => "mock", + }; + }, + createReadTabTool(_callbacks: unknown) { + return { + name: "read_tab", + description: "read tab", + parameters: { _type: "z.ZodObject", shape: {} }, + execute: async () => "mock", + }; + }, getClaudeAccountsFromDB() { return []; }, |
