diff options
| author | Adam Malczewski <[email protected]> | 2026-06-02 16:06:13 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-02 16:06:13 +0900 |
| commit | b3aca3efe9e8cda79db6e2c7fa20482880ed16c3 (patch) | |
| tree | 3480c1e670d78040bb03a9ec930d815575efc463 /packages | |
| parent | 1541e8d9ecc305bb27cf004cb919ef9065eca8be (diff) | |
| parent | 2b57c1af0247954ccf57d9ba3b0f4a45502ef3da (diff) | |
| download | dispatch-b3aca3efe9e8cda79db6e2c7fa20482880ed16c3.tar.gz dispatch-b3aca3efe9e8cda79db6e2c7fa20482880ed16c3.zip | |
Merge branch 'dev' into feat/plus-button-sticky
Diffstat (limited to 'packages')
| -rw-r--r-- | packages/api/src/agent-manager.ts | 51 | ||||
| -rw-r--r-- | packages/api/tests/agent-manager.test.ts | 132 | ||||
| -rw-r--r-- | packages/core/src/tools/send-to-tab.ts | 61 | ||||
| -rw-r--r-- | packages/core/src/tools/summon.ts | 163 | ||||
| -rw-r--r-- | packages/core/tests/tools/send-to-tab.test.ts | 47 | ||||
| -rw-r--r-- | packages/core/tests/tools/summon.test.ts | 108 |
6 files changed, 491 insertions, 71 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 85dd160..2795a6c 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -83,6 +83,10 @@ const TOOL_DESCRIPTIONS: Record<string, string> = { web_search: "Search the web and optionally scrape full page content from results.", youtube_transcribe: "Fetch the transcript/subtitles for a YouTube video. Set background=true to start in the background and get a job_id for later retrieval.", + send_to_tab: + "Send a message to another tab (agent) by its short ID, as shown in the tab bar. Fire-and-forget: it queues/wakes the target and returns immediately without waiting for a reply. Do NOT sleep, poll, or run commands to wait — if the target replies it will wake you with a new message in a later turn; if you are only waiting, end your turn.", + read_tab: + "Read another tab (agent)'s most recent completed response by its short ID. Returns a non-blocking snapshot; if the target is still running you get its previous completed turn. Use after send_to_tab to collect a reply.", }; /** @@ -542,7 +546,7 @@ export class AgentManager { } // Tab-to-tab communication — gated on the child whitelist. if (allowed.has("send_to_tab") || allowed.has("read_tab")) { - for (const entry of this.buildTabCommToolEntries(tabId)) { + for (const entry of this.buildTabCommToolEntries(tabId, allowed.has("read_tab"))) { if (allowed.has(entry.name)) toolEntries.push(entry); } } @@ -575,7 +579,13 @@ export class AgentManager { }); } toolEntries.push({ name: "todo", tool: createTaskListTool(tabAgent.taskList) }); - if (permSummon) { + // The `summon` tool is registered when EITHER the subagent + // permission (`perm_summon`) OR the user-agent permission + // (`perm_user_agent`) is granted — the two are independent. + // `perm_summon` enables ordinary subagent spawning; granting + // only `perm_user_agent` exposes summon in user-agent-only mode + // (spawns top-level user agents exclusively). + if (permSummon || permUserAgent) { // Capture parent's allowed tool names for child permission enforcement const parentAllowedTools = new Set(toolEntries.map((e) => e.name)); const allAgentDefs = loadAgents(workingDirectory); @@ -609,25 +619,31 @@ export class AgentManager { availableUserAgents, agentDirPaths, permUserAgent, + permSummon, ), }); - toolEntries.push({ - name: "retrieve", - tool: createRetrieveTool({ - getResult: (id) => - tabAgent.shellStore.has(id) - ? tabAgent.shellStore.getResult(id) - : tabAgent.transcriptStore.has(id) - ? tabAgent.transcriptStore.getResult(id) - : this.getChildResult(id), - }), - }); + // `retrieve` collects subagent results. User agents are + // fire-and-forget, so it is bundled with the subagent + // permission only — a user-agent-only grant doesn't get it. + if (permSummon) { + toolEntries.push({ + name: "retrieve", + tool: createRetrieveTool({ + getResult: (id) => + tabAgent.shellStore.has(id) + ? tabAgent.shellStore.getResult(id) + : tabAgent.transcriptStore.has(id) + ? tabAgent.transcriptStore.getResult(id) + : this.getChildResult(id), + }), + }); + } } if (permSendToTab || permReadTab) { const tabCommAllowed = new Set<string>(); if (permSendToTab) tabCommAllowed.add("send_to_tab"); if (permReadTab) tabCommAllowed.add("read_tab"); - for (const entry of this.buildTabCommToolEntries(tabId)) { + for (const entry of this.buildTabCommToolEntries(tabId, permReadTab)) { if (tabCommAllowed.has(entry.name)) toolEntries.push(entry); } } @@ -1237,9 +1253,15 @@ export class AgentManager { * both tool-construction paths (child whitelist + permission-gated parent). * `selfHandle` is computed once so the calling tab can stamp provenance and * reject self-sends. + * + * `canReadTab` reflects whether THIS tab will also be granted `read_tab` + * (the permissions are split). It is forwarded into `send_to_tab` so the + * tool only points the agent at `read_tab` when it actually has it — never + * advertising a tool the agent wasn't granted. */ private buildTabCommToolEntries( tabId: string, + canReadTab: boolean, ): Array<{ name: string; tool: ReturnType<typeof createSendToTabTool> }> { const selfHandle = shortestUniquePrefix(tabId); return [ @@ -1253,6 +1275,7 @@ export class AgentManager { this.deliverMessage(targetId, message, { origin: "agent" }), listOpenHandles: () => this.listOpenHandles(tabId), self: { id: tabId, handle: selfHandle }, + canReadTab, }), }, { diff --git a/packages/api/tests/agent-manager.test.ts b/packages/api/tests/agent-manager.test.ts index 014022a..3353aff 100644 --- a/packages/api/tests/agent-manager.test.ts +++ b/packages/api/tests/agent-manager.test.ts @@ -75,7 +75,11 @@ function makeRow( // because the production code reassigns `agent.messages = // rows.slice(...)` AFTER `new Agent()` returns — capturing a // reference at construction would yield a stale empty array. -const constructedAgents: Array<{ initialMessages: unknown[]; toolNames: string[] }> = []; +const constructedAgents: Array<{ + initialMessages: unknown[]; + toolNames: string[]; + systemPrompt: string; +}> = []; function resetConstructedAgents(): void { constructedAgents.length = 0; } @@ -159,8 +163,10 @@ vi.mock("@dispatch/core", () => ({ status = "idle"; messages: unknown[] = []; toolNames: string[] = []; - constructor(config: { tools?: Array<{ name: string }> }) { + systemPrompt = ""; + constructor(config: { tools?: Array<{ name: string }>; systemPrompt?: string }) { this.toolNames = (config?.tools ?? []).map((t) => t.name); + this.systemPrompt = config?.systemPrompt ?? ""; } async *run(message: string, options?: { reasoningEffort?: string }): AsyncGenerator<unknown> { // Snapshot the post-construction pre-populated message list @@ -170,6 +176,7 @@ vi.mock("@dispatch/core", () => ({ constructedAgents.push({ initialMessages: [...this.messages], toolNames: [...this.toolNames], + systemPrompt: this.systemPrompt, }); capturedRunOptions.push(options); if (runImpl) { @@ -319,6 +326,22 @@ vi.mock("@dispatch/core", () => ({ execute: async () => "mock", }; }, + // Summon parent-path dependencies. The real implementations load agent + // definitions from disk; tests only need the summon/retrieve tool entries + // to appear, so these return empty projections. + loadAgents() { + return []; + }, + toAvailableSubagents() { + return []; + }, + toAvailableUserAgents() { + return []; + }, + getAgentDirPaths() { + return []; + }, + GLOBAL_AGENTS_DIR: "/tmp/global-agents", createTab() {}, getTab(id: string) { return fakeTabs.get(id) ?? null; @@ -1441,6 +1464,111 @@ describe("AgentManager", () => { }); }); + describe("summon / user_agent permission split", () => { + // Drives the real parent-path tool construction in + // getOrCreateAgentForTab by toggling perm_summon and perm_user_agent + // independently, then inspecting which tools the constructed Agent + // received. The summon tool must be registered when EITHER permission + // is granted; `retrieve` rides with the subagent permission only + // (user agents are fire-and-forget). + async function toolsForPerms(tabId: string, perms: Record<string, string>): Promise<string[]> { + for (const [k, v] of Object.entries(perms)) setFakeSetting(k, v); + const manager = new AgentManager(); + await manager.processMessage(tabId, "go"); + return constructedAgents.at(-1)?.toolNames ?? []; + } + + it("grants summon + retrieve when only perm_summon is allowed", async () => { + const tools = await toolsForPerms("tab-summon-only", { perm_summon: "allow" }); + expect(tools).toContain("summon"); + expect(tools).toContain("retrieve"); + }); + + it("grants summon WITHOUT retrieve when only perm_user_agent is allowed", async () => { + // Regression: granting only the user-agent permission used to leave + // the agent unable to summon user agents because the whole summon + // tool was gated behind perm_summon. + const tools = await toolsForPerms("tab-user-agent-only", { perm_user_agent: "allow" }); + expect(tools).toContain("summon"); + expect(tools).not.toContain("retrieve"); + }); + + it("grants summon + retrieve when both permissions are allowed", async () => { + const tools = await toolsForPerms("tab-summon-both", { + perm_summon: "allow", + perm_user_agent: "allow", + }); + expect(tools).toContain("summon"); + expect(tools).toContain("retrieve"); + }); + + it("grants neither summon nor retrieve when both permissions are off", async () => { + const tools = await toolsForPerms("tab-summon-neither", {}); + expect(tools).not.toContain("summon"); + expect(tools).not.toContain("retrieve"); + }); + }); + + // Regression: granted tab-messaging tools must also be ADVERTISED in the + // agent's system prompt. The tools were registered in the API tool payload + // but `buildSystemPrompt` filtered its "You have access to the following + // tools" list through TOOL_DESCRIPTIONS, which lacked send_to_tab/read_tab + // — so the model was told it didn't have them and refused to use them. This + // locks the prompt's capability list to the granted toolset. + describe("send_to_tab / read_tab system-prompt advertisement", () => { + async function promptForPerms(tabId: string, perms: Record<string, string>): Promise<string> { + for (const [k, v] of Object.entries(perms)) setFakeSetting(k, v); + const manager = new AgentManager(); + await manager.processMessage(tabId, "go"); + return constructedAgents.at(-1)?.systemPrompt ?? ""; + } + + it("lists send_to_tab in the system prompt when granted", async () => { + const prompt = await promptForPerms("tab-prompt-send", { perm_send_to_tab: "allow" }); + expect(prompt).toContain("- send_to_tab:"); + expect(prompt).not.toContain("- read_tab:"); + }); + + it("lists read_tab in the system prompt when granted", async () => { + const prompt = await promptForPerms("tab-prompt-read", { perm_read_tab: "allow" }); + expect(prompt).toContain("- read_tab:"); + expect(prompt).not.toContain("- send_to_tab:"); + }); + + it("lists both tab-messaging tools when both are granted", async () => { + const prompt = await promptForPerms("tab-prompt-both", { + perm_send_to_tab: "allow", + perm_read_tab: "allow", + }); + expect(prompt).toContain("- send_to_tab:"); + expect(prompt).toContain("- read_tab:"); + }); + + it("omits both from the system prompt when neither is granted", async () => { + const prompt = await promptForPerms("tab-prompt-neither", {}); + expect(prompt).not.toContain("- send_to_tab:"); + expect(prompt).not.toContain("- read_tab:"); + }); + + it("advertises exactly the granted tab tools (prompt list matches schema)", async () => { + for (const [k, v] of Object.entries({ + perm_send_to_tab: "allow", + perm_read_tab: "allow", + })) { + setFakeSetting(k, v); + } + const manager = new AgentManager(); + await manager.processMessage("tab-prompt-match", "go"); + const inst = constructedAgents.at(-1); + // Every granted tab-messaging tool surfaced in the schema must also be + // advertised in the prompt, so the model never believes it lacks one. + for (const name of ["send_to_tab", "read_tab"]) { + expect(inst?.toolNames).toContain(name); + expect(inst?.systemPrompt).toContain(`- ${name}:`); + } + }); + }); + // ─── Usage side-channel persistence ────────────────────────────── // // `usage` AgentEvents (one per LLM round-trip) are persisted as invisible diff --git a/packages/core/src/tools/send-to-tab.ts b/packages/core/src/tools/send-to-tab.ts index eb86b7e..eae6bfa 100644 --- a/packages/core/src/tools/send-to-tab.ts +++ b/packages/core/src/tools/send-to-tab.ts @@ -44,6 +44,13 @@ export interface SendToTabCallbacks { /** The calling tab's own id + handle — used to block self-sends and to * stamp provenance onto the delivered message. */ self: { id: string; handle: string }; + /** + * Whether THIS calling tab also has the `read_tab` tool granted. The + * tab-messaging permissions are split, so a tab can hold `send_to_tab` + * without `read_tab`. When false, the tool must NOT tell the agent to use + * `read_tab` (it doesn't have it) — replies only arrive on their own. + */ + canReadTab: boolean; } /** Render the "available tabs" hint shared by the none/ambiguous branches. */ @@ -54,6 +61,19 @@ function renderOpenHandles(handles: Array<{ handle: string; title: string }>): s } export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefinition { + // The `read_tab` follow-up hint is only truthful when this tab actually + // holds the `read_tab` tool (the permissions are split). When it doesn't, + // the only honest guidance is that a reply will wake it as a new message — never tell + // the agent to call a tool it wasn't granted. + const waitLine = callbacks.canReadTab + ? "money. If the target replies it will WAKE you with a new message in a later turn; you" + : "money. If the target replies it will WAKE you with a new message in a later turn."; + const readTabLine = callbacks.canReadTab + ? ["can also call 'read_tab' with the same ID in a FUTURE turn to check. If you have other"] + : []; + const keepGoingLine = callbacks.canReadTab + ? "work to do, keep going; if you are ONLY waiting for the reply, end your turn now." + : "If you have other work to do, keep going; if you are ONLY waiting for the reply, end your turn now."; return { name: "send_to_tab", description: [ @@ -64,9 +84,14 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti " - If the target tab is idle, your message WAKES it and starts a new turn.", "", "This is fire-and-forget: it returns immediately and does NOT wait for a reply.", - "Use the 'read_tab' tool with the same ID later to read the target's latest response.", + "Do NOT sleep, poll, or run shell commands to wait for a reply — that wastes turns and", + waitLine, + ...readTabLine, + keepGoingLine, "", - "Your tab ID is auto-added to the top of the message so the recipient can reply to you.", + "Your tab ID is auto-added to the top of the message so the recipient knows who to reply", + "to. The recipient must use this same 'send_to_tab' tool (addressed to your ID) to answer;", + "a plain text response reaches only their own user, not you.", "IDs are git-style prefixes: pass any length that uniquely identifies the target (min 4 chars).", "If the ID is ambiguous you'll be asked to add a character.", ].join("\n"), @@ -117,8 +142,18 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti } // Stamp provenance so the recipient (and the watching user) can see - // which tab the message came from and reply back via its handle. - const delivered = `[message from tab ${callbacks.self.handle}]\n\n${message}`; + // which tab the message came from and how to reply. The header makes + // clear this is a PEER AGENT, not the recipient's own user, and the + // footer states the reply contract: a reply (only if warranted) must + // go back through `send_to_tab`, since a plain text answer reaches + // only the recipient's own user — not this sender. + const delivered = [ + `[message from tab ${callbacks.self.handle} — this is another agent, NOT your user]`, + "", + message, + "", + `[To reply to tab ${callbacks.self.handle}, use the send_to_tab tool with tab_id "${callbacks.self.handle}". ONLY reply if this message asks you to, or your user tells you to — it may just be context or instructions. A plain text response goes to your own user, not to this agent.]`, + ].join("\n"); try { const result = await callbacks.deliver(target.id, delivered); @@ -138,7 +173,23 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti result.status === "queued" ? "queued (target is busy; it will be picked up next turn)" : "delivered (target was idle; a new turn has started)"; - return `Message ${verb}. Target tab: ${target.handle} (${target.title}). Use read_tab with "${target.handle}" to read its reply later.`; + const tail = callbacks.canReadTab + ? [ + "Do NOT sleep, poll, or run commands to wait for a reply. If the target replies it", + `will WAKE you with a new message later; you can also call read_tab with "${target.handle}"`, + "in a FUTURE turn to check. Keep working if you have other tasks; if you are ONLY", + "waiting for this reply, end your turn now.", + ] + : [ + "Do NOT sleep, poll, or run commands to wait for a reply. If the target replies it", + "will WAKE you with a new message later. Keep working if you have other tasks; if", + "you are ONLY waiting for this reply, end your turn now.", + ]; + return [ + `Message ${verb}. Target tab: ${target.handle} (${target.title}).`, + "", + ...tail, + ].join("\n"); } catch (err) { return `Error delivering message: ${err instanceof Error ? err.message : String(err)}`; } diff --git a/packages/core/src/tools/summon.ts b/packages/core/src/tools/summon.ts index 4820e89..cfee8b8 100644 --- a/packages/core/src/tools/summon.ts +++ b/packages/core/src/tools/summon.ts @@ -60,10 +60,13 @@ function renderAgentGroup(label: string, agents: AvailableAgent[]): string[] { * the disk locations where they live, injected into the summon tool's * description. * - * When `userAgentEnabled` is false only subagents are shown (under the - * generic "Available agents" heading). When it is true, subagents and - * user agents are listed as two labelled groups so the LLM understands - * which slugs require `top_level=true`. + * `subagentEnabled` and `userAgentEnabled` independently control which + * groups are shown — they mirror the `perm_summon` and `perm_user_agent` + * permissions respectively: + * - subagents only → generic "Available agents" heading; + * - user agents only → a single user-agent group (top_level is implied); + * - both → two labelled groups so the LLM understands which slugs + * require `top_level=true`. * * Returns a compact "no agents defined" notice when nothing is visible. */ @@ -72,6 +75,7 @@ function buildAgentsCatalog( userAgents: AvailableAgent[], agentDirs: string[], userAgentEnabled: boolean, + subagentEnabled: boolean, ): string { const lines: string[] = []; lines.push(""); @@ -80,8 +84,9 @@ function buildAgentsCatalog( lines.push(` - ${d}`); } + const visibleSubagents = subagentEnabled ? subagents : []; const visibleUserAgents = userAgentEnabled ? userAgents : []; - if (subagents.length === 0 && visibleUserAgents.length === 0) { + if (visibleSubagents.length === 0 && visibleUserAgents.length === 0) { lines.push(""); lines.push("No agent definitions are currently defined."); return lines.join("\n"); @@ -93,12 +98,26 @@ function buildAgentsCatalog( lines.push("and working directory; the 'tools' parameter is ignored."); lines.push(""); + // User-agent-only mode: list just the user agents. top_level is implied + // (it is the only thing this grant can spawn), so the heading omits it. + if (!subagentEnabled && userAgentEnabled) { + lines.push( + ...renderAgentGroup( + "User agents (spawned as independent top-level tabs):", + visibleUserAgents, + ), + ); + return lines.join("\n"); + } + + // Subagent-only mode: single generic heading. if (!userAgentEnabled) { - lines.push(...renderAgentGroup("Available agents:", subagents)); + lines.push(...renderAgentGroup("Available agents:", visibleSubagents)); return lines.join("\n"); } - const subagentLines = renderAgentGroup("Subagents (spawned as child tabs):", subagents); + // Both enabled: two labelled groups. + const subagentLines = renderAgentGroup("Subagents (spawned as child tabs):", visibleSubagents); const userAgentLines = renderAgentGroup( "User agents (spawned as independent top-level tabs, requires top_level=true):", visibleUserAgents, @@ -122,9 +141,14 @@ function buildAgentsCatalog( * its description; this is information-only — the runtime resolves * slugs through `loadAgent` independently. * - * `userAgentEnabled` controls whether the `top_level` parameter and the - * user-agent catalog are surfaced to the LLM. It mirrors the - * `perm_user_agent` permission. + * `userAgentEnabled` mirrors the `perm_user_agent` permission and + * `subagentEnabled` mirrors the `perm_summon` permission. They are + * independent: the tool is registered whenever at least one is granted. + * - subagentEnabled only → spawn ordinary subagents (no `top_level`); + * - userAgentEnabled only → spawn ONLY top-level user agents + * (`top_level` is forced on, the `background` knob is dropped, and + * the catalog lists user agents only); + * - both → full behavior (subagents plus `top_level` user agents). */ export function createSummonTool( _defaultWorkingDirectory: string, @@ -133,39 +157,29 @@ export function createSummonTool( availableUserAgents: AvailableAgent[] = [], agentDirs: string[] = [], userAgentEnabled = false, + subagentEnabled = true, ): ToolDefinition { + // When only the user-agent permission is granted the tool spawns user + // agents exclusively: `top_level` is implied (and forced), subagent + // mechanics (background, retrieve, parallel work) are irrelevant. + const userAgentOnly = userAgentEnabled && !subagentEnabled; + const catalog = buildAgentsCatalog( availableSubagents, availableUserAgents, agentDirs, userAgentEnabled, + subagentEnabled, ); const subagentSlugs = availableSubagents.map((a) => a.slug); const userAgentSlugs = availableUserAgents.map((a) => a.slug); - const allSlugs = userAgentEnabled ? [...subagentSlugs, ...userAgentSlugs] : subagentSlugs; + const allSlugs = userAgentOnly + ? userAgentSlugs + : userAgentEnabled + ? [...subagentSlugs, ...userAgentSlugs] + : subagentSlugs; - const description = [ - "Spawn a new child agent to work on a task independently.", - "", - "By default, blocks until the child agent finishes and returns the result directly.", - "Set background=true to return immediately with an agent_id instead — use retrieve to collect the result later.", - "", - "The child agent runs in its own tab visible to the user. Use the 'retrieve' tool with the returned agent_id to get the result when needed.", - "", - "Pattern for parallel work:", - " 1. Call summon multiple times with background=true to start several agents", - " 2. Do your own work or wait", - " 3. Call retrieve for each agent_id to collect results", - ...(userAgentEnabled - ? [ - "", - "Set top_level=true to spawn an independent user agent — a first-class", - "top-level tab with no parent. User agents are fire-and-forget: you get", - "an agent_id back but cannot retrieve their result. top_level requires an", - "'agent' definition listed under 'User agents' below.", - ] - : []), - "", + const toolNamesList = [ "The 'tools' parameter controls what the child can do. Available tool names:", " - read_file: Read file contents", " - read_file_slice: Read a character-range slice of a single line", @@ -179,11 +193,50 @@ export function createSummonTool( " - youtube_transcribe: Fetch YouTube video transcripts", " - send_to_tab: Send a message to another tab/agent by its ID", " - read_tab: Read another tab/agent's latest response by its ID", - "", - "The 'agent' parameter is required — every spawned agent must use a definition.", - "Tools default to the agent definition's tools, intersected with your own tools (you can't grant capabilities you don't have).", - catalog, - ].join("\n"); + ]; + + const description = userAgentOnly + ? [ + "Spawn an independent top-level user agent to work on a task.", + "", + "User agents are first-class top-level tabs with no parent. They are", + "fire-and-forget: you get an agent_id back but cannot retrieve their result.", + "The user agent runs in its own tab visible to the user.", + "", + ...toolNamesList, + "", + "The 'agent' parameter is required — every spawned agent must use a definition.", + "Tools default to the agent definition's tools, intersected with your own tools (you can't grant capabilities you don't have).", + catalog, + ].join("\n") + : [ + "Spawn a new child agent to work on a task independently.", + "", + "By default, blocks until the child agent finishes and returns the result directly.", + "Set background=true to return immediately with an agent_id instead — use retrieve to collect the result later.", + "", + "The child agent runs in its own tab visible to the user. Use the 'retrieve' tool with the returned agent_id to get the result when needed.", + "", + "Pattern for parallel work:", + " 1. Call summon multiple times with background=true to start several agents", + " 2. Do your own work or wait", + " 3. Call retrieve for each agent_id to collect results", + ...(userAgentEnabled + ? [ + "", + "Set top_level=true to spawn an independent user agent — a first-class", + "top-level tab with no parent. User agents are fire-and-forget: you get", + "an agent_id back but cannot retrieve their result. top_level requires an", + "'agent' definition listed under 'User agents' below.", + ] + : []), + "", + ...toolNamesList, + "", + "The 'agent' parameter is required — every spawned agent must use a definition.", + "Tools default to the agent definition's tools, intersected with your own tools (you can't grant capabilities you don't have).", + catalog, + ].join("\n"); const parametersShape = { task: z @@ -205,7 +258,10 @@ export function createSummonTool( .filter(Boolean) .join(" "), ), - ...(userAgentEnabled + // `top_level` is only an explicit choice when BOTH subagents and user + // agents are available. In user-agent-only mode it is implied (forced + // on), so the knob is omitted entirely. + ...(userAgentEnabled && !userAgentOnly ? { top_level: z .boolean() @@ -248,12 +304,18 @@ export function createSummonTool( .describe( "Absolute path for the child to work in. Defaults to the agent definition's cwd (or the spawning agent's directory).", ), - background: z - .boolean() - .optional() - .describe( - "If true, returns immediately with an agent_id for later retrieval. If false (default), blocks until the child agent finishes and returns the result directly. Ignored when top_level is true.", - ), + // `background` is meaningless for fire-and-forget user agents, so the + // knob is omitted in user-agent-only mode. + ...(userAgentOnly + ? {} + : { + background: z + .boolean() + .optional() + .describe( + "If true, returns immediately with an agent_id for later retrieval. If false (default), blocks until the child agent finishes and returns the result directly. Ignored when top_level is true.", + ), + }), }; return { @@ -266,9 +328,14 @@ export function createSummonTool( const tools = args.tools as string[] | undefined; const workingDirectory = args.working_directory as string | undefined; const background = (args.background as boolean | undefined) ?? false; - const topLevel = userAgentEnabled - ? ((args.top_level as boolean | undefined) ?? false) - : false; + // User-agent-only mode always spawns top-level user agents. When both + // capabilities are present the caller chooses via `top_level`. When + // only subagents are available, top-level spawning is unavailable. + const topLevel = userAgentOnly + ? true + : userAgentEnabled + ? ((args.top_level as boolean | undefined) ?? false) + : false; try { const agentId = await callbacks.spawn({ diff --git a/packages/core/tests/tools/send-to-tab.test.ts b/packages/core/tests/tools/send-to-tab.test.ts index 4450fc5..21d8032 100644 --- a/packages/core/tests/tools/send-to-tab.test.ts +++ b/packages/core/tests/tools/send-to-tab.test.ts @@ -14,6 +14,7 @@ function makeCallbacks(overrides: Partial<SendToTabCallbacks> = {}): SendToTabCa deliver: () => ({ status: "started" }), listOpenHandles: () => [{ handle: "targ", title: "Target" }], self: { id: "self-id", handle: "self" }, + canReadTab: true, ...overrides, }; } @@ -24,6 +25,22 @@ describe("createSendToTabTool — schema & description", () => { expect(tool.name).toBe("send_to_tab"); expect(tool.description).toContain("fire-and-forget"); expect(tool.description.toLowerCase()).toContain("queued"); + // Description must steer the model away from busy-waiting for a reply. + expect(tool.description.toLowerCase()).toContain("do not sleep"); + expect(tool.description.toLowerCase()).toContain("end your turn"); + }); + + it("mentions read_tab in the description only when canReadTab is true", () => { + const tool = createSendToTabTool(makeCallbacks({ canReadTab: true })); + expect(tool.description).toContain("read_tab"); + }); + + it("never mentions read_tab in the description when canReadTab is false", () => { + const tool = createSendToTabTool(makeCallbacks({ canReadTab: false })); + expect(tool.description).not.toContain("read_tab"); + // Still tells the agent a reply will wake it + to end its turn. + expect(tool.description.toLowerCase()).toContain("wake you with a new message"); + expect(tool.description.toLowerCase()).toContain("end your turn"); }); }); @@ -35,11 +52,37 @@ describe("createSendToTabTool — execute()", () => { expect(deliver).toHaveBeenCalledTimes(1); const [targetId, delivered] = deliver.mock.calls[0] ?? []; expect(targetId).toBe("target-id"); - // Provenance prefix names the sending tab's handle. - expect(delivered).toContain("[message from tab self]"); + // Provenance header names the sending tab's handle and marks it as a + // peer agent (not the recipient's own user). + expect(delivered).toContain("[message from tab self"); + expect(delivered).toContain("another agent"); expect(delivered).toContain("hello there"); + // Reply contract: the recipient must answer via send_to_tab back to the + // sender's handle, not as a plain text reply to its own user. + expect(delivered).toContain('send_to_tab tool with tab_id "self"'); + expect(delivered).toContain("ONLY reply if"); expect(out).toContain("idle"); expect(out).toContain("targ"); + // Sender is steered away from busy-waiting and told to end its turn. + expect(out.toLowerCase()).toContain("do not sleep"); + expect(out.toLowerCase()).toContain("end your turn"); + }); + + it("points the sender at read_tab in the result only when canReadTab is true", async () => { + const deliver = vi.fn(() => ({ status: "started" as const })); + const tool = createSendToTabTool(makeCallbacks({ deliver, canReadTab: true })); + const out = await tool.execute({ tab_id: "targ", message: "hi" }); + expect(out).toContain("read_tab"); + }); + + it("omits read_tab from the result when canReadTab is false", async () => { + const deliver = vi.fn(() => ({ status: "started" as const })); + const tool = createSendToTabTool(makeCallbacks({ deliver, canReadTab: false })); + const out = await tool.execute({ tab_id: "targ", message: "hi" }); + expect(out).not.toContain("read_tab"); + // Still steers away from busy-waiting and toward ending the turn. + expect(out.toLowerCase()).toContain("do not sleep"); + expect(out.toLowerCase()).toContain("end your turn"); }); it("reports the queued status when the target is busy", async () => { diff --git a/packages/core/tests/tools/summon.test.ts b/packages/core/tests/tools/summon.test.ts index f59f345..4885a94 100644 --- a/packages/core/tests/tools/summon.test.ts +++ b/packages/core/tests/tools/summon.test.ts @@ -239,3 +239,111 @@ describe("createSummonTool — execute() argument forwarding", () => { expect(getResult).toHaveBeenCalled(); }); }); + +describe("createSummonTool — user-agent-only mode (perm_user_agent without perm_summon)", () => { + // userAgentEnabled=true, subagentEnabled=false → the tool spawns ONLY + // top-level user agents. `top_level` is implied (and forced), the + // subagent/parallel-work prose is dropped, and only the user-agent + // catalog group is shown. + const subagents: AvailableAgent[] = [ + { + slug: "programmer", + name: "Programmer", + description: "Codes things", + path: "/agents/programmer.toml", + }, + ]; + const userAgents: AvailableAgent[] = [ + { + slug: "default", + name: "Default", + description: "Default agent", + path: "/agents/default.toml", + }, + ]; + + function userAgentOnlyTool( + spawn = vi.fn(async () => "ua-1"), + getResult = vi.fn(async () => ({ status: "done" as const, result: "nope" })), + ) { + return { + spawn, + getResult, + tool: createSummonTool( + "/tmp/work", + { spawn, getResult }, + subagents, + userAgents, + ["/agents"], + true, // userAgentEnabled + false, // subagentEnabled + ), + }; + } + + it("describes spawning user agents and omits subagent/parallel-work prose", () => { + const { tool } = userAgentOnlyTool(); + expect(tool.description).toContain("Spawn an independent top-level user agent"); + expect(tool.description).toContain("fire-and-forget"); + expect(tool.description).not.toContain("Pattern for parallel work"); + expect(tool.description).not.toContain("Set background=true"); + }); + + it("lists only the user-agent catalog group, not subagents", () => { + const { tool } = userAgentOnlyTool(); + expect(tool.description).toContain("User agents (spawned as independent top-level tabs):"); + expect(tool.description).toContain("default"); + // Subagents must not be advertised in user-agent-only mode. + expect(tool.description).not.toContain("Subagents (spawned as child tabs):"); + expect(tool.description).not.toContain("- programmer: Programmer"); + }); + + it("only lists user-agent slugs in the 'agent' parameter description", () => { + const { tool } = userAgentOnlyTool(); + const agentParam = (tool.parameters as unknown as { shape: { agent: { description: string } } }) + .shape.agent; + expect(agentParam.description).toContain("default"); + expect(agentParam.description).not.toContain("programmer"); + }); + + it("omits the top_level parameter (it is implied)", () => { + const { tool } = userAgentOnlyTool(); + const shape = (tool.parameters as unknown as { shape: Record<string, unknown> }).shape; + expect("top_level" in shape).toBe(false); + }); + + it("omits the background parameter (user agents are fire-and-forget)", () => { + const { tool } = userAgentOnlyTool(); + const shape = (tool.parameters as unknown as { shape: Record<string, unknown> }).shape; + expect("background" in shape).toBe(false); + }); + + it("forces topLevel=true on spawn even when top_level is not passed", async () => { + const spawn = vi.fn(async () => "ua-99"); + const getResult = vi.fn(async () => ({ status: "done" as const, result: "nope" })); + const { tool } = userAgentOnlyTool(spawn, getResult); + const out = await tool.execute({ task: "do stuff", agent: "default" }); + expect(out).toContain("User agent spawned successfully"); + expect(out).toContain("ua-99"); + expect(out).toContain("fire-and-forget"); + // Never blocks on a result for fire-and-forget user agents. + expect(getResult).not.toHaveBeenCalled(); + const callArg = spawn.mock.calls[0]?.[0]; + expect(callArg).toMatchObject({ topLevel: true, agentSlug: "default" }); + }); +}); + +describe("createSummonTool — subagentEnabled defaults preserve legacy behavior", () => { + it("defaults subagentEnabled=true so omitting it keeps subagent spawning", async () => { + const spawn = vi.fn(async () => "tab-1"); + const getResult = vi.fn(async () => ({ status: "done" as const, result: "child" })); + // No userAgentEnabled/subagentEnabled args → legacy subagent-only mode. + const tool = createSummonTool("/tmp/work", { spawn, getResult }, [], []); + const out = await tool.execute({ task: "x", agent: "programmer" }); + // Foreground subagent summon blocks and returns the child result. + expect(out).toBe("agent_id: tab-1\n\nchild"); + expect(getResult).toHaveBeenCalled(); + const callArg = spawn.mock.calls[0]?.[0]; + expect(callArg).not.toHaveProperty("topLevel"); + }); +}); |
