From 9c89ec9db22d0a7226c36b62640addc00918029b Mon Sep 17 00:00:00 2001 From: Adam Malczewski Date: Tue, 2 Jun 2026 15:30:42 +0900 Subject: fix(tabs): advertise send_to_tab/read_tab in the agent system prompt Granted tab-messaging tools were registered in the API tool payload but buildSystemPrompt built its 'You have access to the following tools' list by filtering toolNames through TOOL_DESCRIPTIONS, which had no entries for send_to_tab/read_tab. The model was therefore told it lacked those tools and refused to use them even when explicitly granted. Add the two missing TOOL_DESCRIPTIONS entries so the capability list matches the granted toolset. Add regression tests that capture the constructed Agent's systemPrompt and assert the tab-messaging tools are listed when granted (and omitted when not), locking the prompt list to the schema list so they can't drift again. --- packages/api/src/agent-manager.ts | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'packages/api/src') diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 85dd160..36a26f8 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -83,6 +83,10 @@ const TOOL_DESCRIPTIONS: Record = { web_search: "Search the web and optionally scrape full page content from results.", youtube_transcribe: "Fetch the transcript/subtitles for a YouTube video. Set background=true to start in the background and get a job_id for later retrieval.", + send_to_tab: + "Send a message to another tab (agent) by its short ID, as shown in the tab bar. Fire-and-forget: it queues/wakes the target and returns immediately without waiting for a reply. Use read_tab later to read the target's response.", + read_tab: + "Read another tab (agent)'s most recent completed response by its short ID. Returns a non-blocking snapshot; if the target is still running you get its previous completed turn. Use after send_to_tab to collect a reply.", }; /** -- cgit v1.2.3 From e475e527cd768dc05368a0881a07a84ea140e13e Mon Sep 17 00:00:00 2001 From: Adam Malczewski Date: Tue, 2 Jun 2026 15:42:00 +0900 Subject: fix(tabs): clearer send_to_tab context to stop busy-wait + wrong-recipient replies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two behavioral problems observed once the tools were usable: 1. The SENDER busy-waited for a reply (ran 'sleep 20' / polled) instead of ending its turn. Tool description, the delivery result text, and the system-prompt one-liner now say plainly: do not sleep/poll/run commands to wait; a reply arrives on its own in a later turn (or via read_tab in a future turn); keep working if there's other work, else end your turn. 2. The RECIPIENT replied to its OWN user in plain text instead of routing the answer back through send_to_tab. The provenance wrapper now states the message is from another AGENT (not your user), and that to reply you must use send_to_tab addressed to the sender's handle — and only if asked, since it may just be context. A plain text answer reaches only your own user. Tests updated for the new wording. --- packages/api/src/agent-manager.ts | 2 +- packages/core/src/tools/send-to-tab.ts | 32 ++++++++++++++++++++++----- packages/core/tests/tools/send-to-tab.test.ts | 16 ++++++++++++-- 3 files changed, 42 insertions(+), 8 deletions(-) (limited to 'packages/api/src') diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 36a26f8..4264884 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -84,7 +84,7 @@ const TOOL_DESCRIPTIONS: Record = { youtube_transcribe: "Fetch the transcript/subtitles for a YouTube video. Set background=true to start in the background and get a job_id for later retrieval.", send_to_tab: - "Send a message to another tab (agent) by its short ID, as shown in the tab bar. Fire-and-forget: it queues/wakes the target and returns immediately without waiting for a reply. Use read_tab later to read the target's response.", + "Send a message to another tab (agent) by its short ID, as shown in the tab bar. Fire-and-forget: it queues/wakes the target and returns immediately without waiting for a reply. Do NOT sleep, poll, or run commands to wait — a reply arrives on its own in a later turn (or use read_tab in a future turn); if you are only waiting, end your turn.", read_tab: "Read another tab (agent)'s most recent completed response by its short ID. Returns a non-blocking snapshot; if the target is still running you get its previous completed turn. Use after send_to_tab to collect a reply.", }; diff --git a/packages/core/src/tools/send-to-tab.ts b/packages/core/src/tools/send-to-tab.ts index eb86b7e..84e5f25 100644 --- a/packages/core/src/tools/send-to-tab.ts +++ b/packages/core/src/tools/send-to-tab.ts @@ -64,9 +64,14 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti " - If the target tab is idle, your message WAKES it and starts a new turn.", "", "This is fire-and-forget: it returns immediately and does NOT wait for a reply.", - "Use the 'read_tab' tool with the same ID later to read the target's latest response.", + "Do NOT sleep, poll, or run shell commands to wait for a reply — that wastes turns and", + "money. If the target replies it arrives on its own as a new message in a later turn; you", + "can also call 'read_tab' with the same ID in a FUTURE turn to check. If you have other", + "work to do, keep going; if you are ONLY waiting for the reply, end your turn now.", "", - "Your tab ID is auto-added to the top of the message so the recipient can reply to you.", + "Your tab ID is auto-added to the top of the message so the recipient knows who to reply", + "to. The recipient must use this same 'send_to_tab' tool (addressed to your ID) to answer;", + "a plain text response reaches only their own user, not you.", "IDs are git-style prefixes: pass any length that uniquely identifies the target (min 4 chars).", "If the ID is ambiguous you'll be asked to add a character.", ].join("\n"), @@ -117,8 +122,18 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti } // Stamp provenance so the recipient (and the watching user) can see - // which tab the message came from and reply back via its handle. - const delivered = `[message from tab ${callbacks.self.handle}]\n\n${message}`; + // which tab the message came from and how to reply. The header makes + // clear this is a PEER AGENT, not the recipient's own user, and the + // footer states the reply contract: a reply (only if warranted) must + // go back through `send_to_tab`, since a plain text answer reaches + // only the recipient's own user — not this sender. + const delivered = [ + `[message from tab ${callbacks.self.handle} — this is another agent, NOT your user]`, + "", + message, + "", + `[To reply to tab ${callbacks.self.handle}, use the send_to_tab tool with tab_id "${callbacks.self.handle}". Only reply if this message asks you to, or your user tells you to — it may just be context or instructions. A plain text response goes to your own user, not to this agent.]`, + ].join("\n"); try { const result = await callbacks.deliver(target.id, delivered); @@ -138,7 +153,14 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti result.status === "queued" ? "queued (target is busy; it will be picked up next turn)" : "delivered (target was idle; a new turn has started)"; - return `Message ${verb}. Target tab: ${target.handle} (${target.title}). Use read_tab with "${target.handle}" to read its reply later.`; + return [ + `Message ${verb}. Target tab: ${target.handle} (${target.title}).`, + "", + "Do NOT sleep, poll, or run commands to wait for a reply. If the target replies it", + `arrives on its own as a new message later; you can also call read_tab with "${target.handle}"`, + "in a FUTURE turn to check. Keep working if you have other tasks; if you are ONLY", + "waiting for this reply, end your turn now.", + ].join("\n"); } catch (err) { return `Error delivering message: ${err instanceof Error ? err.message : String(err)}`; } diff --git a/packages/core/tests/tools/send-to-tab.test.ts b/packages/core/tests/tools/send-to-tab.test.ts index 4450fc5..68f8fa0 100644 --- a/packages/core/tests/tools/send-to-tab.test.ts +++ b/packages/core/tests/tools/send-to-tab.test.ts @@ -24,6 +24,9 @@ describe("createSendToTabTool — schema & description", () => { expect(tool.name).toBe("send_to_tab"); expect(tool.description).toContain("fire-and-forget"); expect(tool.description.toLowerCase()).toContain("queued"); + // Description must steer the model away from busy-waiting for a reply. + expect(tool.description.toLowerCase()).toContain("do not sleep"); + expect(tool.description.toLowerCase()).toContain("end your turn"); }); }); @@ -35,11 +38,20 @@ describe("createSendToTabTool — execute()", () => { expect(deliver).toHaveBeenCalledTimes(1); const [targetId, delivered] = deliver.mock.calls[0] ?? []; expect(targetId).toBe("target-id"); - // Provenance prefix names the sending tab's handle. - expect(delivered).toContain("[message from tab self]"); + // Provenance header names the sending tab's handle and marks it as a + // peer agent (not the recipient's own user). + expect(delivered).toContain("[message from tab self"); + expect(delivered).toContain("another agent"); expect(delivered).toContain("hello there"); + // Reply contract: the recipient must answer via send_to_tab back to the + // sender's handle, not as a plain text reply to its own user. + expect(delivered).toContain('send_to_tab tool with tab_id "self"'); + expect(delivered.toLowerCase()).toContain("only reply if"); expect(out).toContain("idle"); expect(out).toContain("targ"); + // Sender is steered away from busy-waiting and told to end its turn. + expect(out.toLowerCase()).toContain("do not sleep"); + expect(out.toLowerCase()).toContain("end your turn"); }); it("reports the queued status when the target is busy", async () => { -- cgit v1.2.3 From aa295e82197ebc77d9466eee28380bc5bcc0863d Mon Sep 17 00:00:00 2001 From: Adam Malczewski Date: Tue, 2 Jun 2026 15:53:15 +0900 Subject: fix(tabs): only mention read_tab when the sender actually has it; CAPS on ONLY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The send_to_tab guidance previously told the agent it could call read_tab to check for a reply, but the tab-messaging permissions are split — a tab can hold send_to_tab WITHOUT read_tab (the exact case in testing). Advertising a tool the agent wasn't granted is wrong. Thread a canReadTab flag from AgentManager.buildTabCommToolEntries into createSendToTabTool (true iff this tab is also granted read_tab). The tool description and the delivery-result text now only reference read_tab when canReadTab is true; otherwise they say a reply arrives on its own and to end the turn. Drop the read_tab phrasing from the static TOOL_DESCRIPTIONS one-liner (can't be conditional per-tab there). Also uppercase ONLY in the recipient reply-contract footer for emphasis. Tests: cover both canReadTab branches for description + result text; assert ONLY is uppercased. --- packages/api/src/agent-manager.ts | 13 ++++++-- packages/core/src/tools/send-to-tab.ts | 45 ++++++++++++++++++++++----- packages/core/tests/tools/send-to-tab.test.ts | 33 +++++++++++++++++++- 3 files changed, 79 insertions(+), 12 deletions(-) (limited to 'packages/api/src') diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 4264884..3d233fc 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -84,7 +84,7 @@ const TOOL_DESCRIPTIONS: Record = { youtube_transcribe: "Fetch the transcript/subtitles for a YouTube video. Set background=true to start in the background and get a job_id for later retrieval.", send_to_tab: - "Send a message to another tab (agent) by its short ID, as shown in the tab bar. Fire-and-forget: it queues/wakes the target and returns immediately without waiting for a reply. Do NOT sleep, poll, or run commands to wait — a reply arrives on its own in a later turn (or use read_tab in a future turn); if you are only waiting, end your turn.", + "Send a message to another tab (agent) by its short ID, as shown in the tab bar. Fire-and-forget: it queues/wakes the target and returns immediately without waiting for a reply. Do NOT sleep, poll, or run commands to wait — a reply arrives on its own in a later turn; if you are only waiting, end your turn.", read_tab: "Read another tab (agent)'s most recent completed response by its short ID. Returns a non-blocking snapshot; if the target is still running you get its previous completed turn. Use after send_to_tab to collect a reply.", }; @@ -546,7 +546,7 @@ export class AgentManager { } // Tab-to-tab communication — gated on the child whitelist. if (allowed.has("send_to_tab") || allowed.has("read_tab")) { - for (const entry of this.buildTabCommToolEntries(tabId)) { + for (const entry of this.buildTabCommToolEntries(tabId, allowed.has("read_tab"))) { if (allowed.has(entry.name)) toolEntries.push(entry); } } @@ -631,7 +631,7 @@ export class AgentManager { const tabCommAllowed = new Set(); if (permSendToTab) tabCommAllowed.add("send_to_tab"); if (permReadTab) tabCommAllowed.add("read_tab"); - for (const entry of this.buildTabCommToolEntries(tabId)) { + for (const entry of this.buildTabCommToolEntries(tabId, permReadTab)) { if (tabCommAllowed.has(entry.name)) toolEntries.push(entry); } } @@ -1241,9 +1241,15 @@ export class AgentManager { * both tool-construction paths (child whitelist + permission-gated parent). * `selfHandle` is computed once so the calling tab can stamp provenance and * reject self-sends. + * + * `canReadTab` reflects whether THIS tab will also be granted `read_tab` + * (the permissions are split). It is forwarded into `send_to_tab` so the + * tool only points the agent at `read_tab` when it actually has it — never + * advertising a tool the agent wasn't granted. */ private buildTabCommToolEntries( tabId: string, + canReadTab: boolean, ): Array<{ name: string; tool: ReturnType }> { const selfHandle = shortestUniquePrefix(tabId); return [ @@ -1257,6 +1263,7 @@ export class AgentManager { this.deliverMessage(targetId, message, { origin: "agent" }), listOpenHandles: () => this.listOpenHandles(tabId), self: { id: tabId, handle: selfHandle }, + canReadTab, }), }, { diff --git a/packages/core/src/tools/send-to-tab.ts b/packages/core/src/tools/send-to-tab.ts index 84e5f25..50023a7 100644 --- a/packages/core/src/tools/send-to-tab.ts +++ b/packages/core/src/tools/send-to-tab.ts @@ -44,6 +44,13 @@ export interface SendToTabCallbacks { /** The calling tab's own id + handle — used to block self-sends and to * stamp provenance onto the delivered message. */ self: { id: string; handle: string }; + /** + * Whether THIS calling tab also has the `read_tab` tool granted. The + * tab-messaging permissions are split, so a tab can hold `send_to_tab` + * without `read_tab`. When false, the tool must NOT tell the agent to use + * `read_tab` (it doesn't have it) — replies only arrive on their own. + */ + canReadTab: boolean; } /** Render the "available tabs" hint shared by the none/ambiguous branches. */ @@ -54,6 +61,19 @@ function renderOpenHandles(handles: Array<{ handle: string; title: string }>): s } export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefinition { + // The `read_tab` follow-up hint is only truthful when this tab actually + // holds the `read_tab` tool (the permissions are split). When it doesn't, + // the only honest guidance is that a reply arrives on its own — never tell + // the agent to call a tool it wasn't granted. + const waitLine = callbacks.canReadTab + ? "money. If the target replies it arrives on its own as a new message in a later turn; you" + : "money. If the target replies it arrives on its own as a new message in a later turn."; + const readTabLine = callbacks.canReadTab + ? ["can also call 'read_tab' with the same ID in a FUTURE turn to check. If you have other"] + : []; + const keepGoingLine = callbacks.canReadTab + ? "work to do, keep going; if you are ONLY waiting for the reply, end your turn now." + : "If you have other work to do, keep going; if you are ONLY waiting for the reply, end your turn now."; return { name: "send_to_tab", description: [ @@ -65,9 +85,9 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti "", "This is fire-and-forget: it returns immediately and does NOT wait for a reply.", "Do NOT sleep, poll, or run shell commands to wait for a reply — that wastes turns and", - "money. If the target replies it arrives on its own as a new message in a later turn; you", - "can also call 'read_tab' with the same ID in a FUTURE turn to check. If you have other", - "work to do, keep going; if you are ONLY waiting for the reply, end your turn now.", + waitLine, + ...readTabLine, + keepGoingLine, "", "Your tab ID is auto-added to the top of the message so the recipient knows who to reply", "to. The recipient must use this same 'send_to_tab' tool (addressed to your ID) to answer;", @@ -132,7 +152,7 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti "", message, "", - `[To reply to tab ${callbacks.self.handle}, use the send_to_tab tool with tab_id "${callbacks.self.handle}". Only reply if this message asks you to, or your user tells you to — it may just be context or instructions. A plain text response goes to your own user, not to this agent.]`, + `[To reply to tab ${callbacks.self.handle}, use the send_to_tab tool with tab_id "${callbacks.self.handle}". ONLY reply if this message asks you to, or your user tells you to — it may just be context or instructions. A plain text response goes to your own user, not to this agent.]`, ].join("\n"); try { @@ -153,13 +173,22 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti result.status === "queued" ? "queued (target is busy; it will be picked up next turn)" : "delivered (target was idle; a new turn has started)"; + const tail = callbacks.canReadTab + ? [ + "Do NOT sleep, poll, or run commands to wait for a reply. If the target replies it", + `arrives on its own as a new message later; you can also call read_tab with "${target.handle}"`, + "in a FUTURE turn to check. Keep working if you have other tasks; if you are ONLY", + "waiting for this reply, end your turn now.", + ] + : [ + "Do NOT sleep, poll, or run commands to wait for a reply. If the target replies it", + "arrives on its own as a new message later. Keep working if you have other tasks; if", + "you are ONLY waiting for this reply, end your turn now.", + ]; return [ `Message ${verb}. Target tab: ${target.handle} (${target.title}).`, "", - "Do NOT sleep, poll, or run commands to wait for a reply. If the target replies it", - `arrives on its own as a new message later; you can also call read_tab with "${target.handle}"`, - "in a FUTURE turn to check. Keep working if you have other tasks; if you are ONLY", - "waiting for this reply, end your turn now.", + ...tail, ].join("\n"); } catch (err) { return `Error delivering message: ${err instanceof Error ? err.message : String(err)}`; diff --git a/packages/core/tests/tools/send-to-tab.test.ts b/packages/core/tests/tools/send-to-tab.test.ts index 68f8fa0..48ff460 100644 --- a/packages/core/tests/tools/send-to-tab.test.ts +++ b/packages/core/tests/tools/send-to-tab.test.ts @@ -14,6 +14,7 @@ function makeCallbacks(overrides: Partial = {}): SendToTabCa deliver: () => ({ status: "started" }), listOpenHandles: () => [{ handle: "targ", title: "Target" }], self: { id: "self-id", handle: "self" }, + canReadTab: true, ...overrides, }; } @@ -28,6 +29,19 @@ describe("createSendToTabTool — schema & description", () => { expect(tool.description.toLowerCase()).toContain("do not sleep"); expect(tool.description.toLowerCase()).toContain("end your turn"); }); + + it("mentions read_tab in the description only when canReadTab is true", () => { + const tool = createSendToTabTool(makeCallbacks({ canReadTab: true })); + expect(tool.description).toContain("read_tab"); + }); + + it("never mentions read_tab in the description when canReadTab is false", () => { + const tool = createSendToTabTool(makeCallbacks({ canReadTab: false })); + expect(tool.description).not.toContain("read_tab"); + // Still tells the agent a reply arrives on its own + to end its turn. + expect(tool.description.toLowerCase()).toContain("arrives on its own"); + expect(tool.description.toLowerCase()).toContain("end your turn"); + }); }); describe("createSendToTabTool — execute()", () => { @@ -46,7 +60,7 @@ describe("createSendToTabTool — execute()", () => { // Reply contract: the recipient must answer via send_to_tab back to the // sender's handle, not as a plain text reply to its own user. expect(delivered).toContain('send_to_tab tool with tab_id "self"'); - expect(delivered.toLowerCase()).toContain("only reply if"); + expect(delivered).toContain("ONLY reply if"); expect(out).toContain("idle"); expect(out).toContain("targ"); // Sender is steered away from busy-waiting and told to end its turn. @@ -54,6 +68,23 @@ describe("createSendToTabTool — execute()", () => { expect(out.toLowerCase()).toContain("end your turn"); }); + it("points the sender at read_tab in the result only when canReadTab is true", async () => { + const deliver = vi.fn(() => ({ status: "started" as const })); + const tool = createSendToTabTool(makeCallbacks({ deliver, canReadTab: true })); + const out = await tool.execute({ tab_id: "targ", message: "hi" }); + expect(out).toContain("read_tab"); + }); + + it("omits read_tab from the result when canReadTab is false", async () => { + const deliver = vi.fn(() => ({ status: "started" as const })); + const tool = createSendToTabTool(makeCallbacks({ deliver, canReadTab: false })); + const out = await tool.execute({ tab_id: "targ", message: "hi" }); + expect(out).not.toContain("read_tab"); + // Still steers away from busy-waiting and toward ending the turn. + expect(out.toLowerCase()).toContain("do not sleep"); + expect(out.toLowerCase()).toContain("end your turn"); + }); + it("reports the queued status when the target is busy", async () => { const deliver = vi.fn(() => ({ status: "queued" as const })); const tool = createSendToTabTool(makeCallbacks({ deliver })); -- cgit v1.2.3 From 3ff2db698c2633023934d8477a9e995f78fa011e Mon Sep 17 00:00:00 2001 From: Adam Malczewski Date: Tue, 2 Jun 2026 15:54:39 +0900 Subject: fix(perm): decouple perm_user_agent from perm_summon for spawning user agents Granting only the user-agent (top-level) permission without the subagent-summon permission left the agent unable to summon user agents: the whole summon tool was gated behind perm_summon, so perm_user_agent alone produced no summon tool. Register summon when EITHER perm_summon OR perm_user_agent is granted. createSummonTool now takes an independent subagentEnabled flag (mirrors perm_summon) alongside userAgentEnabled (mirrors perm_user_agent): - subagent-only -> ordinary subagents, no top_level - user-agent-only -> spawns ONLY top-level user agents (top_level forced, background/top_level params dropped, user-agent catalog only) - both -> unchanged full behavior retrieve stays bundled with perm_summon (user agents are fire-and-forget). Adds core summon tests (user-agent-only mode + legacy-default regression) and an agent-manager summon/user_agent permission-split suite. --- packages/api/src/agent-manager.ts | 36 ++++--- packages/api/tests/agent-manager.test.ts | 61 ++++++++++++ packages/core/src/tools/summon.ts | 163 ++++++++++++++++++++++--------- packages/core/tests/tools/summon.test.ts | 108 ++++++++++++++++++++ 4 files changed, 308 insertions(+), 60 deletions(-) (limited to 'packages/api/src') diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 85dd160..9499ce5 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -575,7 +575,13 @@ export class AgentManager { }); } toolEntries.push({ name: "todo", tool: createTaskListTool(tabAgent.taskList) }); - if (permSummon) { + // The `summon` tool is registered when EITHER the subagent + // permission (`perm_summon`) OR the user-agent permission + // (`perm_user_agent`) is granted — the two are independent. + // `perm_summon` enables ordinary subagent spawning; granting + // only `perm_user_agent` exposes summon in user-agent-only mode + // (spawns top-level user agents exclusively). + if (permSummon || permUserAgent) { // Capture parent's allowed tool names for child permission enforcement const parentAllowedTools = new Set(toolEntries.map((e) => e.name)); const allAgentDefs = loadAgents(workingDirectory); @@ -609,19 +615,25 @@ export class AgentManager { availableUserAgents, agentDirPaths, permUserAgent, + permSummon, ), }); - toolEntries.push({ - name: "retrieve", - tool: createRetrieveTool({ - getResult: (id) => - tabAgent.shellStore.has(id) - ? tabAgent.shellStore.getResult(id) - : tabAgent.transcriptStore.has(id) - ? tabAgent.transcriptStore.getResult(id) - : this.getChildResult(id), - }), - }); + // `retrieve` collects subagent results. User agents are + // fire-and-forget, so it is bundled with the subagent + // permission only — a user-agent-only grant doesn't get it. + if (permSummon) { + toolEntries.push({ + name: "retrieve", + tool: createRetrieveTool({ + getResult: (id) => + tabAgent.shellStore.has(id) + ? tabAgent.shellStore.getResult(id) + : tabAgent.transcriptStore.has(id) + ? tabAgent.transcriptStore.getResult(id) + : this.getChildResult(id), + }), + }); + } } if (permSendToTab || permReadTab) { const tabCommAllowed = new Set(); diff --git a/packages/api/tests/agent-manager.test.ts b/packages/api/tests/agent-manager.test.ts index 014022a..f3ea207 100644 --- a/packages/api/tests/agent-manager.test.ts +++ b/packages/api/tests/agent-manager.test.ts @@ -319,6 +319,22 @@ vi.mock("@dispatch/core", () => ({ execute: async () => "mock", }; }, + // Summon parent-path dependencies. The real implementations load agent + // definitions from disk; tests only need the summon/retrieve tool entries + // to appear, so these return empty projections. + loadAgents() { + return []; + }, + toAvailableSubagents() { + return []; + }, + toAvailableUserAgents() { + return []; + }, + getAgentDirPaths() { + return []; + }, + GLOBAL_AGENTS_DIR: "/tmp/global-agents", createTab() {}, getTab(id: string) { return fakeTabs.get(id) ?? null; @@ -1441,6 +1457,51 @@ describe("AgentManager", () => { }); }); + describe("summon / user_agent permission split", () => { + // Drives the real parent-path tool construction in + // getOrCreateAgentForTab by toggling perm_summon and perm_user_agent + // independently, then inspecting which tools the constructed Agent + // received. The summon tool must be registered when EITHER permission + // is granted; `retrieve` rides with the subagent permission only + // (user agents are fire-and-forget). + async function toolsForPerms(tabId: string, perms: Record): Promise { + for (const [k, v] of Object.entries(perms)) setFakeSetting(k, v); + const manager = new AgentManager(); + await manager.processMessage(tabId, "go"); + return constructedAgents.at(-1)?.toolNames ?? []; + } + + it("grants summon + retrieve when only perm_summon is allowed", async () => { + const tools = await toolsForPerms("tab-summon-only", { perm_summon: "allow" }); + expect(tools).toContain("summon"); + expect(tools).toContain("retrieve"); + }); + + it("grants summon WITHOUT retrieve when only perm_user_agent is allowed", async () => { + // Regression: granting only the user-agent permission used to leave + // the agent unable to summon user agents because the whole summon + // tool was gated behind perm_summon. + const tools = await toolsForPerms("tab-user-agent-only", { perm_user_agent: "allow" }); + expect(tools).toContain("summon"); + expect(tools).not.toContain("retrieve"); + }); + + it("grants summon + retrieve when both permissions are allowed", async () => { + const tools = await toolsForPerms("tab-summon-both", { + perm_summon: "allow", + perm_user_agent: "allow", + }); + expect(tools).toContain("summon"); + expect(tools).toContain("retrieve"); + }); + + it("grants neither summon nor retrieve when both permissions are off", async () => { + const tools = await toolsForPerms("tab-summon-neither", {}); + expect(tools).not.toContain("summon"); + expect(tools).not.toContain("retrieve"); + }); + }); + // ─── Usage side-channel persistence ────────────────────────────── // // `usage` AgentEvents (one per LLM round-trip) are persisted as invisible diff --git a/packages/core/src/tools/summon.ts b/packages/core/src/tools/summon.ts index 4820e89..cfee8b8 100644 --- a/packages/core/src/tools/summon.ts +++ b/packages/core/src/tools/summon.ts @@ -60,10 +60,13 @@ function renderAgentGroup(label: string, agents: AvailableAgent[]): string[] { * the disk locations where they live, injected into the summon tool's * description. * - * When `userAgentEnabled` is false only subagents are shown (under the - * generic "Available agents" heading). When it is true, subagents and - * user agents are listed as two labelled groups so the LLM understands - * which slugs require `top_level=true`. + * `subagentEnabled` and `userAgentEnabled` independently control which + * groups are shown — they mirror the `perm_summon` and `perm_user_agent` + * permissions respectively: + * - subagents only → generic "Available agents" heading; + * - user agents only → a single user-agent group (top_level is implied); + * - both → two labelled groups so the LLM understands which slugs + * require `top_level=true`. * * Returns a compact "no agents defined" notice when nothing is visible. */ @@ -72,6 +75,7 @@ function buildAgentsCatalog( userAgents: AvailableAgent[], agentDirs: string[], userAgentEnabled: boolean, + subagentEnabled: boolean, ): string { const lines: string[] = []; lines.push(""); @@ -80,8 +84,9 @@ function buildAgentsCatalog( lines.push(` - ${d}`); } + const visibleSubagents = subagentEnabled ? subagents : []; const visibleUserAgents = userAgentEnabled ? userAgents : []; - if (subagents.length === 0 && visibleUserAgents.length === 0) { + if (visibleSubagents.length === 0 && visibleUserAgents.length === 0) { lines.push(""); lines.push("No agent definitions are currently defined."); return lines.join("\n"); @@ -93,12 +98,26 @@ function buildAgentsCatalog( lines.push("and working directory; the 'tools' parameter is ignored."); lines.push(""); + // User-agent-only mode: list just the user agents. top_level is implied + // (it is the only thing this grant can spawn), so the heading omits it. + if (!subagentEnabled && userAgentEnabled) { + lines.push( + ...renderAgentGroup( + "User agents (spawned as independent top-level tabs):", + visibleUserAgents, + ), + ); + return lines.join("\n"); + } + + // Subagent-only mode: single generic heading. if (!userAgentEnabled) { - lines.push(...renderAgentGroup("Available agents:", subagents)); + lines.push(...renderAgentGroup("Available agents:", visibleSubagents)); return lines.join("\n"); } - const subagentLines = renderAgentGroup("Subagents (spawned as child tabs):", subagents); + // Both enabled: two labelled groups. + const subagentLines = renderAgentGroup("Subagents (spawned as child tabs):", visibleSubagents); const userAgentLines = renderAgentGroup( "User agents (spawned as independent top-level tabs, requires top_level=true):", visibleUserAgents, @@ -122,9 +141,14 @@ function buildAgentsCatalog( * its description; this is information-only — the runtime resolves * slugs through `loadAgent` independently. * - * `userAgentEnabled` controls whether the `top_level` parameter and the - * user-agent catalog are surfaced to the LLM. It mirrors the - * `perm_user_agent` permission. + * `userAgentEnabled` mirrors the `perm_user_agent` permission and + * `subagentEnabled` mirrors the `perm_summon` permission. They are + * independent: the tool is registered whenever at least one is granted. + * - subagentEnabled only → spawn ordinary subagents (no `top_level`); + * - userAgentEnabled only → spawn ONLY top-level user agents + * (`top_level` is forced on, the `background` knob is dropped, and + * the catalog lists user agents only); + * - both → full behavior (subagents plus `top_level` user agents). */ export function createSummonTool( _defaultWorkingDirectory: string, @@ -133,39 +157,29 @@ export function createSummonTool( availableUserAgents: AvailableAgent[] = [], agentDirs: string[] = [], userAgentEnabled = false, + subagentEnabled = true, ): ToolDefinition { + // When only the user-agent permission is granted the tool spawns user + // agents exclusively: `top_level` is implied (and forced), subagent + // mechanics (background, retrieve, parallel work) are irrelevant. + const userAgentOnly = userAgentEnabled && !subagentEnabled; + const catalog = buildAgentsCatalog( availableSubagents, availableUserAgents, agentDirs, userAgentEnabled, + subagentEnabled, ); const subagentSlugs = availableSubagents.map((a) => a.slug); const userAgentSlugs = availableUserAgents.map((a) => a.slug); - const allSlugs = userAgentEnabled ? [...subagentSlugs, ...userAgentSlugs] : subagentSlugs; + const allSlugs = userAgentOnly + ? userAgentSlugs + : userAgentEnabled + ? [...subagentSlugs, ...userAgentSlugs] + : subagentSlugs; - const description = [ - "Spawn a new child agent to work on a task independently.", - "", - "By default, blocks until the child agent finishes and returns the result directly.", - "Set background=true to return immediately with an agent_id instead — use retrieve to collect the result later.", - "", - "The child agent runs in its own tab visible to the user. Use the 'retrieve' tool with the returned agent_id to get the result when needed.", - "", - "Pattern for parallel work:", - " 1. Call summon multiple times with background=true to start several agents", - " 2. Do your own work or wait", - " 3. Call retrieve for each agent_id to collect results", - ...(userAgentEnabled - ? [ - "", - "Set top_level=true to spawn an independent user agent — a first-class", - "top-level tab with no parent. User agents are fire-and-forget: you get", - "an agent_id back but cannot retrieve their result. top_level requires an", - "'agent' definition listed under 'User agents' below.", - ] - : []), - "", + const toolNamesList = [ "The 'tools' parameter controls what the child can do. Available tool names:", " - read_file: Read file contents", " - read_file_slice: Read a character-range slice of a single line", @@ -179,11 +193,50 @@ export function createSummonTool( " - youtube_transcribe: Fetch YouTube video transcripts", " - send_to_tab: Send a message to another tab/agent by its ID", " - read_tab: Read another tab/agent's latest response by its ID", - "", - "The 'agent' parameter is required — every spawned agent must use a definition.", - "Tools default to the agent definition's tools, intersected with your own tools (you can't grant capabilities you don't have).", - catalog, - ].join("\n"); + ]; + + const description = userAgentOnly + ? [ + "Spawn an independent top-level user agent to work on a task.", + "", + "User agents are first-class top-level tabs with no parent. They are", + "fire-and-forget: you get an agent_id back but cannot retrieve their result.", + "The user agent runs in its own tab visible to the user.", + "", + ...toolNamesList, + "", + "The 'agent' parameter is required — every spawned agent must use a definition.", + "Tools default to the agent definition's tools, intersected with your own tools (you can't grant capabilities you don't have).", + catalog, + ].join("\n") + : [ + "Spawn a new child agent to work on a task independently.", + "", + "By default, blocks until the child agent finishes and returns the result directly.", + "Set background=true to return immediately with an agent_id instead — use retrieve to collect the result later.", + "", + "The child agent runs in its own tab visible to the user. Use the 'retrieve' tool with the returned agent_id to get the result when needed.", + "", + "Pattern for parallel work:", + " 1. Call summon multiple times with background=true to start several agents", + " 2. Do your own work or wait", + " 3. Call retrieve for each agent_id to collect results", + ...(userAgentEnabled + ? [ + "", + "Set top_level=true to spawn an independent user agent — a first-class", + "top-level tab with no parent. User agents are fire-and-forget: you get", + "an agent_id back but cannot retrieve their result. top_level requires an", + "'agent' definition listed under 'User agents' below.", + ] + : []), + "", + ...toolNamesList, + "", + "The 'agent' parameter is required — every spawned agent must use a definition.", + "Tools default to the agent definition's tools, intersected with your own tools (you can't grant capabilities you don't have).", + catalog, + ].join("\n"); const parametersShape = { task: z @@ -205,7 +258,10 @@ export function createSummonTool( .filter(Boolean) .join(" "), ), - ...(userAgentEnabled + // `top_level` is only an explicit choice when BOTH subagents and user + // agents are available. In user-agent-only mode it is implied (forced + // on), so the knob is omitted entirely. + ...(userAgentEnabled && !userAgentOnly ? { top_level: z .boolean() @@ -248,12 +304,18 @@ export function createSummonTool( .describe( "Absolute path for the child to work in. Defaults to the agent definition's cwd (or the spawning agent's directory).", ), - background: z - .boolean() - .optional() - .describe( - "If true, returns immediately with an agent_id for later retrieval. If false (default), blocks until the child agent finishes and returns the result directly. Ignored when top_level is true.", - ), + // `background` is meaningless for fire-and-forget user agents, so the + // knob is omitted in user-agent-only mode. + ...(userAgentOnly + ? {} + : { + background: z + .boolean() + .optional() + .describe( + "If true, returns immediately with an agent_id for later retrieval. If false (default), blocks until the child agent finishes and returns the result directly. Ignored when top_level is true.", + ), + }), }; return { @@ -266,9 +328,14 @@ export function createSummonTool( const tools = args.tools as string[] | undefined; const workingDirectory = args.working_directory as string | undefined; const background = (args.background as boolean | undefined) ?? false; - const topLevel = userAgentEnabled - ? ((args.top_level as boolean | undefined) ?? false) - : false; + // User-agent-only mode always spawns top-level user agents. When both + // capabilities are present the caller chooses via `top_level`. When + // only subagents are available, top-level spawning is unavailable. + const topLevel = userAgentOnly + ? true + : userAgentEnabled + ? ((args.top_level as boolean | undefined) ?? false) + : false; try { const agentId = await callbacks.spawn({ diff --git a/packages/core/tests/tools/summon.test.ts b/packages/core/tests/tools/summon.test.ts index f59f345..4885a94 100644 --- a/packages/core/tests/tools/summon.test.ts +++ b/packages/core/tests/tools/summon.test.ts @@ -239,3 +239,111 @@ describe("createSummonTool — execute() argument forwarding", () => { expect(getResult).toHaveBeenCalled(); }); }); + +describe("createSummonTool — user-agent-only mode (perm_user_agent without perm_summon)", () => { + // userAgentEnabled=true, subagentEnabled=false → the tool spawns ONLY + // top-level user agents. `top_level` is implied (and forced), the + // subagent/parallel-work prose is dropped, and only the user-agent + // catalog group is shown. + const subagents: AvailableAgent[] = [ + { + slug: "programmer", + name: "Programmer", + description: "Codes things", + path: "/agents/programmer.toml", + }, + ]; + const userAgents: AvailableAgent[] = [ + { + slug: "default", + name: "Default", + description: "Default agent", + path: "/agents/default.toml", + }, + ]; + + function userAgentOnlyTool( + spawn = vi.fn(async () => "ua-1"), + getResult = vi.fn(async () => ({ status: "done" as const, result: "nope" })), + ) { + return { + spawn, + getResult, + tool: createSummonTool( + "/tmp/work", + { spawn, getResult }, + subagents, + userAgents, + ["/agents"], + true, // userAgentEnabled + false, // subagentEnabled + ), + }; + } + + it("describes spawning user agents and omits subagent/parallel-work prose", () => { + const { tool } = userAgentOnlyTool(); + expect(tool.description).toContain("Spawn an independent top-level user agent"); + expect(tool.description).toContain("fire-and-forget"); + expect(tool.description).not.toContain("Pattern for parallel work"); + expect(tool.description).not.toContain("Set background=true"); + }); + + it("lists only the user-agent catalog group, not subagents", () => { + const { tool } = userAgentOnlyTool(); + expect(tool.description).toContain("User agents (spawned as independent top-level tabs):"); + expect(tool.description).toContain("default"); + // Subagents must not be advertised in user-agent-only mode. + expect(tool.description).not.toContain("Subagents (spawned as child tabs):"); + expect(tool.description).not.toContain("- programmer: Programmer"); + }); + + it("only lists user-agent slugs in the 'agent' parameter description", () => { + const { tool } = userAgentOnlyTool(); + const agentParam = (tool.parameters as unknown as { shape: { agent: { description: string } } }) + .shape.agent; + expect(agentParam.description).toContain("default"); + expect(agentParam.description).not.toContain("programmer"); + }); + + it("omits the top_level parameter (it is implied)", () => { + const { tool } = userAgentOnlyTool(); + const shape = (tool.parameters as unknown as { shape: Record }).shape; + expect("top_level" in shape).toBe(false); + }); + + it("omits the background parameter (user agents are fire-and-forget)", () => { + const { tool } = userAgentOnlyTool(); + const shape = (tool.parameters as unknown as { shape: Record }).shape; + expect("background" in shape).toBe(false); + }); + + it("forces topLevel=true on spawn even when top_level is not passed", async () => { + const spawn = vi.fn(async () => "ua-99"); + const getResult = vi.fn(async () => ({ status: "done" as const, result: "nope" })); + const { tool } = userAgentOnlyTool(spawn, getResult); + const out = await tool.execute({ task: "do stuff", agent: "default" }); + expect(out).toContain("User agent spawned successfully"); + expect(out).toContain("ua-99"); + expect(out).toContain("fire-and-forget"); + // Never blocks on a result for fire-and-forget user agents. + expect(getResult).not.toHaveBeenCalled(); + const callArg = spawn.mock.calls[0]?.[0]; + expect(callArg).toMatchObject({ topLevel: true, agentSlug: "default" }); + }); +}); + +describe("createSummonTool — subagentEnabled defaults preserve legacy behavior", () => { + it("defaults subagentEnabled=true so omitting it keeps subagent spawning", async () => { + const spawn = vi.fn(async () => "tab-1"); + const getResult = vi.fn(async () => ({ status: "done" as const, result: "child" })); + // No userAgentEnabled/subagentEnabled args → legacy subagent-only mode. + const tool = createSummonTool("/tmp/work", { spawn, getResult }, [], []); + const out = await tool.execute({ task: "x", agent: "programmer" }); + // Foreground subagent summon blocks and returns the child result. + expect(out).toBe("agent_id: tab-1\n\nchild"); + expect(getResult).toHaveBeenCalled(); + const callArg = spawn.mock.calls[0]?.[0]; + expect(callArg).not.toHaveProperty("topLevel"); + }); +}); -- cgit v1.2.3 From e4379da8d1e8c7a8a89c63bdaaef99a74bf56cf2 Mon Sep 17 00:00:00 2001 From: Adam Malczewski Date: Tue, 2 Jun 2026 15:55:12 +0900 Subject: fix(tabs): say a reply will WAKE you with a new message (clearer than 'arrives on its own') Matches actual behavior: a peer's reply wakes this tab with a new message in a later turn. Updated the send_to_tab description (both canReadTab branches), the delivery-result text (both branches), and the system-prompt one-liner; updated the test assertion accordingly. --- packages/api/src/agent-manager.ts | 2 +- packages/core/src/tools/send-to-tab.ts | 10 +++++----- packages/core/tests/tools/send-to-tab.test.ts | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'packages/api/src') diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 3d233fc..684f8ec 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -84,7 +84,7 @@ const TOOL_DESCRIPTIONS: Record = { youtube_transcribe: "Fetch the transcript/subtitles for a YouTube video. Set background=true to start in the background and get a job_id for later retrieval.", send_to_tab: - "Send a message to another tab (agent) by its short ID, as shown in the tab bar. Fire-and-forget: it queues/wakes the target and returns immediately without waiting for a reply. Do NOT sleep, poll, or run commands to wait — a reply arrives on its own in a later turn; if you are only waiting, end your turn.", + "Send a message to another tab (agent) by its short ID, as shown in the tab bar. Fire-and-forget: it queues/wakes the target and returns immediately without waiting for a reply. Do NOT sleep, poll, or run commands to wait — if the target replies it will wake you with a new message in a later turn; if you are only waiting, end your turn.", read_tab: "Read another tab (agent)'s most recent completed response by its short ID. Returns a non-blocking snapshot; if the target is still running you get its previous completed turn. Use after send_to_tab to collect a reply.", }; diff --git a/packages/core/src/tools/send-to-tab.ts b/packages/core/src/tools/send-to-tab.ts index 50023a7..eae6bfa 100644 --- a/packages/core/src/tools/send-to-tab.ts +++ b/packages/core/src/tools/send-to-tab.ts @@ -63,11 +63,11 @@ function renderOpenHandles(handles: Array<{ handle: string; title: string }>): s export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefinition { // The `read_tab` follow-up hint is only truthful when this tab actually // holds the `read_tab` tool (the permissions are split). When it doesn't, - // the only honest guidance is that a reply arrives on its own — never tell + // the only honest guidance is that a reply will wake it as a new message — never tell // the agent to call a tool it wasn't granted. const waitLine = callbacks.canReadTab - ? "money. If the target replies it arrives on its own as a new message in a later turn; you" - : "money. If the target replies it arrives on its own as a new message in a later turn."; + ? "money. If the target replies it will WAKE you with a new message in a later turn; you" + : "money. If the target replies it will WAKE you with a new message in a later turn."; const readTabLine = callbacks.canReadTab ? ["can also call 'read_tab' with the same ID in a FUTURE turn to check. If you have other"] : []; @@ -176,13 +176,13 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti const tail = callbacks.canReadTab ? [ "Do NOT sleep, poll, or run commands to wait for a reply. If the target replies it", - `arrives on its own as a new message later; you can also call read_tab with "${target.handle}"`, + `will WAKE you with a new message later; you can also call read_tab with "${target.handle}"`, "in a FUTURE turn to check. Keep working if you have other tasks; if you are ONLY", "waiting for this reply, end your turn now.", ] : [ "Do NOT sleep, poll, or run commands to wait for a reply. If the target replies it", - "arrives on its own as a new message later. Keep working if you have other tasks; if", + "will WAKE you with a new message later. Keep working if you have other tasks; if", "you are ONLY waiting for this reply, end your turn now.", ]; return [ diff --git a/packages/core/tests/tools/send-to-tab.test.ts b/packages/core/tests/tools/send-to-tab.test.ts index 48ff460..21d8032 100644 --- a/packages/core/tests/tools/send-to-tab.test.ts +++ b/packages/core/tests/tools/send-to-tab.test.ts @@ -38,8 +38,8 @@ describe("createSendToTabTool — schema & description", () => { it("never mentions read_tab in the description when canReadTab is false", () => { const tool = createSendToTabTool(makeCallbacks({ canReadTab: false })); expect(tool.description).not.toContain("read_tab"); - // Still tells the agent a reply arrives on its own + to end its turn. - expect(tool.description.toLowerCase()).toContain("arrives on its own"); + // Still tells the agent a reply will wake it + to end its turn. + expect(tool.description.toLowerCase()).toContain("wake you with a new message"); expect(tool.description.toLowerCase()).toContain("end your turn"); }); }); -- cgit v1.2.3