summary | refs | log | tree | commit | diff | homepage
path: root/packages
diff options
context:
space:
mode:
author: Adam Malczewski <[email protected]> 2026-06-02 16:06:13 +0900
committer: Adam Malczewski <[email protected]> 2026-06-02 16:06:13 +0900
commit: b3aca3efe9e8cda79db6e2c7fa20482880ed16c3 (patch)
tree: 3480c1e670d78040bb03a9ec930d815575efc463 /packages
parent: 1541e8d9ecc305bb27cf004cb919ef9065eca8be (diff)
parent: 2b57c1af0247954ccf57d9ba3b0f4a45502ef3da (diff)
download: dispatch-b3aca3efe9e8cda79db6e2c7fa20482880ed16c3.tar.gz
download: dispatch-b3aca3efe9e8cda79db6e2c7fa20482880ed16c3.zip
Merge branch 'dev' into feat/plus-button-sticky
Diffstat (limited to 'packages')
-rw-r--r--  packages/api/src/agent-manager.ts  51
-rw-r--r--  packages/api/tests/agent-manager.test.ts  132
-rw-r--r--  packages/core/src/tools/send-to-tab.ts  61
-rw-r--r--  packages/core/src/tools/summon.ts  163
-rw-r--r--  packages/core/tests/tools/send-to-tab.test.ts  47
-rw-r--r--  packages/core/tests/tools/summon.test.ts  108
6 files changed, 491 insertions, 71 deletions
diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts
index 85dd160..2795a6c 100644
--- a/packages/api/src/agent-manager.ts
+++ b/packages/api/src/agent-manager.ts
@@ -83,6 +83,10 @@ const TOOL_DESCRIPTIONS: Record<string, string> = {
web_search: "Search the web and optionally scrape full page content from results.",
youtube_transcribe:
"Fetch the transcript/subtitles for a YouTube video. Set background=true to start in the background and get a job_id for later retrieval.",
+ send_to_tab:
+ "Send a message to another tab (agent) by its short ID, as shown in the tab bar. Fire-and-forget: it queues/wakes the target and returns immediately without waiting for a reply. Do NOT sleep, poll, or run commands to wait — if the target replies it will wake you with a new message in a later turn; if you are only waiting, end your turn.",
+ read_tab:
+ "Read another tab (agent)'s most recent completed response by its short ID. Returns a non-blocking snapshot; if the target is still running you get its previous completed turn. Use after send_to_tab to collect a reply.",
};
/**
@@ -542,7 +546,7 @@ export class AgentManager {
}
// Tab-to-tab communication — gated on the child whitelist.
if (allowed.has("send_to_tab") || allowed.has("read_tab")) {
- for (const entry of this.buildTabCommToolEntries(tabId)) {
+ for (const entry of this.buildTabCommToolEntries(tabId, allowed.has("read_tab"))) {
if (allowed.has(entry.name)) toolEntries.push(entry);
}
}
@@ -575,7 +579,13 @@ export class AgentManager {
});
}
toolEntries.push({ name: "todo", tool: createTaskListTool(tabAgent.taskList) });
- if (permSummon) {
+ // The `summon` tool is registered when EITHER the subagent
+ // permission (`perm_summon`) OR the user-agent permission
+ // (`perm_user_agent`) is granted — the two are independent.
+ // `perm_summon` enables ordinary subagent spawning; granting
+ // only `perm_user_agent` exposes summon in user-agent-only mode
+ // (spawns top-level user agents exclusively).
+ if (permSummon || permUserAgent) {
// Capture parent's allowed tool names for child permission enforcement
const parentAllowedTools = new Set(toolEntries.map((e) => e.name));
const allAgentDefs = loadAgents(workingDirectory);
@@ -609,25 +619,31 @@ export class AgentManager {
availableUserAgents,
agentDirPaths,
permUserAgent,
+ permSummon,
),
});
- toolEntries.push({
- name: "retrieve",
- tool: createRetrieveTool({
- getResult: (id) =>
- tabAgent.shellStore.has(id)
- ? tabAgent.shellStore.getResult(id)
- : tabAgent.transcriptStore.has(id)
- ? tabAgent.transcriptStore.getResult(id)
- : this.getChildResult(id),
- }),
- });
+ // `retrieve` collects subagent results. User agents are
+ // fire-and-forget, so it is bundled with the subagent
+ // permission only — a user-agent-only grant doesn't get it.
+ if (permSummon) {
+ toolEntries.push({
+ name: "retrieve",
+ tool: createRetrieveTool({
+ getResult: (id) =>
+ tabAgent.shellStore.has(id)
+ ? tabAgent.shellStore.getResult(id)
+ : tabAgent.transcriptStore.has(id)
+ ? tabAgent.transcriptStore.getResult(id)
+ : this.getChildResult(id),
+ }),
+ });
+ }
}
if (permSendToTab || permReadTab) {
const tabCommAllowed = new Set<string>();
if (permSendToTab) tabCommAllowed.add("send_to_tab");
if (permReadTab) tabCommAllowed.add("read_tab");
- for (const entry of this.buildTabCommToolEntries(tabId)) {
+ for (const entry of this.buildTabCommToolEntries(tabId, permReadTab)) {
if (tabCommAllowed.has(entry.name)) toolEntries.push(entry);
}
}
@@ -1237,9 +1253,15 @@ export class AgentManager {
* both tool-construction paths (child whitelist + permission-gated parent).
* `selfHandle` is computed once so the calling tab can stamp provenance and
* reject self-sends.
+ *
+ * `canReadTab` reflects whether THIS tab will also be granted `read_tab`
+ * (the permissions are split). It is forwarded into `send_to_tab` so the
+ * tool only points the agent at `read_tab` when it actually has it — never
+ * advertising a tool the agent wasn't granted.
*/
private buildTabCommToolEntries(
tabId: string,
+ canReadTab: boolean,
): Array<{ name: string; tool: ReturnType<typeof createSendToTabTool> }> {
const selfHandle = shortestUniquePrefix(tabId);
return [
@@ -1253,6 +1275,7 @@ export class AgentManager {
this.deliverMessage(targetId, message, { origin: "agent" }),
listOpenHandles: () => this.listOpenHandles(tabId),
self: { id: tabId, handle: selfHandle },
+ canReadTab,
}),
},
{
diff --git a/packages/api/tests/agent-manager.test.ts b/packages/api/tests/agent-manager.test.ts
index 014022a..3353aff 100644
--- a/packages/api/tests/agent-manager.test.ts
+++ b/packages/api/tests/agent-manager.test.ts
@@ -75,7 +75,11 @@ function makeRow(
// because the production code reassigns `agent.messages =
// rows.slice(...)` AFTER `new Agent()` returns — capturing a
// reference at construction would yield a stale empty array.
-const constructedAgents: Array<{ initialMessages: unknown[]; toolNames: string[] }> = [];
+const constructedAgents: Array<{
+ initialMessages: unknown[];
+ toolNames: string[];
+ systemPrompt: string;
+}> = [];
function resetConstructedAgents(): void {
constructedAgents.length = 0;
}
@@ -159,8 +163,10 @@ vi.mock("@dispatch/core", () => ({
status = "idle";
messages: unknown[] = [];
toolNames: string[] = [];
- constructor(config: { tools?: Array<{ name: string }> }) {
+ systemPrompt = "";
+ constructor(config: { tools?: Array<{ name: string }>; systemPrompt?: string }) {
this.toolNames = (config?.tools ?? []).map((t) => t.name);
+ this.systemPrompt = config?.systemPrompt ?? "";
}
async *run(message: string, options?: { reasoningEffort?: string }): AsyncGenerator<unknown> {
// Snapshot the post-construction pre-populated message list
@@ -170,6 +176,7 @@ vi.mock("@dispatch/core", () => ({
constructedAgents.push({
initialMessages: [...this.messages],
toolNames: [...this.toolNames],
+ systemPrompt: this.systemPrompt,
});
capturedRunOptions.push(options);
if (runImpl) {
@@ -319,6 +326,22 @@ vi.mock("@dispatch/core", () => ({
execute: async () => "mock",
};
},
+ // Summon parent-path dependencies. The real implementations load agent
+ // definitions from disk; tests only need the summon/retrieve tool entries
+ // to appear, so these return empty projections.
+ loadAgents() {
+ return [];
+ },
+ toAvailableSubagents() {
+ return [];
+ },
+ toAvailableUserAgents() {
+ return [];
+ },
+ getAgentDirPaths() {
+ return [];
+ },
+ GLOBAL_AGENTS_DIR: "/tmp/global-agents",
createTab() {},
getTab(id: string) {
return fakeTabs.get(id) ?? null;
@@ -1441,6 +1464,111 @@ describe("AgentManager", () => {
});
});
+ describe("summon / user_agent permission split", () => {
+ // Drives the real parent-path tool construction in
+ // getOrCreateAgentForTab by toggling perm_summon and perm_user_agent
+ // independently, then inspecting which tools the constructed Agent
+ // received. The summon tool must be registered when EITHER permission
+ // is granted; `retrieve` rides with the subagent permission only
+ // (user agents are fire-and-forget).
+ async function toolsForPerms(tabId: string, perms: Record<string, string>): Promise<string[]> {
+ for (const [k, v] of Object.entries(perms)) setFakeSetting(k, v);
+ const manager = new AgentManager();
+ await manager.processMessage(tabId, "go");
+ return constructedAgents.at(-1)?.toolNames ?? [];
+ }
+
+ it("grants summon + retrieve when only perm_summon is allowed", async () => {
+ const tools = await toolsForPerms("tab-summon-only", { perm_summon: "allow" });
+ expect(tools).toContain("summon");
+ expect(tools).toContain("retrieve");
+ });
+
+ it("grants summon WITHOUT retrieve when only perm_user_agent is allowed", async () => {
+ // Regression: granting only the user-agent permission used to leave
+ // the agent unable to summon user agents because the whole summon
+ // tool was gated behind perm_summon.
+ const tools = await toolsForPerms("tab-user-agent-only", { perm_user_agent: "allow" });
+ expect(tools).toContain("summon");
+ expect(tools).not.toContain("retrieve");
+ });
+
+ it("grants summon + retrieve when both permissions are allowed", async () => {
+ const tools = await toolsForPerms("tab-summon-both", {
+ perm_summon: "allow",
+ perm_user_agent: "allow",
+ });
+ expect(tools).toContain("summon");
+ expect(tools).toContain("retrieve");
+ });
+
+ it("grants neither summon nor retrieve when both permissions are off", async () => {
+ const tools = await toolsForPerms("tab-summon-neither", {});
+ expect(tools).not.toContain("summon");
+ expect(tools).not.toContain("retrieve");
+ });
+ });
+
+ // Regression: granted tab-messaging tools must also be ADVERTISED in the
+ // agent's system prompt. The tools were registered in the API tool payload
+ // but `buildSystemPrompt` filtered its "You have access to the following
+ // tools" list through TOOL_DESCRIPTIONS, which lacked send_to_tab/read_tab
+ // — so the model was told it didn't have them and refused to use them. This
+ // locks the prompt's capability list to the granted toolset.
+ describe("send_to_tab / read_tab system-prompt advertisement", () => {
+ async function promptForPerms(tabId: string, perms: Record<string, string>): Promise<string> {
+ for (const [k, v] of Object.entries(perms)) setFakeSetting(k, v);
+ const manager = new AgentManager();
+ await manager.processMessage(tabId, "go");
+ return constructedAgents.at(-1)?.systemPrompt ?? "";
+ }
+
+ it("lists send_to_tab in the system prompt when granted", async () => {
+ const prompt = await promptForPerms("tab-prompt-send", { perm_send_to_tab: "allow" });
+ expect(prompt).toContain("- send_to_tab:");
+ expect(prompt).not.toContain("- read_tab:");
+ });
+
+ it("lists read_tab in the system prompt when granted", async () => {
+ const prompt = await promptForPerms("tab-prompt-read", { perm_read_tab: "allow" });
+ expect(prompt).toContain("- read_tab:");
+ expect(prompt).not.toContain("- send_to_tab:");
+ });
+
+ it("lists both tab-messaging tools when both are granted", async () => {
+ const prompt = await promptForPerms("tab-prompt-both", {
+ perm_send_to_tab: "allow",
+ perm_read_tab: "allow",
+ });
+ expect(prompt).toContain("- send_to_tab:");
+ expect(prompt).toContain("- read_tab:");
+ });
+
+ it("omits both from the system prompt when neither is granted", async () => {
+ const prompt = await promptForPerms("tab-prompt-neither", {});
+ expect(prompt).not.toContain("- send_to_tab:");
+ expect(prompt).not.toContain("- read_tab:");
+ });
+
+ it("advertises exactly the granted tab tools (prompt list matches schema)", async () => {
+ for (const [k, v] of Object.entries({
+ perm_send_to_tab: "allow",
+ perm_read_tab: "allow",
+ })) {
+ setFakeSetting(k, v);
+ }
+ const manager = new AgentManager();
+ await manager.processMessage("tab-prompt-match", "go");
+ const inst = constructedAgents.at(-1);
+ // Every granted tab-messaging tool surfaced in the schema must also be
+ // advertised in the prompt, so the model never believes it lacks one.
+ for (const name of ["send_to_tab", "read_tab"]) {
+ expect(inst?.toolNames).toContain(name);
+ expect(inst?.systemPrompt).toContain(`- ${name}:`);
+ }
+ });
+ });
+
// ─── Usage side-channel persistence ──────────────────────────────
//
// `usage` AgentEvents (one per LLM round-trip) are persisted as invisible
diff --git a/packages/core/src/tools/send-to-tab.ts b/packages/core/src/tools/send-to-tab.ts
index eb86b7e..eae6bfa 100644
--- a/packages/core/src/tools/send-to-tab.ts
+++ b/packages/core/src/tools/send-to-tab.ts
@@ -44,6 +44,13 @@ export interface SendToTabCallbacks {
/** The calling tab's own id + handle — used to block self-sends and to
* stamp provenance onto the delivered message. */
self: { id: string; handle: string };
+ /**
+ * Whether THIS calling tab also has the `read_tab` tool granted. The
+ * tab-messaging permissions are split, so a tab can hold `send_to_tab`
+ * without `read_tab`. When false, the tool must NOT tell the agent to use
+ * `read_tab` (it doesn't have it) — replies only arrive on their own.
+ */
+ canReadTab: boolean;
}
/** Render the "available tabs" hint shared by the none/ambiguous branches. */
@@ -54,6 +61,19 @@ function renderOpenHandles(handles: Array<{ handle: string; title: string }>): s
}
export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefinition {
+ // The `read_tab` follow-up hint is only truthful when this tab actually
+ // holds the `read_tab` tool (the permissions are split). When it doesn't,
+ // the only honest guidance is that a reply will wake it as a new message — never tell
+ // the agent to call a tool it wasn't granted.
+ const waitLine = callbacks.canReadTab
+ ? "money. If the target replies it will WAKE you with a new message in a later turn; you"
+ : "money. If the target replies it will WAKE you with a new message in a later turn.";
+ const readTabLine = callbacks.canReadTab
+ ? ["can also call 'read_tab' with the same ID in a FUTURE turn to check. If you have other"]
+ : [];
+ const keepGoingLine = callbacks.canReadTab
+ ? "work to do, keep going; if you are ONLY waiting for the reply, end your turn now."
+ : "If you have other work to do, keep going; if you are ONLY waiting for the reply, end your turn now.";
return {
name: "send_to_tab",
description: [
@@ -64,9 +84,14 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti
" - If the target tab is idle, your message WAKES it and starts a new turn.",
"",
"This is fire-and-forget: it returns immediately and does NOT wait for a reply.",
- "Use the 'read_tab' tool with the same ID later to read the target's latest response.",
+ "Do NOT sleep, poll, or run shell commands to wait for a reply — that wastes turns and",
+ waitLine,
+ ...readTabLine,
+ keepGoingLine,
"",
- "Your tab ID is auto-added to the top of the message so the recipient can reply to you.",
+ "Your tab ID is auto-added to the top of the message so the recipient knows who to reply",
+ "to. The recipient must use this same 'send_to_tab' tool (addressed to your ID) to answer;",
+ "a plain text response reaches only their own user, not you.",
"IDs are git-style prefixes: pass any length that uniquely identifies the target (min 4 chars).",
"If the ID is ambiguous you'll be asked to add a character.",
].join("\n"),
@@ -117,8 +142,18 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti
}
// Stamp provenance so the recipient (and the watching user) can see
- // which tab the message came from and reply back via its handle.
- const delivered = `[message from tab ${callbacks.self.handle}]\n\n${message}`;
+ // which tab the message came from and how to reply. The header makes
+ // clear this is a PEER AGENT, not the recipient's own user, and the
+ // footer states the reply contract: a reply (only if warranted) must
+ // go back through `send_to_tab`, since a plain text answer reaches
+ // only the recipient's own user — not this sender.
+ const delivered = [
+ `[message from tab ${callbacks.self.handle} — this is another agent, NOT your user]`,
+ "",
+ message,
+ "",
+ `[To reply to tab ${callbacks.self.handle}, use the send_to_tab tool with tab_id "${callbacks.self.handle}". ONLY reply if this message asks you to, or your user tells you to — it may just be context or instructions. A plain text response goes to your own user, not to this agent.]`,
+ ].join("\n");
try {
const result = await callbacks.deliver(target.id, delivered);
@@ -138,7 +173,23 @@ export function createSendToTabTool(callbacks: SendToTabCallbacks): ToolDefiniti
result.status === "queued"
? "queued (target is busy; it will be picked up next turn)"
: "delivered (target was idle; a new turn has started)";
- return `Message ${verb}. Target tab: ${target.handle} (${target.title}). Use read_tab with "${target.handle}" to read its reply later.`;
+ const tail = callbacks.canReadTab
+ ? [
+ "Do NOT sleep, poll, or run commands to wait for a reply. If the target replies it",
+ `will WAKE you with a new message later; you can also call read_tab with "${target.handle}"`,
+ "in a FUTURE turn to check. Keep working if you have other tasks; if you are ONLY",
+ "waiting for this reply, end your turn now.",
+ ]
+ : [
+ "Do NOT sleep, poll, or run commands to wait for a reply. If the target replies it",
+ "will WAKE you with a new message later. Keep working if you have other tasks; if",
+ "you are ONLY waiting for this reply, end your turn now.",
+ ];
+ return [
+ `Message ${verb}. Target tab: ${target.handle} (${target.title}).`,
+ "",
+ ...tail,
+ ].join("\n");
} catch (err) {
return `Error delivering message: ${err instanceof Error ? err.message : String(err)}`;
}
diff --git a/packages/core/src/tools/summon.ts b/packages/core/src/tools/summon.ts
index 4820e89..cfee8b8 100644
--- a/packages/core/src/tools/summon.ts
+++ b/packages/core/src/tools/summon.ts
@@ -60,10 +60,13 @@ function renderAgentGroup(label: string, agents: AvailableAgent[]): string[] {
* the disk locations where they live, injected into the summon tool's
* description.
*
- * When `userAgentEnabled` is false only subagents are shown (under the
- * generic "Available agents" heading). When it is true, subagents and
- * user agents are listed as two labelled groups so the LLM understands
- * which slugs require `top_level=true`.
+ * `subagentEnabled` and `userAgentEnabled` independently control which
+ * groups are shown — they mirror the `perm_summon` and `perm_user_agent`
+ * permissions respectively:
+ * - subagents only → generic "Available agents" heading;
+ * - user agents only → a single user-agent group (top_level is implied);
+ * - both → two labelled groups so the LLM understands which slugs
+ * require `top_level=true`.
*
* Returns a compact "no agents defined" notice when nothing is visible.
*/
@@ -72,6 +75,7 @@ function buildAgentsCatalog(
userAgents: AvailableAgent[],
agentDirs: string[],
userAgentEnabled: boolean,
+ subagentEnabled: boolean,
): string {
const lines: string[] = [];
lines.push("");
@@ -80,8 +84,9 @@ function buildAgentsCatalog(
lines.push(` - ${d}`);
}
+ const visibleSubagents = subagentEnabled ? subagents : [];
const visibleUserAgents = userAgentEnabled ? userAgents : [];
- if (subagents.length === 0 && visibleUserAgents.length === 0) {
+ if (visibleSubagents.length === 0 && visibleUserAgents.length === 0) {
lines.push("");
lines.push("No agent definitions are currently defined.");
return lines.join("\n");
@@ -93,12 +98,26 @@ function buildAgentsCatalog(
lines.push("and working directory; the 'tools' parameter is ignored.");
lines.push("");
+ // User-agent-only mode: list just the user agents. top_level is implied
+ // (it is the only thing this grant can spawn), so the heading omits it.
+ if (!subagentEnabled && userAgentEnabled) {
+ lines.push(
+ ...renderAgentGroup(
+ "User agents (spawned as independent top-level tabs):",
+ visibleUserAgents,
+ ),
+ );
+ return lines.join("\n");
+ }
+
+ // Subagent-only mode: single generic heading.
if (!userAgentEnabled) {
- lines.push(...renderAgentGroup("Available agents:", subagents));
+ lines.push(...renderAgentGroup("Available agents:", visibleSubagents));
return lines.join("\n");
}
- const subagentLines = renderAgentGroup("Subagents (spawned as child tabs):", subagents);
+ // Both enabled: two labelled groups.
+ const subagentLines = renderAgentGroup("Subagents (spawned as child tabs):", visibleSubagents);
const userAgentLines = renderAgentGroup(
"User agents (spawned as independent top-level tabs, requires top_level=true):",
visibleUserAgents,
@@ -122,9 +141,14 @@ function buildAgentsCatalog(
* its description; this is information-only — the runtime resolves
* slugs through `loadAgent` independently.
*
- * `userAgentEnabled` controls whether the `top_level` parameter and the
- * user-agent catalog are surfaced to the LLM. It mirrors the
- * `perm_user_agent` permission.
+ * `userAgentEnabled` mirrors the `perm_user_agent` permission and
+ * `subagentEnabled` mirrors the `perm_summon` permission. They are
+ * independent: the tool is registered whenever at least one is granted.
+ * - subagentEnabled only → spawn ordinary subagents (no `top_level`);
+ * - userAgentEnabled only → spawn ONLY top-level user agents
+ * (`top_level` is forced on, the `background` knob is dropped, and
+ * the catalog lists user agents only);
+ * - both → full behavior (subagents plus `top_level` user agents).
*/
export function createSummonTool(
_defaultWorkingDirectory: string,
@@ -133,39 +157,29 @@ export function createSummonTool(
availableUserAgents: AvailableAgent[] = [],
agentDirs: string[] = [],
userAgentEnabled = false,
+ subagentEnabled = true,
): ToolDefinition {
+ // When only the user-agent permission is granted the tool spawns user
+ // agents exclusively: `top_level` is implied (and forced), subagent
+ // mechanics (background, retrieve, parallel work) are irrelevant.
+ const userAgentOnly = userAgentEnabled && !subagentEnabled;
+
const catalog = buildAgentsCatalog(
availableSubagents,
availableUserAgents,
agentDirs,
userAgentEnabled,
+ subagentEnabled,
);
const subagentSlugs = availableSubagents.map((a) => a.slug);
const userAgentSlugs = availableUserAgents.map((a) => a.slug);
- const allSlugs = userAgentEnabled ? [...subagentSlugs, ...userAgentSlugs] : subagentSlugs;
+ const allSlugs = userAgentOnly
+ ? userAgentSlugs
+ : userAgentEnabled
+ ? [...subagentSlugs, ...userAgentSlugs]
+ : subagentSlugs;
- const description = [
- "Spawn a new child agent to work on a task independently.",
- "",
- "By default, blocks until the child agent finishes and returns the result directly.",
- "Set background=true to return immediately with an agent_id instead — use retrieve to collect the result later.",
- "",
- "The child agent runs in its own tab visible to the user. Use the 'retrieve' tool with the returned agent_id to get the result when needed.",
- "",
- "Pattern for parallel work:",
- " 1. Call summon multiple times with background=true to start several agents",
- " 2. Do your own work or wait",
- " 3. Call retrieve for each agent_id to collect results",
- ...(userAgentEnabled
- ? [
- "",
- "Set top_level=true to spawn an independent user agent — a first-class",
- "top-level tab with no parent. User agents are fire-and-forget: you get",
- "an agent_id back but cannot retrieve their result. top_level requires an",
- "'agent' definition listed under 'User agents' below.",
- ]
- : []),
- "",
+ const toolNamesList = [
"The 'tools' parameter controls what the child can do. Available tool names:",
" - read_file: Read file contents",
" - read_file_slice: Read a character-range slice of a single line",
@@ -179,11 +193,50 @@ export function createSummonTool(
" - youtube_transcribe: Fetch YouTube video transcripts",
" - send_to_tab: Send a message to another tab/agent by its ID",
" - read_tab: Read another tab/agent's latest response by its ID",
- "",
- "The 'agent' parameter is required — every spawned agent must use a definition.",
- "Tools default to the agent definition's tools, intersected with your own tools (you can't grant capabilities you don't have).",
- catalog,
- ].join("\n");
+ ];
+
+ const description = userAgentOnly
+ ? [
+ "Spawn an independent top-level user agent to work on a task.",
+ "",
+ "User agents are first-class top-level tabs with no parent. They are",
+ "fire-and-forget: you get an agent_id back but cannot retrieve their result.",
+ "The user agent runs in its own tab visible to the user.",
+ "",
+ ...toolNamesList,
+ "",
+ "The 'agent' parameter is required — every spawned agent must use a definition.",
+ "Tools default to the agent definition's tools, intersected with your own tools (you can't grant capabilities you don't have).",
+ catalog,
+ ].join("\n")
+ : [
+ "Spawn a new child agent to work on a task independently.",
+ "",
+ "By default, blocks until the child agent finishes and returns the result directly.",
+ "Set background=true to return immediately with an agent_id instead — use retrieve to collect the result later.",
+ "",
+ "The child agent runs in its own tab visible to the user. Use the 'retrieve' tool with the returned agent_id to get the result when needed.",
+ "",
+ "Pattern for parallel work:",
+ " 1. Call summon multiple times with background=true to start several agents",
+ " 2. Do your own work or wait",
+ " 3. Call retrieve for each agent_id to collect results",
+ ...(userAgentEnabled
+ ? [
+ "",
+ "Set top_level=true to spawn an independent user agent — a first-class",
+ "top-level tab with no parent. User agents are fire-and-forget: you get",
+ "an agent_id back but cannot retrieve their result. top_level requires an",
+ "'agent' definition listed under 'User agents' below.",
+ ]
+ : []),
+ "",
+ ...toolNamesList,
+ "",
+ "The 'agent' parameter is required — every spawned agent must use a definition.",
+ "Tools default to the agent definition's tools, intersected with your own tools (you can't grant capabilities you don't have).",
+ catalog,
+ ].join("\n");
const parametersShape = {
task: z
@@ -205,7 +258,10 @@ export function createSummonTool(
.filter(Boolean)
.join(" "),
),
- ...(userAgentEnabled
+ // `top_level` is only an explicit choice when BOTH subagents and user
+ // agents are available. In user-agent-only mode it is implied (forced
+ // on), so the knob is omitted entirely.
+ ...(userAgentEnabled && !userAgentOnly
? {
top_level: z
.boolean()
@@ -248,12 +304,18 @@ export function createSummonTool(
.describe(
"Absolute path for the child to work in. Defaults to the agent definition's cwd (or the spawning agent's directory).",
),
- background: z
- .boolean()
- .optional()
- .describe(
- "If true, returns immediately with an agent_id for later retrieval. If false (default), blocks until the child agent finishes and returns the result directly. Ignored when top_level is true.",
- ),
+ // `background` is meaningless for fire-and-forget user agents, so the
+ // knob is omitted in user-agent-only mode.
+ ...(userAgentOnly
+ ? {}
+ : {
+ background: z
+ .boolean()
+ .optional()
+ .describe(
+ "If true, returns immediately with an agent_id for later retrieval. If false (default), blocks until the child agent finishes and returns the result directly. Ignored when top_level is true.",
+ ),
+ }),
};
return {
@@ -266,9 +328,14 @@ export function createSummonTool(
const tools = args.tools as string[] | undefined;
const workingDirectory = args.working_directory as string | undefined;
const background = (args.background as boolean | undefined) ?? false;
- const topLevel = userAgentEnabled
- ? ((args.top_level as boolean | undefined) ?? false)
- : false;
+ // User-agent-only mode always spawns top-level user agents. When both
+ // capabilities are present the caller chooses via `top_level`. When
+ // only subagents are available, top-level spawning is unavailable.
+ const topLevel = userAgentOnly
+ ? true
+ : userAgentEnabled
+ ? ((args.top_level as boolean | undefined) ?? false)
+ : false;
try {
const agentId = await callbacks.spawn({
diff --git a/packages/core/tests/tools/send-to-tab.test.ts b/packages/core/tests/tools/send-to-tab.test.ts
index 4450fc5..21d8032 100644
--- a/packages/core/tests/tools/send-to-tab.test.ts
+++ b/packages/core/tests/tools/send-to-tab.test.ts
@@ -14,6 +14,7 @@ function makeCallbacks(overrides: Partial<SendToTabCallbacks> = {}): SendToTabCa
deliver: () => ({ status: "started" }),
listOpenHandles: () => [{ handle: "targ", title: "Target" }],
self: { id: "self-id", handle: "self" },
+ canReadTab: true,
...overrides,
};
}
@@ -24,6 +25,22 @@ describe("createSendToTabTool — schema & description", () => {
expect(tool.name).toBe("send_to_tab");
expect(tool.description).toContain("fire-and-forget");
expect(tool.description.toLowerCase()).toContain("queued");
+ // Description must steer the model away from busy-waiting for a reply.
+ expect(tool.description.toLowerCase()).toContain("do not sleep");
+ expect(tool.description.toLowerCase()).toContain("end your turn");
+ });
+
+ it("mentions read_tab in the description only when canReadTab is true", () => {
+ const tool = createSendToTabTool(makeCallbacks({ canReadTab: true }));
+ expect(tool.description).toContain("read_tab");
+ });
+
+ it("never mentions read_tab in the description when canReadTab is false", () => {
+ const tool = createSendToTabTool(makeCallbacks({ canReadTab: false }));
+ expect(tool.description).not.toContain("read_tab");
+ // Still tells the agent a reply will wake it + to end its turn.
+ expect(tool.description.toLowerCase()).toContain("wake you with a new message");
+ expect(tool.description.toLowerCase()).toContain("end your turn");
});
});
@@ -35,11 +52,37 @@ describe("createSendToTabTool — execute()", () => {
expect(deliver).toHaveBeenCalledTimes(1);
const [targetId, delivered] = deliver.mock.calls[0] ?? [];
expect(targetId).toBe("target-id");
- // Provenance prefix names the sending tab's handle.
- expect(delivered).toContain("[message from tab self]");
+ // Provenance header names the sending tab's handle and marks it as a
+ // peer agent (not the recipient's own user).
+ expect(delivered).toContain("[message from tab self");
+ expect(delivered).toContain("another agent");
expect(delivered).toContain("hello there");
+ // Reply contract: the recipient must answer via send_to_tab back to the
+ // sender's handle, not as a plain text reply to its own user.
+ expect(delivered).toContain('send_to_tab tool with tab_id "self"');
+ expect(delivered).toContain("ONLY reply if");
expect(out).toContain("idle");
expect(out).toContain("targ");
+ // Sender is steered away from busy-waiting and told to end its turn.
+ expect(out.toLowerCase()).toContain("do not sleep");
+ expect(out.toLowerCase()).toContain("end your turn");
+ });
+
+ it("points the sender at read_tab in the result only when canReadTab is true", async () => {
+ const deliver = vi.fn(() => ({ status: "started" as const }));
+ const tool = createSendToTabTool(makeCallbacks({ deliver, canReadTab: true }));
+ const out = await tool.execute({ tab_id: "targ", message: "hi" });
+ expect(out).toContain("read_tab");
+ });
+
+ it("omits read_tab from the result when canReadTab is false", async () => {
+ const deliver = vi.fn(() => ({ status: "started" as const }));
+ const tool = createSendToTabTool(makeCallbacks({ deliver, canReadTab: false }));
+ const out = await tool.execute({ tab_id: "targ", message: "hi" });
+ expect(out).not.toContain("read_tab");
+ // Still steers away from busy-waiting and toward ending the turn.
+ expect(out.toLowerCase()).toContain("do not sleep");
+ expect(out.toLowerCase()).toContain("end your turn");
});
it("reports the queued status when the target is busy", async () => {
diff --git a/packages/core/tests/tools/summon.test.ts b/packages/core/tests/tools/summon.test.ts
index f59f345..4885a94 100644
--- a/packages/core/tests/tools/summon.test.ts
+++ b/packages/core/tests/tools/summon.test.ts
@@ -239,3 +239,111 @@ describe("createSummonTool — execute() argument forwarding", () => {
expect(getResult).toHaveBeenCalled();
});
});
+
+describe("createSummonTool — user-agent-only mode (perm_user_agent without perm_summon)", () => {
+ // userAgentEnabled=true, subagentEnabled=false → the tool spawns ONLY
+ // top-level user agents. `top_level` is implied (and forced), the
+ // subagent/parallel-work prose is dropped, and only the user-agent
+ // catalog group is shown.
+ const subagents: AvailableAgent[] = [
+ {
+ slug: "programmer",
+ name: "Programmer",
+ description: "Codes things",
+ path: "/agents/programmer.toml",
+ },
+ ];
+ const userAgents: AvailableAgent[] = [
+ {
+ slug: "default",
+ name: "Default",
+ description: "Default agent",
+ path: "/agents/default.toml",
+ },
+ ];
+
+ function userAgentOnlyTool(
+ spawn = vi.fn(async () => "ua-1"),
+ getResult = vi.fn(async () => ({ status: "done" as const, result: "nope" })),
+ ) {
+ return {
+ spawn,
+ getResult,
+ tool: createSummonTool(
+ "/tmp/work",
+ { spawn, getResult },
+ subagents,
+ userAgents,
+ ["/agents"],
+ true, // userAgentEnabled
+ false, // subagentEnabled
+ ),
+ };
+ }
+
+ it("describes spawning user agents and omits subagent/parallel-work prose", () => {
+ const { tool } = userAgentOnlyTool();
+ expect(tool.description).toContain("Spawn an independent top-level user agent");
+ expect(tool.description).toContain("fire-and-forget");
+ expect(tool.description).not.toContain("Pattern for parallel work");
+ expect(tool.description).not.toContain("Set background=true");
+ });
+
+ it("lists only the user-agent catalog group, not subagents", () => {
+ const { tool } = userAgentOnlyTool();
+ expect(tool.description).toContain("User agents (spawned as independent top-level tabs):");
+ expect(tool.description).toContain("default");
+ // Subagents must not be advertised in user-agent-only mode.
+ expect(tool.description).not.toContain("Subagents (spawned as child tabs):");
+ expect(tool.description).not.toContain("- programmer: Programmer");
+ });
+
+ it("only lists user-agent slugs in the 'agent' parameter description", () => {
+ const { tool } = userAgentOnlyTool();
+ const agentParam = (tool.parameters as unknown as { shape: { agent: { description: string } } })
+ .shape.agent;
+ expect(agentParam.description).toContain("default");
+ expect(agentParam.description).not.toContain("programmer");
+ });
+
+ it("omits the top_level parameter (it is implied)", () => {
+ const { tool } = userAgentOnlyTool();
+ const shape = (tool.parameters as unknown as { shape: Record<string, unknown> }).shape;
+ expect("top_level" in shape).toBe(false);
+ });
+
+ it("omits the background parameter (user agents are fire-and-forget)", () => {
+ const { tool } = userAgentOnlyTool();
+ const shape = (tool.parameters as unknown as { shape: Record<string, unknown> }).shape;
+ expect("background" in shape).toBe(false);
+ });
+
+ it("forces topLevel=true on spawn even when top_level is not passed", async () => {
+ const spawn = vi.fn(async () => "ua-99");
+ const getResult = vi.fn(async () => ({ status: "done" as const, result: "nope" }));
+ const { tool } = userAgentOnlyTool(spawn, getResult);
+ const out = await tool.execute({ task: "do stuff", agent: "default" });
+ expect(out).toContain("User agent spawned successfully");
+ expect(out).toContain("ua-99");
+ expect(out).toContain("fire-and-forget");
+ // Never blocks on a result for fire-and-forget user agents.
+ expect(getResult).not.toHaveBeenCalled();
+ const callArg = spawn.mock.calls[0]?.[0];
+ expect(callArg).toMatchObject({ topLevel: true, agentSlug: "default" });
+ });
+});
+
+describe("createSummonTool — subagentEnabled defaults preserve legacy behavior", () => {
+ it("defaults subagentEnabled=true so omitting it keeps subagent spawning", async () => {
+ const spawn = vi.fn(async () => "tab-1");
+ const getResult = vi.fn(async () => ({ status: "done" as const, result: "child" }));
+ // No userAgentEnabled/subagentEnabled args → legacy subagent-only mode.
+ const tool = createSummonTool("/tmp/work", { spawn, getResult }, [], []);
+ const out = await tool.execute({ task: "x", agent: "programmer" });
+ // Foreground subagent summon blocks and returns the child result.
+ expect(out).toBe("agent_id: tab-1\n\nchild");
+ expect(getResult).toHaveBeenCalled();
+ const callArg = spawn.mock.calls[0]?.[0];
+ expect(callArg).not.toHaveProperty("topLevel");
+ });
+});