diff options
| author | Adam Malczewski <[email protected]> | 2026-05-19 23:20:41 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-05-19 23:20:41 +0900 |
| commit | a38d5b1279db6f9de5228c173019fc2ac08daec3 (patch) | |
| tree | 32c3a535d0b74872ef952b4a44d4d5ba2ec9d638 | |
| parent | 0ae805b28b5160b8d9fb43635fa172961f6550cc (diff) | |
| download | dispatch-a38d5b1279db6f9de5228c173019fc2ac08daec3.tar.gz dispatch-a38d5b1279db6f9de5228c173019fc2ac08daec3.zip | |
feat: Phase 2 — shell permissions, tree-sitter analysis, permission UI
Permission engine:
- Rule-based engine: wildcard matching, last-match-wins, reject cascade
- PermissionService with pending/approved state, PermissionChecker interface
- dispatch.yaml config loader with per-permission pattern rules
Shell tool:
- run_shell tool with child_process spawn, timeout, streaming output
- Tree-sitter static analysis (web-tree-sitter + tree-sitter-bash WASM)
- BashArity command normalization for 'always allow' patterns
- FILE_COMMANDS set: rm, cp, mv, mkdir, ls, find, grep, cat, etc.
Agent loop refactored:
- Replaced the AI SDK's maxSteps option with a manual step loop (capped at MAX_STEPS) that executes tool calls itself
- Permission checks on shell commands (external_directory only)
- Permission checks on file tools outside workspace boundary
- Symlink bypass fix (realpathSync); fixed false positives in the outside-workspace check for names that merely start with ".."
- Shell output streaming via Promise.race + setImmediate polling
API layer:
- PermissionManager wraps PermissionService, broadcasts via WebSocket
- WebSocket handles permission-reply messages from frontend
- Config loaded from dispatch.yaml, converted to ruleset
Frontend:
- Permission prompt modal (native dialog, focus trap, ARIA)
- Always-allow confirmation flow with pattern preview
- Shell output display (live streaming + final parsed result)
- Permission log panel (fixed bottom-right overlay)
- Exit code badge (green 0, red non-zero)
134 tests, typecheck clean on all 3 packages
35 files changed, 2223 insertions, 186 deletions
@@ -27,6 +27,8 @@ "dependencies": { "@ai-sdk/openai-compatible": "^0.2.0", "ai": "^4.0.0", + "tree-sitter-bash": "^0.25.1", + "web-tree-sitter": "^0.26.8", "zod": "^3.23.0", }, "devDependencies": { @@ -372,6 +374,10 @@ "nanoid": ["[email protected]", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-ZB9RH/39qpq5Vu6Y+NmUaFhQR6pp+M2Xt76XBnEwDaGcVAqhlvxrl3B2bKS5D3NH3QR76v3aSrKaF/Kiy7lEtQ=="], + "node-addon-api": ["[email protected]", "", {}, "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA=="], + + "node-gyp-build": ["[email protected]", "", { "bin": { "node-gyp-build": "bin.js", "node-gyp-build-optional": "optional.js", "node-gyp-build-test": "build-test.js" } }, "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ=="], + "pathe": ["[email protected]", "", {}, "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w=="], "pathval": ["[email protected]", "", {}, "sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ=="], @@ -426,6 +432,8 @@ "tinyspy": ["[email protected]", "", {}, "sha512-azl+t0z7pw/z958Gy9svOTuzqIk6xq+NSheJzn5MMWtWTFywIacg2wUlzKFGtt3cthx0r2SxMK0yzJOR0IES7Q=="], + "tree-sitter-bash": ["[email protected]", "", { "dependencies": { "node-addon-api": "^8.2.1", "node-gyp-build": "^4.8.2" }, "peerDependencies": { "tree-sitter": "^0.25.0" }, "optionalPeers": ["tree-sitter"] }, "sha512-7hMytuYIMoXOq24yRulgIxthE9YmggZIOHCyPTTuJcu6EU54tYD+4G39cUb28kxC6jMf/AbPfWGLQtgPTdh3xw=="], + "typescript": ["[email protected]", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], "undici-types": ["[email protected]", "", {}, "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg=="], @@ -440,6 +448,8 @@ "vitest": ["[email protected]", "", { "dependencies": 
{ "@types/chai": "^5.2.2", "@vitest/expect": "3.2.4", "@vitest/mocker": "3.2.4", "@vitest/pretty-format": "^3.2.4", "@vitest/runner": "3.2.4", "@vitest/snapshot": "3.2.4", "@vitest/spy": "3.2.4", "@vitest/utils": "3.2.4", "chai": "^5.2.0", "debug": "^4.4.1", "expect-type": "^1.2.1", "magic-string": "^0.30.17", "pathe": "^2.0.3", "picomatch": "^4.0.2", "std-env": "^3.9.0", "tinybench": "^2.9.0", "tinyexec": "^0.3.2", "tinyglobby": "^0.2.14", "tinypool": "^1.1.1", "tinyrainbow": "^2.0.0", "vite": "^5.0.0 || ^6.0.0 || ^7.0.0-0", "vite-node": "3.2.4", "why-is-node-running": "^2.3.0" }, "peerDependencies": { "@edge-runtime/vm": "*", "@types/debug": "^4.1.12", "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", "@vitest/browser": "3.2.4", "@vitest/ui": "3.2.4", "happy-dom": "*", "jsdom": "*" }, "optionalPeers": ["@edge-runtime/vm", "@types/debug", "@types/node", "@vitest/browser", "@vitest/ui", "happy-dom", "jsdom"], "bin": { "vitest": "vitest.mjs" } }, "sha512-LUCP5ev3GURDysTWiP47wRRUpLKMOfPh+yKTx3kVIEiu5KOMeqzpnYNsKyOoVrULivR8tLcks4+lga33Whn90A=="], + "web-tree-sitter": ["[email protected]", "", {}, "sha512-4sUwi7ZyOrIk5KLgYLkc2A/F0LFMQnBhfb+2Cdl7ik4ePJ6JD+fk4ofI2sA5eGawBKBaK4Vntt7Ww5KcEsay4A=="], + "why-is-node-running": ["[email protected]", "", { "dependencies": { "siginfo": "^2.0.0", "stackback": "0.0.2" }, "bin": { "why-is-node-running": "cli.js" } }, "sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w=="], "zimmerframe": ["[email protected]", "", {}, "sha512-B58NGBEoc8Y9MWWCQGl/gq9xBCe4IiKM0a2x7GZdQKOW5Exr8S1W24J6OgM1njK8xCRGvAJIL/MxXHf6SkmQKQ=="], diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index 45e195c..817ddbf 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -4,14 +4,19 @@ import { type AgentStatus, createListFilesTool, createReadFileTool, + createRunShellTool, createWriteFileTool, + loadConfig, + configToRuleset, } from "@dispatch/core"; 
+import type { PermissionManager } from "./permission-manager.js"; const SYSTEM_PROMPT = `You are Dispatch, a helpful AI coding assistant. You have access to the following tools for working with files in the current working directory: - read_file: Read the contents of a file - write_file: Write content to a file (creates parent directories if needed) - list_files: List files and directories +- run_shell: Execute shell commands in the working directory (bash). Returns stdout, stderr, and exit code. Use for running tests, builds, git operations, package management, and other development tasks. Do NOT run destructive or irreversible commands unless the user explicitly requests them. When asked to work with files, use these tools. Always confirm what you did after completing an action. Be concise and helpful.`; @@ -20,6 +25,15 @@ export class AgentManager { private status: AgentStatus = "idle"; private messageCount = 0; private eventListeners: Set<(event: AgentEvent) => void> = new Set(); + private permissionManager: PermissionManager | undefined; + + constructor(permissionManager?: PermissionManager) { + this.permissionManager = permissionManager; + } + + getPermissionManager(): PermissionManager | undefined { + return this.permissionManager; + } private getOrCreateAgent(): Agent { if (!this.agent) { @@ -31,8 +45,12 @@ export class AgentManager { createReadFileTool(workingDirectory), createWriteFileTool(workingDirectory), createListFilesTool(workingDirectory), + createRunShellTool(workingDirectory), ]; + const config = loadConfig(workingDirectory); + const ruleset = configToRuleset(config); + this.agent = new Agent({ model, apiKey, @@ -40,6 +58,8 @@ export class AgentManager { systemPrompt: SYSTEM_PROMPT, tools, workingDirectory, + permissionChecker: this.permissionManager ?? 
undefined, + ruleset, }); } return this.agent; diff --git a/packages/api/src/app.ts b/packages/api/src/app.ts index 9c31eab..b612c6a 100644 --- a/packages/api/src/app.ts +++ b/packages/api/src/app.ts @@ -1,8 +1,10 @@ import { Hono } from "hono"; import { cors } from "hono/cors"; import { AgentManager } from "./agent-manager.js"; +import { PermissionManager } from "./permission-manager.js"; -export const agentManager = new AgentManager(); +export const permissionManager = new PermissionManager(); +export const agentManager = new AgentManager(permissionManager); export const app = new Hono(); diff --git a/packages/api/src/index.ts b/packages/api/src/index.ts index 02b04b6..bf0f7f9 100644 --- a/packages/api/src/index.ts +++ b/packages/api/src/index.ts @@ -1,27 +1,63 @@ import { createBunWebSocket } from "hono/bun"; -import { agentManager, app } from "./app.js"; +import { agentManager, app, permissionManager } from "./app.js"; +import type { PermissionReply } from "@dispatch/core"; const { upgradeWebSocket, websocket } = createBunWebSocket(); +let clientIdCounter = 0; + app.get( "/ws", upgradeWebSocket((_c) => { + const clientId = String(++clientIdCounter); + return { onOpen(_event, ws) { // Send current status immediately ws.send(JSON.stringify({ type: "status", status: agentManager.getStatus() })); + // Send any pending permission prompts + const pending = permissionManager.getPending(); + if (pending.length > 0) { + ws.send(JSON.stringify({ type: "permission-prompt", pending })); + } + const unsubscribe = agentManager.onEvent((event) => { ws.send(JSON.stringify(event)); }); - // Store unsubscribe fn on the raw socket for cleanup - (ws as unknown as { _unsub?: () => void })._unsub = unsubscribe; + permissionManager.registerClient(clientId, (data) => { + ws.send(JSON.stringify(data)); + }); + + // Store cleanup on the raw socket + (ws as unknown as { _unsub?: () => void; _clientId?: string })._unsub = unsubscribe; + (ws as unknown as { _unsub?: () => void; _clientId?: 
string })._clientId = clientId; + }, + onMessage(event, _ws) { + try { + const message = JSON.parse(String(event.data)) as { + type?: string; + id?: string; + reply?: string; + }; + if (message.type === "permission-reply" && typeof message.id === "string" && typeof message.reply === "string") { + const validReplies: PermissionReply[] = ["once", "always", "reject"]; + if (validReplies.includes(message.reply as PermissionReply)) { + permissionManager.reply(message.id, message.reply as PermissionReply); + } + } + } catch { + // ignore malformed messages + } }, onClose(_event, ws) { - const unsub = (ws as unknown as { _unsub?: () => void })._unsub; - if (unsub) { - unsub(); + const raw = ws as unknown as { _unsub?: () => void; _clientId?: string }; + if (raw._unsub) { + raw._unsub(); + } + if (raw._clientId) { + permissionManager.unregisterClient(raw._clientId); } }, }; diff --git a/packages/api/src/permission-manager.ts b/packages/api/src/permission-manager.ts new file mode 100644 index 0000000..6a04d3d --- /dev/null +++ b/packages/api/src/permission-manager.ts @@ -0,0 +1,48 @@ +import { + PermissionService, + type PermissionReply, + type PermissionRequest, + type Ruleset, +} from "@dispatch/core"; + +export class PermissionManager { + private service = new PermissionService(); + private wsClients: Map<string, (data: unknown) => void> = new Map(); + + registerClient(id: string, send: (data: unknown) => void): void { + this.wsClients.set(id, send); + } + + unregisterClient(id: string): void { + this.wsClients.delete(id); + } + + private broadcastPending(pending: Array<{ id: string; request: PermissionRequest }>): void { + const message = { + type: "permission-prompt", + pending: pending.map((p) => ({ id: p.id, ...p.request })), + }; + for (const send of this.wsClients.values()) { + send(message); + } + } + + async ask(request: PermissionRequest, rulesets: Ruleset[] = []): Promise<PermissionReply> { + const promise = this.service.ask(request, rulesets); + 
this.broadcastPending(this.service.getPending()); + return promise; + } + + reply(id: string, reply: PermissionReply): void { + this.service.reply(id, reply); + this.broadcastPending(this.service.getPending()); + } + + getPending(): Array<{ id: string; request: PermissionRequest }> { + return this.service.getPending(); + } + + getService(): PermissionService { + return this.service; + } +} diff --git a/packages/api/tests/agent-manager.test.ts b/packages/api/tests/agent-manager.test.ts index 56cb818..2111b0e 100644 --- a/packages/api/tests/agent-manager.test.ts +++ b/packages/api/tests/agent-manager.test.ts @@ -18,6 +18,15 @@ vi.mock("@dispatch/core", () => ({ yield { type: "status", status: "idle" } as const; } }, + PermissionService: class MockPermissionService { + ask(_request: unknown, _rulesets: unknown[]) { + return Promise.resolve("once"); + } + reply(_id: string, _reply: unknown) {} + getPending() { + return []; + } + }, createReadFileTool(_wd: string): ToolDefinition { return { name: "read_file", @@ -42,6 +51,20 @@ vi.mock("@dispatch/core", () => ({ execute: async () => ["file1.ts"], }; }, + createRunShellTool(_wd: string): ToolDefinition { + return { + name: "run_shell", + description: "run shell command", + parameters: { _type: "z.ZodObject", shape: {} } as unknown as ToolDefinition["parameters"], + execute: async () => ({ stdout: "", stderr: "", exitCode: 0 }), + }; + }, + loadConfig(_dir: string) { + return { permissions: {} }; + }, + configToRuleset(_config: unknown) { + return []; + }, })); // Import after mock is defined (Vitest hoists vi.mock automatically) diff --git a/packages/api/tests/routes.test.ts b/packages/api/tests/routes.test.ts index 9f852ee..87ff436 100644 --- a/packages/api/tests/routes.test.ts +++ b/packages/api/tests/routes.test.ts @@ -19,6 +19,15 @@ vi.mock("@dispatch/core", () => ({ yield { type: "status", status: "idle" } as const; } }, + PermissionService: class MockPermissionService { + ask(_request: unknown, _rulesets: 
unknown[]) { + return Promise.resolve("once"); + } + reply(_id: string, _reply: unknown) {} + getPending() { + return []; + } + }, createReadFileTool(_wd: string): ToolDefinition { return { name: "read_file", @@ -43,6 +52,20 @@ vi.mock("@dispatch/core", () => ({ execute: async () => ["file1.ts"], }; }, + createRunShellTool(_wd: string): ToolDefinition { + return { + name: "run_shell", + description: "run shell command", + parameters: { _type: "z.ZodObject", shape: {} } as unknown as ToolDefinition["parameters"], + execute: async () => ({ stdout: "", stderr: "", exitCode: 0 }), + }; + }, + loadConfig(_dir: string) { + return { permissions: {} }; + }, + configToRuleset(_config: unknown) { + return []; + }, })); const { app } = await import("../src/app.js"); diff --git a/packages/core/package.json b/packages/core/package.json index 3741041..66c8726 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -11,8 +11,10 @@ "typecheck": "tsc --noEmit" }, "dependencies": { - "ai": "^4.0.0", "@ai-sdk/openai-compatible": "^0.2.0", + "ai": "^4.0.0", + "tree-sitter-bash": "^0.25.1", + "web-tree-sitter": "^0.26.8", "zod": "^3.23.0" }, "devDependencies": { diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index 8e89bb9..2fd6746 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -1,7 +1,10 @@ import type { CoreMessage } from "ai"; import { streamText } from "ai"; +import { dirname, isAbsolute, relative, resolve } from "node:path"; +import { realpathSync } from "node:fs"; import { createProvider } from "../llm/provider.js"; import { createToolRegistry } from "../tools/registry.js"; +import { analyzeCommand } from "../tools/shell-analyze.js"; import type { AgentConfig, AgentEvent, @@ -23,7 +26,7 @@ function toCoreMessages(messages: ChatMessage[]): CoreMessage[] { } result.push({ role: "assistant", content: parts }); for (const tr of msg.toolResults ?? 
[]) { - result.push({ role: "tool", content: [{ type: "tool-result", toolCallId: tr.toolCallId, toolName: "", result: tr.result }] }); + result.push({ role: "tool", content: [{ type: "tool-result", toolCallId: tr.toolCallId, toolName: tr.toolName, result: tr.result }] }); } } } @@ -51,6 +54,8 @@ function formatError(err: unknown, config: AgentConfig): string { return `${String(err)} ${context}`; } +const MAX_STEPS = 10; + export class Agent { status: AgentStatus = "idle"; messages: ChatMessage[] = []; @@ -61,6 +66,131 @@ export class Agent { this.config = config; } + private async executeToolWithStreaming( + tc: ToolCall, + shellOutputQueue: Array<{ data: string; stream: "stdout" | "stderr" }>, + ): Promise<ToolResult> { + const registry = createToolRegistry(this.config.tools); + const tool = registry.getTool(tc.name); + if (!tool) { + return { toolCallId: tc.id, toolName: tc.name, result: `Unknown tool: ${tc.name}`, isError: true }; + } + + // Permission check for shell commands — only prompt for external directory access. + // Commands that stay within the working directory are auto-allowed. + if (tc.name === "run_shell" && this.config.permissionChecker) { + const command = typeof tc.arguments.command === "string" ? tc.arguments.command : ""; + const workingDirectory = this.config.workingDirectory; + const analysis = await analyzeCommand(command, workingDirectory); + const ruleset = this.config.ruleset ?? 
[]; + + // Check for external directory access from shell command + if (analysis.dirs.length > 0) { + const dirRequest = { + permission: "external_directory", + patterns: analysis.dirs.map((d) => `${d}/*`), + always: analysis.dirs.map((d) => `${d}/*`), + description: `Shell command accesses external directory: ${analysis.dirs.join(", ")}`, + metadata: { command, dirs: analysis.dirs }, + }; + try { + const dirReply = await this.config.permissionChecker.ask(dirRequest, [ruleset]); + if (dirReply === "reject") { + return { + toolCallId: tc.id, + toolName: tc.name, + result: `Permission denied: access to external directories rejected`, + isError: true, + }; + } + } catch { + return { + toolCallId: tc.id, + toolName: tc.name, + result: `Permission denied: external directory access not allowed`, + isError: true, + }; + } + } + } + + // Permission check for file tools accessing paths outside workspace + if ( + this.config.permissionChecker && + (tc.name === "read_file" || tc.name === "write_file" || tc.name === "list_files") + ) { + const pathArg = typeof tc.arguments.path === "string" ? tc.arguments.path : "."; + let resolvedPath: string; + try { + resolvedPath = realpathSync(resolve(this.config.workingDirectory, pathArg)); + } catch { + // Path doesn't exist yet (e.g. write_file creating a new file) — fall back + resolvedPath = resolve(this.config.workingDirectory, pathArg); + } + + // Check if outside workspace + const rel = relative(this.config.workingDirectory, resolvedPath); + const isOutside = rel.startsWith("../") || rel.startsWith("..\\") || rel === ".." || isAbsolute(rel); + + if (isOutside) { + const permissionType = + tc.name === "read_file" ? "read" : tc.name === "write_file" ? 
"edit" : "list"; + + const parentDir = dirname(resolvedPath); + const request = { + permission: "external_directory", + patterns: [`${parentDir}/*`], + always: [`${parentDir}/*`], + description: `${permissionType} file outside workspace: ${resolvedPath}`, + metadata: { filepath: resolvedPath, parentDir, operation: permissionType }, + }; + + const ruleset = this.config.ruleset ?? []; + try { + const reply = await this.config.permissionChecker.ask(request, [ruleset]); + if (reply === "reject") { + return { + toolCallId: tc.id, + toolName: tc.name, + result: `Permission denied: ${permissionType} access to ${resolvedPath} rejected`, + isError: true, + }; + } + } catch { + return { + toolCallId: tc.id, + toolName: tc.name, + result: `Permission denied: ${permissionType} access to ${resolvedPath} not allowed`, + isError: true, + }; + } + } + } + + try { + const execPromise = tool.execute(tc.arguments, { + onOutput: (data: string, stream: "stdout" | "stderr") => { + shellOutputQueue.push({ data, stream }); + }, + }); + + const rawResult = await execPromise; + return { + toolCallId: tc.id, + toolName: tc.name, + result: typeof rawResult === "string" ? rawResult : JSON.stringify(rawResult), + isError: false, + }; + } catch (err) { + return { + toolCallId: tc.id, + toolName: tc.name, + result: err instanceof Error ? 
err.message : String(err), + isError: true, + }; + } + } + async *run(userMessage: string): AsyncGenerator<AgentEvent> { this.status = "running"; yield { type: "status", status: "running" }; @@ -74,70 +204,132 @@ export class Agent { }); try { - const result = streamText({ - model: providerFactory(this.config.model), - system: this.config.systemPrompt, - messages: toCoreMessages(this.messages), - tools: registry.getAISDKTools(), - maxSteps: 10, - providerOptions: { - openaiCompatible: { reasoningEffort: "max" }, - }, - }); + // Track the final assistant message across all steps + let finalText = ""; + const allToolCalls: ToolCall[] = []; + const allToolResults: ToolResult[] = []; - let fullText = ""; - const toolCalls: ToolCall[] = []; - const toolResults: ToolResult[] = []; - - for await (const event of result.fullStream) { - if (event.type === "text-delta") { - fullText += event.textDelta; - yield { type: "text-delta", delta: event.textDelta }; - } else if (event.type === "reasoning") { - yield { type: "reasoning-delta", delta: event.textDelta }; - } else if (event.type === "tool-call") { - const toolCall: ToolCall = { - id: event.toolCallId, - name: event.toolName, - arguments: event.args as Record<string, unknown>, - }; - toolCalls.push(toolCall); - yield { type: "tool-call", toolCall }; - } else if (event.type === "error") { - const errorMsg = formatError(event.error, this.config); - yield { type: "error", error: errorMsg }; - this.status = "error"; - yield { type: "status", status: "error" }; - return; + // We build up a local message list for multi-turn within one run() call + // that includes tool results fed back to the LLM + const stepMessages: ChatMessage[] = [...this.messages]; + + for (let step = 0; step < MAX_STEPS; step++) { + const result = streamText({ + model: providerFactory(this.config.model), + system: this.config.systemPrompt, + messages: toCoreMessages(stepMessages), + tools: registry.getAISDKTools(), + providerOptions: { + openaiCompatible: 
{ reasoningEffort: "max" }, + }, + }); + + let stepText = ""; + const stepToolCalls: ToolCall[] = []; + + for await (const event of result.fullStream) { + if (event.type === "text-delta") { + stepText += event.textDelta; + finalText += event.textDelta; + yield { type: "text-delta", delta: event.textDelta }; + } else if (event.type === "reasoning") { + yield { type: "reasoning-delta", delta: event.textDelta }; + } else if (event.type === "tool-call") { + const toolCall: ToolCall = { + id: event.toolCallId, + name: event.toolName, + arguments: event.args as Record<string, unknown>, + }; + stepToolCalls.push(toolCall); + allToolCalls.push(toolCall); + yield { type: "tool-call", toolCall }; + } else if (event.type === "error") { + const errorMsg = formatError(event.error, this.config); + yield { type: "error", error: errorMsg }; + this.status = "error"; + yield { type: "status", status: "error" }; + return; + } + } + + // No tool calls means the agent is done + if (stepToolCalls.length === 0) { + // Add the final assistant message to step messages (for history) + stepMessages.push({ + role: "assistant", + content: stepText, + }); + break; + } + + // Add assistant message with tool calls to step messages + stepMessages.push({ + role: "assistant", + content: stepText, + toolCalls: stepToolCalls, + }); + + // Execute tool calls manually + const stepToolResults: ToolResult[] = []; + for (const tc of stepToolCalls) { + const shellOutputQueue: Array<{ data: string; stream: "stdout" | "stderr" }> = []; + + const execPromise = this.executeToolWithStreaming(tc, shellOutputQueue); + + // Poll for shell output while the tool is running, using Promise.race + // so we can yield shell-output events as they arrive rather than buffering + // them all until tool completion. 
+ let toolResult: ToolResult | undefined; + while (toolResult === undefined) { + if (shellOutputQueue.length > 0) { + const item = shellOutputQueue.shift()!; + yield { type: "shell-output", data: item.data, stream: item.stream }; + continue; + } + const raceResult = await Promise.race([ + execPromise.then((r) => ({ done: true as const, value: r })), + new Promise<{ done: false }>((resolve) => setImmediate(() => resolve({ done: false }))), + ]); + if (raceResult.done) { + toolResult = raceResult.value; + } + } + + // Drain any remaining shell output emitted before we read the result + while (shellOutputQueue.length > 0) { + const item = shellOutputQueue.shift()!; + yield { type: "shell-output", data: item.data, stream: item.stream }; + } + + stepToolResults.push(toolResult); + allToolResults.push(toolResult); + yield { type: "tool-result", toolResult }; + } + + // Add tool results back to step messages so LLM can see them + // We append them to the last assistant message's toolResults + const lastMsg = stepMessages[stepMessages.length - 1]; + if (lastMsg) { + lastMsg.toolResults = stepToolResults; } } - // Tool results are available from completed steps after streaming. - // The generic TOOLS type resolves to never[] at compile time, so - // we cast through unknown to access the runtime shape. - const steps = await result.steps; - for (const step of steps) { - const stepToolResults = step.toolResults as unknown as Array<{ - toolCallId: string; - result: unknown; - isError?: boolean; - }>; - for (const tr of stepToolResults) { - const toolResult: ToolResult = { - toolCallId: tr.toolCallId, - result: typeof tr.result === "string" ? tr.result : JSON.stringify(tr.result), - isError: tr.isError ?? 
false, + // If we exhausted MAX_STEPS and there were pending tool calls, surface an error + if (stepMessages.length > 0) { + const lastMsg = stepMessages[stepMessages.length - 1]; + if (lastMsg?.toolCalls && lastMsg.toolCalls.length > 0 && !lastMsg.toolResults) { + yield { + type: "error", + error: `Agent reached MAX_STEPS (${MAX_STEPS}) with unresolved tool calls`, }; - toolResults.push(toolResult); - yield { type: "tool-result", toolResult }; } } const assistantMessage: ChatMessage = { role: "assistant", - content: fullText, - toolCalls: toolCalls.length > 0 ? toolCalls : undefined, - toolResults: toolResults.length > 0 ? toolResults : undefined, + content: finalText, + toolCalls: allToolCalls.length > 0 ? allToolCalls : undefined, + toolResults: allToolResults.length > 0 ? allToolResults : undefined, }; this.messages.push(assistantMessage); yield { type: "done", message: assistantMessage }; diff --git a/packages/core/src/config/loader.ts b/packages/core/src/config/loader.ts new file mode 100644 index 0000000..0e58ad2 --- /dev/null +++ b/packages/core/src/config/loader.ts @@ -0,0 +1,138 @@ +import { readFileSync } from "node:fs"; +import { homedir } from "node:os"; +import { join } from "node:path"; +import type { PermissionRule, Ruleset } from "../permission/index.js"; + +// Strip inline comments that appear outside of quoted strings. +// Handles both single- and double-quoted values. +function stripInlineComment(line: string): string { + // Walk character by character; track whether we're inside quotes. 
+ let inQuote: string | null = null; + for (let i = 0; i < line.length; i++) { + const ch = line[i]; + if (inQuote) { + if (ch === inQuote) inQuote = null; + } else if (ch === '"' || ch === "'") { + inQuote = ch; + } else if (ch === "#") { + return line.slice(0, i).trimEnd(); + } + } + return line; +} + +const VALID_ACTIONS = new Set(["allow", "deny", "ask"]); + +function validateAction(raw: string): "allow" | "deny" | "ask" { + if (VALID_ACTIONS.has(raw)) return raw as "allow" | "deny" | "ask"; + console.warn(`dispatch: unrecognized action "${raw}", defaulting to "ask"`); + return "ask"; +} + +export interface DispatchConfig { + permissions: Record<string, string | Record<string, string>>; +} + +// Load dispatch.yaml from the given directory +export function loadConfig(dir: string): DispatchConfig { + const yamlPath = join(dir, "dispatch.yaml"); + try { + const content = readFileSync(yamlPath, "utf-8"); + return parseYaml(content); + } catch { + return { permissions: {} }; + } +} + +function expandHome(value: string): string { + const home = homedir(); + return value.replace(/^\$HOME(?=[\/\\]|$)/, home).replace(/^~(?=[\/\\]|$)/, home); +} + +function stripQuotes(s: string): string { + if ((s.startsWith('"') && s.endsWith('"')) || (s.startsWith("'") && s.endsWith("'"))) { + return s.slice(1, -1); + } + return s; +} + +// Parse simple YAML for the permissions structure +function parseYaml(content: string): DispatchConfig { + const permissions: Record<string, string | Record<string, string>> = {}; + const lines = content.split("\n"); + + let inPermissions = false; + let currentKey: string | null = null; + + for (const raw of lines) { + // Skip comments and blank lines + const trimmed = raw.trimEnd(); + const stripped = trimmed.trimStart(); + if (stripped === "" || stripped.startsWith("#")) continue; + + const indent = trimmed.length - stripped.length; + + if (indent === 0) { + // Top-level key + inPermissions = stripped.startsWith("permissions:"); + currentKey = 
null; + continue; + } + + if (!inPermissions) continue; + + if (indent === 2) { + // permission key line: " key: value" or " key:" + const colonIdx = stripped.indexOf(":"); + if (colonIdx === -1) continue; + const key = stripQuotes(stripped.slice(0, colonIdx).trim()); + const valueRaw = stripInlineComment(stripped.slice(colonIdx + 1).trim()).trim(); + if (valueRaw === "" || valueRaw === null) { + // nested map follows + currentKey = key; + permissions[currentKey] = {}; + } else { + // inline value + const value = stripQuotes(valueRaw); + permissions[key] = value; + currentKey = null; + } + continue; + } + + if (indent >= 4 && currentKey !== null) { + // sub-key line: " "pattern": action" + const colonIdx = stripped.indexOf(":"); + if (colonIdx === -1) continue; + const pattern = expandHome(stripQuotes(stripped.slice(0, colonIdx).trim())); + const actionRaw = stripQuotes(stripInlineComment(stripped.slice(colonIdx + 1).trim()).trim()); + const action = validateAction(actionRaw); + (permissions[currentKey] as Record<string, string>)[pattern] = action; + } + } + + return { permissions }; +} + +// Convert the config's permission block to a Ruleset +export function configToRuleset(config: DispatchConfig): Ruleset { + const home = homedir(); + const rules: PermissionRule[] = []; + + for (const [permission, value] of Object.entries(config.permissions)) { + if (typeof value === "string") { + const action = validateAction(value); + rules.push({ permission, pattern: "*", action }); + } else { + for (const [rawPattern, rawAction] of Object.entries(value)) { + const pattern = rawPattern + .replace(/^\$HOME(?=[\/\\]|$)/, home) + .replace(/^~(?=[\/\\]|$)/, home); + const action = validateAction(rawAction); + rules.push({ permission, pattern, action }); + } + } + } + + return rules; +} diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index b3907fa..233cb10 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -3,7 +3,13 @@ export { Agent } 
/**
 * Find the rule that governs a (permission, pattern) pair.
 *
 * All rulesets are treated as one list in the order given. The list is scanned
 * from the end, so the last rule whose `permission` and `pattern` wildcards
 * both match wins. When nothing matches, a synthetic "ask" rule echoing the
 * inputs is returned.
 *
 * @param permission Permission name being checked (e.g. "bash").
 * @param pattern Concrete value to test against each rule's pattern.
 * @param rulesets Rulesets in increasing priority order.
 * @returns The winning rule, or a default `{ action: "ask" }` rule.
 */
export function evaluate(
  permission: string,
  pattern: string,
  ...rulesets: Ruleset[]
): PermissionRule {
  // Newest-first view of every rule across all rulesets.
  const newestFirst: PermissionRule[] = rulesets.flat().reverse();

  for (const candidate of newestFirst) {
    if (!Wildcard.match(candidate.permission, permission)) continue;
    if (!Wildcard.match(candidate.pattern, pattern)) continue;
    return candidate;
  }

  return { action: "ask", permission, pattern };
}
/**
 * In-memory permission gate.
 *
 * `ask` resolves a request against the supplied rulesets plus the rules
 * approved at runtime. A request that is neither denied nor fully allowed is
 * parked as "pending" until `reply` settles it.
 */
export class PermissionService {
  // Requests awaiting a decision, keyed by a generated id; each entry keeps the
  // promise callbacks of the `ask` call that created it.
  private pending: Map<
    string,
    {
      request: PermissionRequest;
      resolve: (reply: PermissionReply) => void;
      reject: (err: Error) => void;
    }
  > = new Map();
  // Rules granted at runtime; evaluated after the caller's rulesets so they win.
  private approved: Ruleset = [];
  // Monotonic counter used to mint pending-request ids ("1", "2", ...).
  private idCounter = 0;

  /** Append rules to the runtime-approved ruleset. */
  approve(rules: PermissionRule[]): void {
    for (const rule of rules) {
      this.approved.push(rule);
    }
  }

  /**
   * Decide a request, or park it until someone replies.
   *
   * @returns "once" when every evaluated pattern is allowed; otherwise a
   *   promise that settles when `reply` is called for this request.
   * @throws Error("Permission denied: ...") when any evaluated pattern is denied.
   *
   * NOTE(review): the patterns evaluated here come from `request.always`
   * (falling back to "*"), while an "always" reply stores rules for
   * `request.patterns` — confirm this asymmetry is intended.
   */
  async ask(request: PermissionRequest, rulesets: Ruleset[]): Promise<PermissionReply> {
    const candidates = request.always.length === 0 ? ["*"] : request.always;
    const verdicts = candidates.map((candidate) =>
      evaluate(request.permission, candidate, ...rulesets, this.approved),
    );

    // A single deny vetoes the whole request.
    for (const verdict of verdicts) {
      if (verdict.action === "deny") {
        throw new Error(`Permission denied: ${request.permission} ${verdict.pattern}`);
      }
    }

    // No deny and nothing left to ask about: allowed without prompting.
    const needsPrompt = verdicts.some((verdict) => verdict.action !== "allow");
    if (!needsPrompt) {
      return "once";
    }

    // Registered synchronously (no await above), so the entry is visible to
    // getPending() as soon as ask() has been called.
    this.idCounter += 1;
    const id = String(this.idCounter);
    return new Promise<PermissionReply>((resolve, reject) => {
      this.pending.set(id, { request, resolve, reject });
    });
  }

  /**
   * Settle a pending request. A "reject" reply cascades: every pending request
   * is rejected, regardless of `id`.
   */
  reply(id: string, reply: PermissionReply): void {
    if (reply !== "reject") {
      this.resolvePending(id, reply);
      return;
    }

    for (const [pendingId, entry] of this.pending) {
      entry.reject(new Error(`Permission rejected: ${entry.request.permission}`));
      this.pending.delete(pendingId);
    }
  }

  /** Snapshot of the pending requests, in insertion order. */
  getPending(): Array<{ id: string; request: PermissionRequest }> {
    const snapshot: Array<{ id: string; request: PermissionRequest }> = [];
    for (const [id, entry] of this.pending) {
      snapshot.push({ id, request: entry.request });
    }
    return snapshot;
  }

  // Resolve one pending request; unknown ids are ignored. For "always", an
  // allow rule is recorded for each of the request's patterns first.
  private resolvePending(id: string, reply: PermissionReply): void {
    const entry = this.pending.get(id);
    if (entry === undefined) return;

    if (reply === "always") {
      const { permission, patterns } = entry.request;
      for (const pattern of patterns) {
        this.approved.push({ permission, pattern, action: "allow" });
      }
    }

    entry.resolve(reply);
    this.pending.delete(id);
  }
}
// Hardcoded dictionary of well-known commands and their arity
// (number of tokens that form the "human-understandable" prefix).
// Any command not listed here has arity 1.
const ARITY: Record<string, number> = {
  "git": 2, // "git checkout", "git commit", etc.
  "npm": 3, // "npm run dev", "npm install -g"
  "docker": 2, // "docker compose", "docker build"
  "kubectl": 2, // "kubectl get", "kubectl apply"
  "bun": 2, // "bun install", "bun test"
  "cargo": 2, // "cargo build", "cargo test"
  "go": 2, // "go build", "go test"
  "python": 2, // "python -m", "python script.py"
  "python3": 2,
  "pip": 2,
  "pip3": 2,
  "brew": 2,
  "apt": 2,
  "apt-get": 2,
  "dnf": 2,
  "yum": 2,
  "pacman": 2,
  "systemctl": 2,
  "journalctl": 2,
  "ssh": 2,
  "scp": 2,
  "rsync": 2,
  "curl": 2, // "curl -X", "curl https://"
  "wget": 2,
  "tar": 2, // "tar -xzf", "tar -czf"
  "zip": 2,
  "unzip": 2,
  "chown": 2,
  "chmod": 2,
  "mount": 2,
  "umount": 2,
};

/**
 * Get the normalized prefix for a list of command tokens.
 *
 * The command name (first token) is looked up case-insensitively in ARITY.
 * Known commands keep their first `arity` tokens; every other command keeps
 * only its name. Tokens are returned as given (no case folding).
 *
 * @param tokens Command tokens, command name first.
 * @returns The leading tokens that form the prefix; `[]` for empty input or an
 *   empty first token.
 */
export function prefix(tokens: string[]): string[] {
  if (tokens.length === 0) return [];
  const first = tokens[0];
  if (!first) return [];

  const key = first.toLowerCase();
  // Own-property check: a plain `ARITY[key]` also finds inherited members such
  // as "constructor" or "__proto__", which are not numbers and made slice()
  // return an empty prefix for commands with those names.
  if (!Object.hasOwn(ARITY, key)) {
    return [first];
  }
  return tokens.slice(0, ARITY[key]);
}
const DEFAULT_TIMEOUT = 2 * 60 * 1000; // 2 minutes

/**
 * Build the `run_shell` tool: runs one command through the platform shell in
 * `workingDirectory` and reports what happened.
 *
 * The tool's `execute` resolves to a JSON string of
 * `{ stdout, stderr, exitCode, error? }`. Output is also forwarded chunk by
 * chunk to `context.onOutput` while the command runs. Spawn failures are
 * reported through `error` with `exitCode` 1 rather than by rejecting.
 *
 * @param workingDirectory Directory used as the command's cwd.
 * @returns The tool definition.
 */
export function createRunShellTool(workingDirectory: string): ToolDefinition {
  return {
    name: "run_shell",
    description:
      "Execute a shell command in the working directory. Returns stdout, stderr, and exit code. Use for running tests, builds, git operations, package management, and other development tasks.",
    parameters: z.object({
      command: z.string().describe("The shell command to execute"),
      timeout: z
        .number()
        .optional()
        .describe("Timeout in milliseconds (default 2 minutes)"),
    }),
    execute: async (args: Record<string, unknown>, context?: ToolExecuteContext): Promise<string> => {
      const command = args.command as string;

      // `spawn` only accepts a non-negative integer timeout and throws
      // otherwise, so fractional values are truncated and anything else
      // (missing, NaN, negative, infinite) falls back to the default.
      const requested = args.timeout;
      const timeout =
        typeof requested === "number" && Number.isFinite(requested) && requested >= 0
          ? Math.trunc(requested)
          : DEFAULT_TIMEOUT;

      const [shell, shellArgs] = getShell();
      // NOTE (MVP limitation): `spawn` timeout sends SIGTERM only to the shell
      // process itself, not to any child processes it may have spawned. If the
      // command forks sub-processes they will continue running after timeout.
      // A full fix would require spawning with `detached: true` and killing the
      // entire process group (process.kill(-child.pid, "SIGTERM")).
      const child = spawn(shell, [...shellArgs, command], {
        cwd: workingDirectory,
        env: process.env,
        timeout,
        stdio: ["ignore", "pipe", "pipe"],
      });

      // Decode at the stream level: converting each Buffer chunk on its own
      // corrupts multi-byte UTF-8 characters that straddle a chunk boundary.
      child.stdout?.setEncoding("utf8");
      child.stderr?.setEncoding("utf8");

      let stdout = "";
      let stderr = "";

      const result = await new Promise<{
        stdout: string;
        stderr: string;
        exitCode: number;
        error?: string;
      }>((resolve) => {
        child.stdout?.on("data", (chunk: string) => {
          stdout += chunk;
          context?.onOutput?.(chunk, "stdout");
        });
        child.stderr?.on("data", (chunk: string) => {
          stderr += chunk;
          context?.onOutput?.(chunk, "stderr");
        });

        // exitCode is null when the process was ended by a signal (for example
        // the timeout's SIGTERM); report that as a generic failure code.
        child.on("close", (exitCode) => {
          resolve({ stdout, stderr, exitCode: exitCode ?? 1 });
        });

        // Emitted when the process could not be spawned or killed. Only the
        // first resolve() call takes effect if "close" fires as well.
        child.on("error", (err) => {
          resolve({ stdout, stderr, exitCode: 1, error: err.message });
        });
      });

      return JSON.stringify(result);
    },
  };
}

// Shell executable and the arguments that make it run a command string.
function getShell(): [string, string[]] {
  return process.platform === "win32"
    ? ["powershell", ["-Command"]]
    : ["bash", ["-c"]];
}
/**
 * Return the shared parser set, initializing it on first use.
 *
 * Concurrent callers share one in-flight initialization. If initialization
 * fails, the cached promise is cleared so a later call retries; previously the
 * rejection was cached for the lifetime of the process.
 *
 * @returns The initialized parsers.
 * @throws Whatever `initParser` rejects with.
 */
async function getParser(): Promise<Parsers> {
  if (parserPromise === null) {
    const attempt: Promise<Parsers> = initParser().catch((err: unknown) => {
      // Only reset if no newer attempt has replaced this one in the meantime.
      if (parserPromise === attempt) parserPromise = null;
      throw err;
    });
    parserPromise = attempt;
  }
  return parserPromise;
}
/**
 * Turn a command argument into the path it refers to.
 *
 * A leading `~`, `$HOME` or `${HOME}` (alone or followed by a path separator)
 * is replaced with the home directory from the environment first. Without
 * this, `~/.ssh/id_rsa` would be resolved relative to the workspace and look
 * like an in-workspace path. The `~user` form is not expanded. Arguments that
 * were single-quoted in the shell are expanded too; that can only cause an
 * extra check, never a missed one.
 *
 * @param arg Raw argument text (quotes already stripped by the caller).
 * @param wd Working directory that relative arguments are resolved against.
 * @returns An absolute argument as given, a relative one resolved against
 *   `wd`, or null if resolution throws.
 */
function resolvePath(arg: string, wd: string): string | null {
  try {
    // The child shell inherits process.env, so this is the directory it would
    // expand to. USERPROFILE covers Windows, where HOME is usually unset.
    const home = process.env.HOME ?? process.env.USERPROFILE;
    const target = home
      ? // Function replacer so `$` sequences in the home path stay literal.
        arg.replace(/^(?:~|\$HOME|\$\{HOME\})(?=[\/\\]|$)/, () => home)
      : arg;

    if (isAbsolute(target)) return target;
    return resolve(wd, target);
  } catch {
    return null;
  }
}
+ // If relative path starts with "../" or is ".." exactly, or is an absolute path + // (on Windows when drives differ), the file is outside the workspace. + const isOutside = rel.startsWith(`..${sep}`) || rel.startsWith("../") || rel === ".." || isAbsolute(rel); + return !isOutside; +} diff --git a/packages/core/src/types/index.ts b/packages/core/src/types/index.ts index 28e79ae..6262d2b 100644 --- a/packages/core/src/types/index.ts +++ b/packages/core/src/types/index.ts @@ -1,4 +1,5 @@ import type { ZodType } from "zod"; +import type { PermissionChecker, Ruleset } from "../permission/index.js"; // Message types for the agent conversation export type MessageRole = "user" | "assistant" | "tool"; @@ -18,6 +19,7 @@ export interface ToolCall { export interface ToolResult { toolCallId: string; + toolName: string; result: string; isError: boolean; } @@ -32,15 +34,21 @@ export type AgentEvent = | { type: "reasoning-delta"; delta: string } | { type: "tool-call"; toolCall: ToolCall } | { type: "tool-result"; toolResult: ToolResult } + | { type: "shell-output"; data: string; stream: "stdout" | "stderr" } | { type: "error"; error: string } | { type: "done"; message: ChatMessage }; +// Context passed to tool execute functions +export interface ToolExecuteContext { + onOutput?: (data: string, stream: "stdout" | "stderr") => void; +} + // Tool definition interface export interface ToolDefinition { name: string; description: string; parameters: ZodType; - execute: (args: Record<string, unknown>) => Promise<string>; + execute: (args: Record<string, unknown>, context?: ToolExecuteContext) => Promise<string>; } // Agent configuration @@ -51,4 +59,6 @@ export interface AgentConfig { systemPrompt: string; tools: ToolDefinition[]; workingDirectory: string; + permissionChecker?: PermissionChecker; + ruleset?: Ruleset; } diff --git a/packages/core/tests/agent/agent.test.ts b/packages/core/tests/agent/agent.test.ts index 92df90e..5be210a 100644 --- 
a/packages/core/tests/agent/agent.test.ts +++ b/packages/core/tests/agent/agent.test.ts @@ -40,15 +40,9 @@ async function* makeFullStream( } } -interface MockStreamOptions { - events: Array<{ type: string; [key: string]: unknown }>; - steps?: Array<{ toolResults: Array<{ toolCallId: string; result: unknown }> }>; -} - -function makeMockStreamResult(opts: MockStreamOptions) { +function makeMockStreamResult(events: Array<{ type: string; [key: string]: unknown }>) { return { - fullStream: makeFullStream(opts.events), - steps: Promise.resolve(opts.steps ?? []), + fullStream: makeFullStream(events), } as ReturnType<typeof import("ai").streamText>; } @@ -66,18 +60,16 @@ describe("Agent", () => { it("yields running then idle status events around a simple message", async () => { const { streamText } = await import("ai"); vi.mocked(streamText).mockReturnValue( - makeMockStreamResult({ - events: [ - { type: "text-delta", textDelta: "Hello!" }, - { - type: "finish", - finishReason: "stop", - usage: {}, - providerMetadata: undefined, - response: {}, - }, - ], - }), + makeMockStreamResult([ + { type: "text-delta", textDelta: "Hello!" 
}, + { + type: "finish", + finishReason: "stop", + usage: {}, + providerMetadata: undefined, + response: {}, + }, + ]), ); const agent = new Agent(makeConfig()); @@ -97,19 +89,17 @@ describe("Agent", () => { it("yields text-delta events", async () => { const { streamText } = await import("ai"); vi.mocked(streamText).mockReturnValue( - makeMockStreamResult({ - events: [ - { type: "text-delta", textDelta: "Hello" }, - { type: "text-delta", textDelta: " world" }, - { - type: "finish", - finishReason: "stop", - usage: {}, - providerMetadata: undefined, - response: {}, - }, - ], - }), + makeMockStreamResult([ + { type: "text-delta", textDelta: "Hello" }, + { type: "text-delta", textDelta: " world" }, + { + type: "finish", + finishReason: "stop", + usage: {}, + providerMetadata: undefined, + response: {}, + }, + ]), ); const agent = new Agent(makeConfig()); @@ -127,18 +117,16 @@ describe("Agent", () => { it("adds user message and assistant message to history", async () => { const { streamText } = await import("ai"); vi.mocked(streamText).mockReturnValue( - makeMockStreamResult({ - events: [ - { type: "text-delta", textDelta: "Response" }, - { - type: "finish", - finishReason: "stop", - usage: {}, - providerMetadata: undefined, - response: {}, - }, - ], - }), + makeMockStreamResult([ + { type: "text-delta", textDelta: "Response" }, + { + type: "finish", + finishReason: "stop", + usage: {}, + providerMetadata: undefined, + response: {}, + }, + ]), ); const agent = new Agent(makeConfig()); @@ -160,18 +148,16 @@ describe("Agent", () => { it("yields done event with final message", async () => { const { streamText } = await import("ai"); vi.mocked(streamText).mockReturnValue( - makeMockStreamResult({ - events: [ - { type: "text-delta", textDelta: "Done!" }, - { - type: "finish", - finishReason: "stop", - usage: {}, - providerMetadata: undefined, - response: {}, - }, - ], - }), + makeMockStreamResult([ + { type: "text-delta", textDelta: "Done!" 
}, + { + type: "finish", + finishReason: "stop", + usage: {}, + providerMetadata: undefined, + response: {}, + }, + ]), ); const agent = new Agent(makeConfig()); @@ -190,31 +176,39 @@ describe("Agent", () => { it("yields tool-call and tool-result events", async () => { const { streamText } = await import("ai"); - vi.mocked(streamText).mockReturnValue( - makeMockStreamResult({ - events: [ + + // First call: LLM emits a tool-call + // Second call (after tool execution): LLM emits text response with no tool calls + vi.mocked(streamText) + .mockReturnValueOnce( + makeMockStreamResult([ { type: "tool-call", toolCallId: "tc1", toolName: "read_file", args: { path: "hello.txt" }, }, - { type: "text-delta", textDelta: "Here is the file." }, { type: "finish", - finishReason: "stop", + finishReason: "tool-calls", usage: {}, providerMetadata: undefined, response: {}, }, - ], - steps: [ + ]), + ) + .mockReturnValueOnce( + makeMockStreamResult([ + { type: "text-delta", textDelta: "Here is the file." }, { - toolResults: [{ toolCallId: "tc1", result: "file contents" }], + type: "finish", + finishReason: "stop", + usage: {}, + providerMetadata: undefined, + response: {}, }, - ], - }), - ); + ]), + ); const toolDef = { name: "read_file", diff --git a/packages/core/tests/config/loader.test.ts b/packages/core/tests/config/loader.test.ts new file mode 100644 index 0000000..9a358fe --- /dev/null +++ b/packages/core/tests/config/loader.test.ts @@ -0,0 +1,166 @@ +import { homedir } from "node:os"; +import { join, sep } from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { writeFileSync, mkdirSync, rmSync } from "node:fs"; +import { configToRuleset, loadConfig } from "../../src/config/loader.js"; + +const TMP = join("/tmp/opencode", "dispatch-config-test"); + +beforeEach(() => { + mkdirSync(TMP, { recursive: true }); +}); + +afterEach(() => { + rmSync(TMP, { recursive: true, force: true }); +}); + +function writeYaml(content: string): void { 
+ writeFileSync(join(TMP, "dispatch.yaml"), content, "utf-8"); +} + +describe("loadConfig", () => { + it("returns empty permissions when dispatch.yaml is missing", () => { + const config = loadConfig(TMP); + expect(config.permissions).toEqual({}); + }); + + it("parses simple string permissions", () => { + writeYaml(`permissions:\n read: allow\n edit: deny\n`); + const config = loadConfig(TMP); + expect(config.permissions["read"]).toBe("allow"); + expect(config.permissions["edit"]).toBe("deny"); + }); + + it("parses nested pattern permissions", () => { + writeYaml(`permissions:\n bash:\n "npm test": allow\n "*": ask\n`); + const config = loadConfig(TMP); + const bash = config.permissions["bash"] as Record<string, string>; + expect(bash["npm test"]).toBe("allow"); + expect(bash["*"]).toBe("ask"); + }); + + it("ignores comment lines", () => { + writeYaml(`# this is a comment\npermissions:\n # another comment\n read: allow\n`); + const config = loadConfig(TMP); + expect(config.permissions["read"]).toBe("allow"); + }); + + it("handles ~ expansion in nested keys", () => { + const home = homedir(); + writeYaml(`permissions:\n read:\n "~/projects/*": allow\n`); + const config = loadConfig(TMP); + const read = config.permissions["read"] as Record<string, string>; + expect(read[`${home}/projects/*`]).toBe("allow"); + }); + + it("handles $HOME expansion in nested keys", () => { + const home = homedir(); + writeYaml(`permissions:\n read:\n "$HOME/docs/*": allow\n`); + const config = loadConfig(TMP); + const read = config.permissions["read"] as Record<string, string>; + expect(read[`${home}/docs/*`]).toBe("allow"); + }); + + it("parses quoted keys", () => { + writeYaml(`permissions:\n bash:\n "git commit *": allow\n "rm *": deny\n`); + const config = loadConfig(TMP); + const bash = config.permissions["bash"] as Record<string, string>; + expect(bash["git commit *"]).toBe("allow"); + expect(bash["rm *"]).toBe("deny"); + }); + + it("handles multiple permission groups", () => { + 
writeYaml( + `permissions:\n read: allow\n edit:\n "*": ask\n "src/**": allow\n bash:\n "npm test": allow\n "*": ask\n`, + ); + const config = loadConfig(TMP); + expect(config.permissions["read"]).toBe("allow"); + const edit = config.permissions["edit"] as Record<string, string>; + expect(edit["*"]).toBe("ask"); + expect(edit["src/**"]).toBe("allow"); + const bash = config.permissions["bash"] as Record<string, string>; + expect(bash["npm test"]).toBe("allow"); + expect(bash["*"]).toBe("ask"); + }); + + it("preserves # inside quoted string values", () => { + writeYaml(`permissions:\n bash:\n '"file#1"': allow\n`); + const config = loadConfig(TMP); + const bash = config.permissions["bash"] as Record<string, string>; + expect(bash['"file#1"']).toBe("allow"); + }); + + it("strips inline comments on nested map keys (empty value after comment)", () => { + writeYaml(`permissions:\n bash: # scripts\n "*": allow\n`); + const config = loadConfig(TMP); + const bash = config.permissions["bash"] as Record<string, string>; + expect(bash["*"]).toBe("allow"); + }); + + it("expands ~ with platform path separator", () => { + const home = homedir(); + // Simulate a path using the OS separator + const pattern = `~${sep}projects${sep}*`; + writeYaml(`permissions:\n read:\n "${pattern}": allow\n`); + const config = loadConfig(TMP); + const read = config.permissions["read"] as Record<string, string>; + expect(read[`${home}${sep}projects${sep}*`]).toBe("allow"); + }); +}); + +describe("configToRuleset — new validations", () => { + it("falls back to ask and warns for invalid action in string value", () => { + const warn = vi.spyOn(console, "warn").mockImplementation(() => {}); + const rules = configToRuleset({ permissions: { read: "allw" } }); + expect(rules[0]?.action).toBe("ask"); + expect(warn).toHaveBeenCalledWith(expect.stringContaining("allw")); + warn.mockRestore(); + }); + + it("falls back to ask and warns for invalid action in nested pattern", () => { + const warn = 
// configToRuleset: string values become a single "*" rule, nested maps become
// one rule per pattern, and a leading ~ / $HOME is expanded to the home dir.
describe("configToRuleset", () => {
  it("produces a rule with pattern * for string value", () => {
    const expected = [{ permission: "read", pattern: "*", action: "allow" }];
    expect(configToRuleset({ permissions: { read: "allow" } })).toEqual(expected);
  });

  it("produces rules for each pattern in an object value", () => {
    const ruleset = configToRuleset({
      permissions: { bash: { "npm test": "allow", "*": "ask" } },
    });
    const expectedRules = [
      { permission: "bash", pattern: "npm test", action: "allow" },
      { permission: "bash", pattern: "*", action: "ask" },
    ];
    for (const expectedRule of expectedRules) {
      expect(ruleset).toContainEqual(expectedRule);
    }
  });

  it("expands ~ in patterns", () => {
    const userHome = homedir();
    const [first] = configToRuleset({ permissions: { read: { "~/foo/*": "allow" } } });
    expect(first?.pattern).toBe(`${userHome}/foo/*`);
  });

  it("expands $HOME in patterns", () => {
    const userHome = homedir();
    const [first] = configToRuleset({ permissions: { read: { "$HOME/bar/*": "deny" } } });
    expect(first?.pattern).toBe(`${userHome}/bar/*`);
  });

  it("handles empty permissions", () => {
    expect(configToRuleset({ permissions: {} })).toEqual([]);
  });
});
// evaluate(): last matching rule wins across all rulesets; no match yields a
// default "ask" rule that echoes the queried permission and pattern.
describe("evaluate", () => {
  type Action = Ruleset[number]["action"];

  // One-rule fixture builder; the permission defaults to "bash".
  const rule = (pattern: string, action: Action, permission = "bash"): Ruleset[number] => ({
    permission,
    pattern,
    action,
  });

  it("returns default ask when no rules match", () => {
    const { action, permission, pattern } = evaluate("bash", "ls -la");
    expect(action).toBe("ask");
    expect(permission).toBe("bash");
    expect(pattern).toBe("ls -la");
  });

  it("returns allow when matching rule is allow", () => {
    const ruleset: Ruleset = [rule("ls *", "allow")];
    expect(evaluate("bash", "ls -la", ruleset).action).toBe("allow");
  });

  it("returns deny when matching rule is deny", () => {
    const ruleset: Ruleset = [rule("rm *", "deny")];
    expect(evaluate("bash", "rm -rf /", ruleset).action).toBe("deny");
  });

  it("last-match-wins: later deny overrides earlier allow", () => {
    const ruleset: Ruleset = [rule("*", "allow"), rule("rm *", "deny")];
    expect(evaluate("bash", "rm -rf /", ruleset).action).toBe("deny");
  });

  it("last-match-wins: later allow overrides earlier deny", () => {
    const ruleset: Ruleset = [rule("rm *", "deny"), rule("*", "allow")];
    expect(evaluate("bash", "rm -rf /", ruleset).action).toBe("allow");
  });

  it("matches permission wildcard", () => {
    const ruleset: Ruleset = [rule("*", "allow", "*")];
    expect(evaluate("read", "anything", ruleset).action).toBe("allow");
  });

  it("multiple rulesets are concatenated, last match wins across rulesets", () => {
    const base: Ruleset = [rule("*", "ask")];
    const override: Ruleset = [rule("git *", "allow")];
    expect(evaluate("bash", "git status", base, override).action).toBe("allow");
  });

  it("second ruleset can deny what first ruleset allows", () => {
    const base: Ruleset = [rule("*", "allow")];
    const override: Ruleset = [rule("rm *", "deny")];
    expect(evaluate("bash", "rm -rf /", base, override).action).toBe("deny");
  });

  it("non-matching permission returns default ask", () => {
    const ruleset: Ruleset = [rule("*", "allow")];
    expect(evaluate("read", "/some/path", ruleset).action).toBe("ask");
  });
});
[rules])).rejects.toThrow("Permission denied"); + }); + + it("creates pending request when rule is ask", () => { + const svc = new PermissionService(); + svc.ask(makeRequest(), []); + expect(svc.getPending()).toHaveLength(1); + }); + + it("reply 'once' resolves the specific pending request", async () => { + const svc = new PermissionService(); + const promise = svc.ask(makeRequest(), []); + const pending = svc.getPending(); + expect(pending).toHaveLength(1); + svc.reply(pending[0].id, "once"); + const result = await promise; + expect(result).toBe("once"); + expect(svc.getPending()).toHaveLength(0); + }); + + it("reply 'always' adds approved rules and resolves", async () => { + const svc = new PermissionService(); + const promise = svc.ask(makeRequest({ patterns: ["git *"] }), []); + const pending = svc.getPending(); + svc.reply(pending[0].id, "always"); + const result = await promise; + expect(result).toBe("always"); + + // Now the same permission should be immediately allowed + const reply2 = await svc.ask(makeRequest({ always: ["git commit"] }), []); + expect(reply2).toBe("once"); + }); + + it("reply 'reject' rejects all pending requests (cascade)", async () => { + const svc = new PermissionService(); + const p1 = svc.ask(makeRequest(), []); + const p2 = svc.ask(makeRequest({ permission: "read" }), []); + + const pending = svc.getPending(); + expect(pending).toHaveLength(2); + + // Reject using the first id — should cascade to all + svc.reply(pending[0].id, "reject"); + + await expect(p1).rejects.toThrow("Permission rejected"); + await expect(p2).rejects.toThrow("Permission rejected"); + expect(svc.getPending()).toHaveLength(0); + }); + + it("approved rules override config rulesets", async () => { + const svc = new PermissionService(); + svc.approve([{ permission: "bash", pattern: "git *", action: "allow" }]); + + // Config says deny, but approved says allow — approved wins (last) + const configRules: Ruleset = [{ permission: "bash", pattern: "*", action: "deny" 
}]; + const reply = await svc.ask(makeRequest({ always: ["git status"] }), [configRules]); + expect(reply).toBe("once"); + }); + + it("getPending returns all pending requests with id and request", () => { + const svc = new PermissionService(); + const req = makeRequest(); + svc.ask(req, []); + const pending = svc.getPending(); + expect(pending).toHaveLength(1); + expect(pending[0].id).toBeDefined(); + expect(pending[0].request.permission).toBe("bash"); + }); +}); diff --git a/packages/core/tests/permission/wildcard.test.ts b/packages/core/tests/permission/wildcard.test.ts new file mode 100644 index 0000000..8fa30a7 --- /dev/null +++ b/packages/core/tests/permission/wildcard.test.ts @@ -0,0 +1,50 @@ +import { describe, expect, it } from "vitest"; +import { Wildcard } from "../../src/permission/wildcard.js"; + +describe("Wildcard.match", () => { + it("matches exact string", () => { + expect(Wildcard.match("bash", "bash")).toBe(true); + expect(Wildcard.match("bash", "read")).toBe(false); + }); + + it("matches * wildcard (any characters)", () => { + expect(Wildcard.match("*", "bash")).toBe(true); + expect(Wildcard.match("*", "anything")).toBe(true); + expect(Wildcard.match("ba*", "bash")).toBe(true); + expect(Wildcard.match("ba*", "ba")).toBe(true); + expect(Wildcard.match("ba*", "read")).toBe(false); + }); + + it("matches ? 
wildcard (single character)", () => { + expect(Wildcard.match("ba?h", "bash")).toBe(true); + expect(Wildcard.match("ba?h", "bath")).toBe(true); + expect(Wildcard.match("ba?h", "baXXh")).toBe(false); + expect(Wildcard.match("?", "a")).toBe(true); + expect(Wildcard.match("?", "ab")).toBe(false); + }); + + it("matches nested * patterns with path-like strings", () => { + expect(Wildcard.match("/home/*", "/home/user")).toBe(true); + expect(Wildcard.match("/home/*/file.txt", "/home/user/file.txt")).toBe(true); + expect(Wildcard.match("/home/*/file.txt", "/home/user/subdir/file.txt")).toBe(true); + expect(Wildcard.match("/home/*/file.txt", "/tmp/user/file.txt")).toBe(false); + }); + + it("escapes regex special characters in pattern", () => { + expect(Wildcard.match("git add .", "git add .")).toBe(true); + expect(Wildcard.match("git add .", "git add X")).toBe(false); + expect(Wildcard.match("foo(bar)", "foo(bar)")).toBe(true); + expect(Wildcard.match("foo(bar)", "fooXbar")).toBe(false); + }); + + it("is case-sensitive", () => { + expect(Wildcard.match("Bash", "bash")).toBe(false); + expect(Wildcard.match("BASH", "BASH")).toBe(true); + }); + + it("handles empty pattern and value", () => { + expect(Wildcard.match("", "")).toBe(true); + expect(Wildcard.match("", "x")).toBe(false); + expect(Wildcard.match("*", "")).toBe(true); + }); +}); diff --git a/packages/core/tests/tools/bash-arity.test.ts b/packages/core/tests/tools/bash-arity.test.ts new file mode 100644 index 0000000..a01a6a5 --- /dev/null +++ b/packages/core/tests/tools/bash-arity.test.ts @@ -0,0 +1,36 @@ +import { describe, expect, it } from "vitest"; +import { prefix } from "../../src/tools/bash-arity.js"; + +describe("BashArity.prefix", () => { + it("returns arity-2 prefix for known command 'git'", () => { + expect(prefix(["git", "checkout", "main"])).toEqual(["git", "checkout"]); + }); + + it("returns arity-3 prefix for npm", () => { + expect(prefix(["npm", "run", "dev"])).toEqual(["npm", "run", "dev"]); + }); + + 
it("returns arity-2 prefix for bun", () => { + expect(prefix(["bun", "install", "--frozen-lockfile"])).toEqual(["bun", "install"]); + }); + + it("returns just the command for unknown command", () => { + expect(prefix(["unknowncmd", "arg1", "arg2"])).toEqual(["unknowncmd"]); + }); + + it("returns empty array for empty tokens", () => { + expect(prefix([])).toEqual([]); + }); + + it("handles single token for unknown command", () => { + expect(prefix(["ls"])).toEqual(["ls"]); + }); + + it("handles git with fewer tokens than arity", () => { + expect(prefix(["git"])).toEqual(["git"]); + }); + + it("handles case-insensitive matching", () => { + expect(prefix(["GIT", "checkout", "main"])).toEqual(["GIT", "checkout"]); + }); +}); diff --git a/packages/core/tests/tools/run-shell.test.ts b/packages/core/tests/tools/run-shell.test.ts new file mode 100644 index 0000000..cb66d1c --- /dev/null +++ b/packages/core/tests/tools/run-shell.test.ts @@ -0,0 +1,78 @@ +import { mkdtemp, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { createRunShellTool } from "../../src/tools/run-shell.js"; + +describe("run_shell tool", () => { + let workDir: string; + + beforeEach(async () => { + workDir = await mkdtemp(join(tmpdir(), "dispatch-test-")); + }); + + afterEach(async () => { + await rm(workDir, { recursive: true, force: true }); + }); + + it("executes a simple echo command", async () => { + const tool = createRunShellTool(workDir); + const raw = await tool.execute({ command: "echo hello" }); + const result = JSON.parse(raw); + expect(result.stdout.trim()).toBe("hello"); + expect(result.exitCode).toBe(0); + }); + + it("returns non-zero exit code on failure", async () => { + const tool = createRunShellTool(workDir); + const raw = await tool.execute({ command: "exit 42" }); + const result = JSON.parse(raw); + expect(result.exitCode).toBe(42); + }); + + 
it("captures stderr", async () => { + const tool = createRunShellTool(workDir); + const raw = await tool.execute({ command: "echo errormsg >&2" }); + const result = JSON.parse(raw); + expect(result.stderr.trim()).toBe("errormsg"); + }); + + it("handles timeout", async () => { + const tool = createRunShellTool(workDir); + const raw = await tool.execute({ command: "sleep 10", timeout: 100 }); + const result = JSON.parse(raw); + // Either times out (non-zero exit) or returns an error + expect(result.exitCode !== 0 || result.error !== undefined).toBe(true); + }, 5000); + + it("executes in the working directory", async () => { + const tool = createRunShellTool(workDir); + const raw = await tool.execute({ command: "pwd" }); + const result = JSON.parse(raw); + // On macOS /tmp is symlinked; use includes check + expect(result.stdout.trim()).toContain(workDir.replace(/^\/private/, "")); + }); + + it("calls onOutput callback with stdout chunks", async () => { + const tool = createRunShellTool(workDir); + const onOutput = vi.fn(); + const raw = await tool.execute({ command: "echo streaming" }, { onOutput }); + const result = JSON.parse(raw); + expect(result.stdout.trim()).toBe("streaming"); + expect(onOutput).toHaveBeenCalledWith(expect.stringContaining("streaming"), "stdout"); + }); + + it("calls onOutput callback with stderr chunks", async () => { + const tool = createRunShellTool(workDir); + const onOutput = vi.fn(); + await tool.execute({ command: "echo errdata >&2" }, { onOutput }); + expect(onOutput).toHaveBeenCalledWith(expect.stringContaining("errdata"), "stderr"); + }); + + it("works without context (backward compatible)", async () => { + const tool = createRunShellTool(workDir); + const raw = await tool.execute({ command: "echo nocontext" }); + const result = JSON.parse(raw); + expect(result.stdout.trim()).toBe("nocontext"); + }); +}); diff --git a/packages/frontend/src/App.svelte b/packages/frontend/src/App.svelte index 038fb09..c8e3803 100644 --- 
a/packages/frontend/src/App.svelte +++ b/packages/frontend/src/App.svelte @@ -3,6 +3,9 @@ import { onMount } from "svelte"; import ChatInput from "./lib/components/ChatInput.svelte"; import ChatPanel from "./lib/components/ChatPanel.svelte"; import Header from "./lib/components/Header.svelte"; +import PermissionPrompt from "./lib/components/PermissionPrompt.svelte"; +import PermissionLog from "./lib/components/PermissionLog.svelte"; +import { chatStore } from "./lib/chat.svelte.js"; import { wsClient } from "./lib/ws.svelte.js"; const STORAGE_KEY = "dispatch-theme"; @@ -30,3 +33,12 @@ onMount(() => { </div> <ChatInput /> </div> + +<PermissionPrompt + pending={chatStore.pendingPermissions} + onReply={(id, reply) => chatStore.replyPermission(id, reply)} +/> + +<div class="fixed bottom-24 right-4 w-80 z-10"> + <PermissionLog entries={chatStore.permissionLog} /> +</div> diff --git a/packages/frontend/src/lib/chat.svelte.ts b/packages/frontend/src/lib/chat.svelte.ts index 78fb2db..c0f0a98 100644 --- a/packages/frontend/src/lib/chat.svelte.ts +++ b/packages/frontend/src/lib/chat.svelte.ts @@ -1,5 +1,5 @@ import { config } from "./config.js"; -import type { AgentEvent, ChatMessage, ContentSegment, DebugInfo } from "./types.js"; +import type { AgentEvent, ChatMessage, ContentSegment, DebugInfo, LogEntry, PermissionPrompt } from "./types.js"; import { wsClient } from "./ws.svelte.js"; function generateId() { @@ -69,6 +69,8 @@ function createChatStore() { let agentStatus: "idle" | "running" | "error" = $state("idle"); let isConnected = $state(false); let currentAssistantId: string | null = null; + let pendingPermissions: PermissionPrompt[] = $state([]); + let permissionLog: LogEntry[] = $state([]); wsClient.onEvent((event) => { handleEvent(event); @@ -112,16 +114,16 @@ function createChatStore() { } break; } - case "reasoning-delta": { - ensureCurrentAssistantMessage(); - messages = messages.map((m) => { - if (m.id === currentAssistantId) { - return { ...m, thinking: 
(m.thinking ?? "") + event.delta }; - } - return m; - }); - break; - } + case "reasoning-delta": { + ensureCurrentAssistantMessage(); + messages = messages.map((m) => { + if (m.id === currentAssistantId) { + return { ...m, thinking: (m.thinking ?? "") + event.delta }; + } + return m; + }); + break; + } case "text-delta": { ensureCurrentAssistantMessage(); messages = messages.map((m) => { @@ -203,6 +205,37 @@ function createChatStore() { agentStatus = "error"; break; } + case "permission-prompt": { + pendingPermissions = event.pending; + break; + } + case "shell-output": { + messages = messages.map((m) => { + if (m.id === currentAssistantId) { + // Find the last tool-call segment + const segments = [...m.content]; + let found = false; + for (let i = segments.length - 1; i >= 0; i--) { + const seg = segments[i]; + if (seg && seg.type === "tool-call") { + segments[i] = { + ...seg, + shellOutput: { + stdout: (seg.shellOutput?.stdout ?? "") + (event.stream === "stdout" ? event.data : ""), + stderr: (seg.shellOutput?.stderr ?? "") + (event.stream === "stderr" ? 
event.data : ""), + }, + }; + found = true; + break; + } + } + if (!found) return m; // no tool-call segment yet + return { ...m, content: segments }; + } + return m; + }); + break; + } } } @@ -258,6 +291,27 @@ function createChatStore() { return formatConversation(messages); } + function replyPermission(id: string, reply: "once" | "always" | "reject") { + if (wsClient.connectionStatus !== "connected") { + // WebSocket is not connected; skip optimistic removal to avoid losing the prompt + return; + } + const prompt = pendingPermissions.find((p) => p.id === id); + wsClient.send({ type: "permission-reply", id, reply }); + pendingPermissions = pendingPermissions.filter((p) => p.id !== id); + if (prompt) { + const entry: LogEntry = { + id: generateId(), + permission: prompt.permission, + patterns: prompt.patterns, + action: reply, + timestamp: new Date().toISOString(), + description: prompt.description, + }; + permissionLog = [...permissionLog, entry]; + } + } + function clear() { messages = []; currentAssistantId = null; @@ -274,8 +328,15 @@ function createChatStore() { get isConnected() { return isConnected; }, + get pendingPermissions() { + return pendingPermissions; + }, + get permissionLog() { + return permissionLog; + }, sendMessage, handleEvent, + replyPermission, copyConversation, clear, }; diff --git a/packages/frontend/src/lib/components/PermissionLog.svelte b/packages/frontend/src/lib/components/PermissionLog.svelte new file mode 100644 index 0000000..f6e07f6 --- /dev/null +++ b/packages/frontend/src/lib/components/PermissionLog.svelte @@ -0,0 +1,28 @@ +<script lang="ts"> +import type { LogEntry } from "../types.js"; + +const { entries }: { entries: LogEntry[] } = $props(); +</script> + +<div class="collapse collapse-arrow bg-base-200 mt-4"> + <input type="checkbox" /> + <div class="collapse-title text-sm font-medium"> + Permission Log ({entries.length}) + </div> + <div class="collapse-content text-xs max-h-40 overflow-y-auto"> + {#if entries.length === 0} + 
<p class="text-base-content/50 italic">No permissions granted or denied yet.</p> + {:else} + {#each entries as entry (entry.id)} + <div class="flex items-center gap-2 py-1 border-b border-base-300"> + <span class="badge badge-sm {entry.action === 'reject' ? 'badge-error' : 'badge-success'}"> + {entry.action} + </span> + <span class="text-base-content/70">{entry.permission}</span> + <span class="text-base-content/50 ml-auto text-xs">{entry.timestamp}</span> + </div> + <p class="text-base-content/60 pl-2 pb-1">{entry.description}</p> + {/each} + {/if} + </div> +</div> diff --git a/packages/frontend/src/lib/components/PermissionPrompt.svelte b/packages/frontend/src/lib/components/PermissionPrompt.svelte new file mode 100644 index 0000000..ce7afd7 --- /dev/null +++ b/packages/frontend/src/lib/components/PermissionPrompt.svelte @@ -0,0 +1,97 @@ +<script lang="ts"> +import type { PermissionPrompt } from "../types.js"; + +const { pending, onReply }: { + pending: PermissionPrompt[]; + onReply: (id: string, reply: "once" | "always" | "reject") => void; +} = $props(); + +let current = $derived(pending[0]); + +let showAlwaysConfirmation = $state(false); + +let dialogEl: HTMLDialogElement | undefined = $state(); + +$effect(() => { + if (!dialogEl) return; + if (current) { + if (!dialogEl.open) dialogEl.showModal(); + } else { + if (dialogEl.open) dialogEl.close(); + } +}); + +function handleAlways() { + showAlwaysConfirmation = true; +} + +function confirmAlways() { + if (current) onReply(current.id, "always"); + showAlwaysConfirmation = false; +} + +function handleOnce() { + if (current) onReply(current.id, "once"); +} + +function handleReject() { + showAlwaysConfirmation = false; + if (current) onReply(current.id, "reject"); +} +</script> + +<dialog class="modal" bind:this={dialogEl} oncancel={handleReject}> + {#if current} + {#if !showAlwaysConfirmation} + <div class="modal-box"> + <h3 class="text-lg font-bold"> + {#if current.permission === "bash"} + Run command + {:else 
if current.permission === "external_directory"} + Access external directory + {:else if current.permission === "read"} + Read file + {:else if current.permission === "edit"} + Edit file + {:else} + Permission required + {/if} + </h3> + + <p class="py-2 text-sm opacity-70">{current.description}</p> + + {#if current.permission === "bash" && current.metadata.command} + <div class="mockup-code my-2"> + <pre><code>$ {current.metadata.command as string}</code></pre> + </div> + {/if} + + {#if current.metadata.filepath} + <p class="text-sm font-mono">{current.metadata.filepath as string}</p> + {/if} + + <div class="modal-action gap-2"> + <button class="btn btn-sm btn-ghost" aria-label="Deny permission" onclick={handleReject}>Deny</button> + <button class="btn btn-sm" aria-label="Allow once" onclick={handleOnce}>Allow once</button> + <button class="btn btn-sm btn-primary" aria-label="Always allow" onclick={handleAlways}>Always allow</button> + </div> + </div> + {:else} + <div class="modal-box"> + <h3 class="text-lg font-bold">Always allow?</h3> + <p class="py-2 text-sm"> + The following patterns will be permanently allowed until you restart Dispatch: + </p> + <div class="mockup-code my-2"> + {#each current.always as pattern} + <pre><code>{pattern}</code></pre> + {/each} + </div> + <div class="modal-action gap-2"> + <button class="btn btn-sm btn-ghost" aria-label="Go back" onclick={() => showAlwaysConfirmation = false}>Back</button> + <button class="btn btn-sm btn-primary" aria-label="Confirm always allow" onclick={confirmAlways}>Confirm</button> + </div> + </div> + {/if} + {/if} +</dialog> diff --git a/packages/frontend/src/lib/components/ToolCallDisplay.svelte b/packages/frontend/src/lib/components/ToolCallDisplay.svelte index 83d21ba..070a8e3 100644 --- a/packages/frontend/src/lib/components/ToolCallDisplay.svelte +++ b/packages/frontend/src/lib/components/ToolCallDisplay.svelte @@ -8,6 +8,39 @@ let isExpanded = $state(toolCall.isExpanded); function toggle() { isExpanded 
= !isExpanded; } + +interface ShellResult { + stdout: string; + stderr: string; + exitCode: number; +} + +function parseShellResult(result: string): ShellResult | null { + try { + const parsed = JSON.parse(result) as unknown; + if ( + parsed !== null && + typeof parsed === "object" && + "stdout" in parsed && + "stderr" in parsed && + "exitCode" in parsed + ) { + return { + stdout: String((parsed as Record<string, unknown>).stdout ?? ""), + stderr: String((parsed as Record<string, unknown>).stderr ?? ""), + exitCode: Number((parsed as Record<string, unknown>).exitCode ?? 0), + }; + } + return null; + } catch { + return null; + } +} + +const isShell = $derived(toolCall.name === "run_shell"); +const shellResult = $derived( + isShell && toolCall.result !== undefined ? parseShellResult(toolCall.result) : null, +); </script> <div class="collapse collapse-arrow bg-base-200 my-1 rounded-lg border border-base-300 {isExpanded ? 'collapse-open' : ''}"> @@ -20,7 +53,11 @@ function toggle() { <span class="badge badge-neutral badge-sm">tool</span> <span class="font-mono">{toolCall.name}</span> {#if toolCall.result !== undefined} - {#if toolCall.isError} + {#if isShell && shellResult !== null} + <span class="badge badge-sm ml-auto {shellResult.exitCode === 0 ? 'badge-success' : 'badge-error'}"> + exit {shellResult.exitCode} + </span> + {:else if toolCall.isError} <span class="badge badge-error badge-sm ml-auto">error</span> {:else} <span class="badge badge-success badge-sm ml-auto">done</span> @@ -36,15 +73,51 @@ function toggle() { <p class="font-semibold text-base-content/70 mb-1">Arguments</p> <pre class="bg-base-300 rounded p-2 overflow-auto max-h-40 whitespace-pre-wrap break-all">{JSON.stringify(toolCall.arguments, null, 2)}</pre> </div> - {#if toolCall.result !== undefined} - <div class="mt-2"> - <p class="font-semibold text-base-content/70 mb-1">Result</p> - <pre - class="rounded p-2 overflow-auto max-h-40 whitespace-pre-wrap break-all {toolCall.isError - ? 
'bg-error/20 text-error' - : 'bg-base-300'}">{toolCall.result}</pre> - </div> - {/if} + {#if isShell && toolCall.result !== undefined} + {#if shellResult !== null} + <div class="mt-2"> + <p class="font-semibold text-base-content/70 mb-1">stdout:</p> + <pre class="bg-base-300 rounded p-2 overflow-auto max-h-40 whitespace-pre-wrap break-all font-mono">{shellResult.stdout || "(empty)"}</pre> + </div> + {#if shellResult.stderr} + <div class="mt-2"> + <p class="font-semibold text-error/80 mb-1">stderr:</p> + <pre class="bg-error/10 text-error rounded p-2 overflow-auto max-h-40 whitespace-pre-wrap break-all font-mono">{shellResult.stderr}</pre> + </div> + {/if} + <div class="mt-2 flex items-center gap-2"> + <span class="font-semibold text-base-content/70">exit code:</span> + <span class="badge badge-sm {shellResult.exitCode === 0 ? 'badge-success' : 'badge-error'}">{shellResult.exitCode}</span> + </div> + {:else} + <div class="mt-2"> + <p class="font-semibold text-base-content/70 mb-1">Result</p> + <pre class="rounded p-2 overflow-auto max-h-40 whitespace-pre-wrap break-all {toolCall.isError ? 
'bg-error/20 text-error' : 'bg-base-300'}">{toolCall.result}</pre> + </div> + {/if} + {:else if isShell && toolCall.shellOutput} + {#if toolCall.shellOutput.stdout} + <div class="mt-2"> + <p class="font-semibold text-base-content/70 mb-1">stdout</p> + <pre class="bg-base-300 rounded p-2 overflow-auto max-h-40 whitespace-pre-wrap break-all text-xs">{toolCall.shellOutput.stdout}</pre> + </div> + {/if} + {#if toolCall.shellOutput.stderr} + <div class="mt-2"> + <p class="font-semibold text-error/70 mb-1">stderr</p> + <pre class="bg-error/10 rounded p-2 overflow-auto max-h-40 whitespace-pre-wrap break-all text-xs text-error">{toolCall.shellOutput.stderr}</pre> + </div> + {/if} + <span class="text-xs text-base-content/50 italic">Running...</span> + {:else if toolCall.result !== undefined} + <div class="mt-2"> + <p class="font-semibold text-base-content/70 mb-1">Result</p> + <pre + class="rounded p-2 overflow-auto max-h-40 whitespace-pre-wrap break-all {toolCall.isError + ? 'bg-error/20 text-error' + : 'bg-base-300'}">{toolCall.result}</pre> + </div> + {/if} </div> {/if} </div> diff --git a/packages/frontend/src/lib/types.ts b/packages/frontend/src/lib/types.ts index 93bc477..3626bbe 100644 --- a/packages/frontend/src/lib/types.ts +++ b/packages/frontend/src/lib/types.ts @@ -5,6 +5,7 @@ export interface ToolCallDisplay { result?: string; isError?: boolean; isExpanded: boolean; + shellOutput?: { stdout: string; stderr: string }; } export interface DebugInfo { @@ -59,4 +60,24 @@ export type AgentEvent = toolCalls?: unknown[]; toolResults?: unknown[]; }; - }; + } + | { type: "permission-prompt"; pending: PermissionPrompt[] } + | { type: "shell-output"; data: string; stream: "stdout" | "stderr" }; + +export interface PermissionPrompt { + id: string; + permission: string; + patterns: string[]; + always: string[]; + description: string; + metadata: Record<string, unknown>; +} + +export interface LogEntry { + id: string; + permission: string; + patterns: string[]; + action: 
"once" | "always" | "reject"; + timestamp: string; + description: string; +} diff --git a/packages/frontend/src/lib/ws.svelte.ts b/packages/frontend/src/lib/ws.svelte.ts index 76c7ef5..2ca97be 100644 --- a/packages/frontend/src/lib/ws.svelte.ts +++ b/packages/frontend/src/lib/ws.svelte.ts @@ -76,6 +76,12 @@ function createWebSocketClient(url: string) { }; } + function send(data: unknown): void { + if (ws && ws.readyState === WebSocket.OPEN) { + ws.send(JSON.stringify(data)); + } + } + return { get connectionStatus() { return connectionStatus; @@ -83,6 +89,7 @@ function createWebSocketClient(url: string) { connect, disconnect, onEvent, + send, }; } diff --git a/packages/frontend/tests/chat-store.test.ts b/packages/frontend/tests/chat-store.test.ts index 11b7deb..db1132c 100644 --- a/packages/frontend/tests/chat-store.test.ts +++ b/packages/frontend/tests/chat-store.test.ts @@ -1,5 +1,5 @@ -import { beforeEach, describe, expect, it } from "vitest"; -import type { AgentEvent, ContentSegment } from "../src/lib/types.js"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { AgentEvent, ContentSegment, LogEntry, PermissionPrompt } from "../src/lib/types.js"; // We test the logic inline since runes require svelte compilation context. // The chat store logic is tested via a plain reimplementation of the same logic. 
@@ -17,10 +17,12 @@ interface ChatMessage { } // Plain JS version of the chat store logic (no runes) for unit testing -function createTestStore() { +function createTestStore(wsSend?: (data: unknown) => void) { let messages: ChatMessage[] = []; let agentStatus: "idle" | "running" | "error" = "idle"; let currentAssistantId: string | null = null; + let pendingPermissions: PermissionPrompt[] = []; + let permissionLog: LogEntry[] = []; function getCurrentAssistantMessage(): ChatMessage | null { if (!currentAssistantId) return null; @@ -142,6 +144,34 @@ function createTestStore() { agentStatus = "error"; break; } + case "permission-prompt": { + pendingPermissions = event.pending; + break; + } + case "shell-output": { + messages = messages.map((m) => { + if (m.id === currentAssistantId) { + return { + ...m, + content: m.content.map((seg, i) => { + if (seg.type === "tool-call" && i === m.content.length - 1) { + const prev = seg.shellOutput ?? { stdout: "", stderr: "" }; + return { + ...seg, + shellOutput: + event.stream === "stdout" + ? 
{ ...prev, stdout: prev.stdout + event.data } + : { ...prev, stderr: prev.stderr + event.data }, + }; + } + return seg; + }), + }; + } + return m; + }); + break; + } } } @@ -155,6 +185,23 @@ function createTestStore() { currentAssistantId = null; } + function replyPermission(id: string, reply: "once" | "always" | "reject") { + const prompt = pendingPermissions.find((p) => p.id === id); + if (wsSend) wsSend({ type: "permission-reply", id, reply }); + pendingPermissions = pendingPermissions.filter((p) => p.id !== id); + if (prompt) { + const entry: LogEntry = { + id: generateId(), + permission: prompt.permission, + patterns: prompt.patterns, + action: reply, + timestamp: new Date().toISOString(), + description: prompt.description, + }; + permissionLog = [...permissionLog, entry]; + } + } + function clear() { messages = []; currentAssistantId = null; @@ -168,8 +215,15 @@ function createTestStore() { get agentStatus() { return agentStatus; }, + get pendingPermissions() { + return pendingPermissions; + }, + get permissionLog() { + return permissionLog; + }, handleEvent, sendMessage, + replyPermission, clear, }; } @@ -314,3 +368,251 @@ describe("chat store logic", () => { expect(store.messages[0]?.thinking).toBe("First thought. 
Second thought."); }); }); + +describe("permission-prompt handling", () => { + let store: ReturnType<typeof createTestStore>; + + beforeEach(() => { + store = createTestStore(); + }); + + it("permission-prompt sets pendingPermissions", () => { + const prompt: PermissionPrompt = { + id: "p1", + permission: "bash", + patterns: ["*"], + always: ["*"], + description: "Run a command", + metadata: { command: "ls" }, + }; + store.handleEvent({ type: "permission-prompt", pending: [prompt] }); + expect(store.pendingPermissions).toHaveLength(1); + expect(store.pendingPermissions[0]?.id).toBe("p1"); + }); + + it("permission-prompt replaces previous pending permissions", () => { + const p1: PermissionPrompt = { + id: "p1", + permission: "bash", + patterns: [], + always: [], + description: "First", + metadata: {}, + }; + const p2: PermissionPrompt = { + id: "p2", + permission: "read", + patterns: [], + always: [], + description: "Second", + metadata: {}, + }; + store.handleEvent({ type: "permission-prompt", pending: [p1] }); + store.handleEvent({ type: "permission-prompt", pending: [p2] }); + expect(store.pendingPermissions).toHaveLength(1); + expect(store.pendingPermissions[0]?.id).toBe("p2"); + }); + + it("replyPermission removes the permission from pending and calls wsSend", () => { + const mockSend = vi.fn(); + const storeWithSend = createTestStore(mockSend); + const prompt: PermissionPrompt = { + id: "p1", + permission: "bash", + patterns: [], + always: [], + description: "Run command", + metadata: { command: "echo hi" }, + }; + storeWithSend.handleEvent({ type: "permission-prompt", pending: [prompt] }); + storeWithSend.replyPermission("p1", "once"); + expect(storeWithSend.pendingPermissions).toHaveLength(0); + expect(mockSend).toHaveBeenCalledWith({ type: "permission-reply", id: "p1", reply: "once" }); + }); + + it("replyPermission with 'always' sends correct payload", () => { + const mockSend = vi.fn(); + const storeWithSend = createTestStore(mockSend); + const prompt: 
PermissionPrompt = { + id: "p2", + permission: "read", + patterns: ["src/**"], + always: ["src/**"], + description: "Read a file", + metadata: { filepath: "src/foo.ts" }, + }; + storeWithSend.handleEvent({ type: "permission-prompt", pending: [prompt] }); + storeWithSend.replyPermission("p2", "always"); + expect(mockSend).toHaveBeenCalledWith({ type: "permission-reply", id: "p2", reply: "always" }); + expect(storeWithSend.pendingPermissions).toHaveLength(0); + }); + + it("replyPermission with 'reject' removes the permission", () => { + const mockSend = vi.fn(); + const storeWithSend = createTestStore(mockSend); + const prompt: PermissionPrompt = { + id: "p3", + permission: "edit", + patterns: [], + always: [], + description: "Edit a file", + metadata: {}, + }; + storeWithSend.handleEvent({ type: "permission-prompt", pending: [prompt] }); + storeWithSend.replyPermission("p3", "reject"); + expect(storeWithSend.pendingPermissions).toHaveLength(0); + expect(mockSend).toHaveBeenCalledWith({ type: "permission-reply", id: "p3", reply: "reject" }); + }); +}); + +describe("permission log", () => { + let store: ReturnType<typeof createTestStore>; + + beforeEach(() => { + store = createTestStore(vi.fn()); + }); + + it("starts with empty permission log", () => { + expect(store.permissionLog).toHaveLength(0); + }); + + it("replyPermission adds an entry to permissionLog", () => { + const mockSend = vi.fn(); + const s = createTestStore(mockSend); + const prompt: PermissionPrompt = { + id: "p1", + permission: "bash", + patterns: ["*"], + always: ["*"], + description: "Run a command", + metadata: {}, + }; + s.handleEvent({ type: "permission-prompt", pending: [prompt] }); + s.replyPermission("p1", "once"); + expect(s.permissionLog).toHaveLength(1); + expect(s.permissionLog[0]?.permission).toBe("bash"); + expect(s.permissionLog[0]?.action).toBe("once"); + expect(s.permissionLog[0]?.description).toBe("Run a command"); + }); + + it("permissionLog accumulates multiple entries", () => { + 
const mockSend = vi.fn(); + const s = createTestStore(mockSend); + const p1: PermissionPrompt = { + id: "p1", + permission: "bash", + patterns: [], + always: [], + description: "First", + metadata: {}, + }; + const p2: PermissionPrompt = { + id: "p2", + permission: "read", + patterns: [], + always: [], + description: "Second", + metadata: {}, + }; + s.handleEvent({ type: "permission-prompt", pending: [p1, p2] }); + s.replyPermission("p1", "always"); + s.replyPermission("p2", "reject"); + expect(s.permissionLog).toHaveLength(2); + expect(s.permissionLog[0]?.action).toBe("always"); + expect(s.permissionLog[1]?.action).toBe("reject"); + }); + + it("replyPermission for unknown id does not add to log", () => { + const s = createTestStore(vi.fn()); + s.replyPermission("nonexistent", "once"); + expect(s.permissionLog).toHaveLength(0); + }); +}); + +// Shell output parsing logic (mirrors ToolCallDisplay logic) +function parseShellResult(result: string): { stdout: string; stderr: string; exitCode: number } | null { + try { + const parsed = JSON.parse(result) as unknown; + if ( + parsed !== null && + typeof parsed === "object" && + "stdout" in parsed && + "stderr" in parsed && + "exitCode" in parsed + ) { + const p = parsed as Record<string, unknown>; + return { + stdout: String(p.stdout ?? ""), + stderr: String(p.stderr ?? ""), + exitCode: Number(p.exitCode ?? 
0), + }; + } + return null; + } catch { + return null; + } +} + +describe("shell output parsing", () => { + it("parses a valid shell result JSON", () => { + const result = JSON.stringify({ stdout: "hello\n", stderr: "", exitCode: 0 }); + const parsed = parseShellResult(result); + expect(parsed).not.toBeNull(); + expect(parsed?.stdout).toBe("hello\n"); + expect(parsed?.stderr).toBe(""); + expect(parsed?.exitCode).toBe(0); + }); + + it("parses non-zero exit code and stderr", () => { + const result = JSON.stringify({ stdout: "", stderr: "error: not found", exitCode: 1 }); + const parsed = parseShellResult(result); + expect(parsed?.exitCode).toBe(1); + expect(parsed?.stderr).toBe("error: not found"); + }); + + it("returns null for invalid JSON", () => { + expect(parseShellResult("not json")).toBeNull(); + }); + + it("returns null for JSON that lacks required fields", () => { + expect(parseShellResult(JSON.stringify({ stdout: "foo" }))).toBeNull(); + }); + + it("returns null for non-object JSON", () => { + expect(parseShellResult(JSON.stringify(42))).toBeNull(); + }); +}); + +describe("shell-output event handling", () => { + it("shell-output stdout appends to last tool-call shellOutput", () => { + const s = createTestStore(); + s.handleEvent({ + type: "tool-call", + toolCall: { id: "tc1", name: "run_shell", arguments: { command: "ls" } }, + }); + s.handleEvent({ type: "shell-output", data: "file1\n", stream: "stdout" }); + s.handleEvent({ type: "shell-output", data: "file2\n", stream: "stdout" }); + const seg = s.messages[0]?.content[0]; + if (seg?.type === "tool-call") { + expect(seg.shellOutput?.stdout).toBe("file1\nfile2\n"); + expect(seg.shellOutput?.stderr).toBe(""); + } else { + expect.fail("Expected tool-call segment"); + } + }); + + it("shell-output stderr appends to last tool-call shellOutput stderr", () => { + const s = createTestStore(); + s.handleEvent({ + type: "tool-call", + toolCall: { id: "tc1", name: "run_shell", arguments: { command: "ls" } }, + }); + 
s.handleEvent({ type: "shell-output", data: "err line\n", stream: "stderr" }); + const seg = s.messages[0]?.content[0]; + if (seg?.type === "tool-call") { + expect(seg.shellOutput?.stderr).toBe("err line\n"); + } else { + expect.fail("Expected tool-call segment"); + } + }); +}); @@ -66,51 +66,94 @@ Open a browser, type "read the contents of package.json and summarize it," see t **Goal:** Agent can run shell commands with directory-scoped permission controls. Usable on real projects. -**Effort:** 1-2 weeks +**Effort:** 2-3 weeks -### Backend +### Backend — Permission Engine -- [ ] Shell tool: - - `run_shell` — execute arbitrary commands, capture stdout/stderr/exit code - - Streaming output for long-running commands - - Working directory parameter (defaults to project root) -- [ ] Directory permission system: - - Current working directory + subdirectories: always allowed (read, write, execute) - - Auto-allow list loaded from config file - - All other directories: prompt user for permission before access +- [ ] Rule-based permission engine: + - Rules: `{ permission, pattern, action }` where action is `allow | deny | ask` + - Wildcard glob matching on both permission name and target pattern + - Ordered ruleset, last-match-wins (user config overrides defaults) + - Default action when no rule matches: `"ask"` - [ ] Permission grant types: - - Per-request — allow this one operation - - Per-session — allow this directory for the rest of the session - - Permanent — add to auto-allow list in config -- [ ] Permission prompt flow: - - Agent calls a tool that touches an out-of-scope directory - - API holds the request open (agent pauses) - - WebSocket pushes a permission prompt to the frontend - - User responds (approve/deny/always-allow) - - API resolves, agent continues or gets a denial message -- [ ] Basic config file loading (`dispatch.yaml`) for auto-allow list: + - Per-request — allow this one operation (no rule stored) + - Per-session — add to in-memory approved set for 
process lifetime + - Permanent — write to `dispatch.yaml` config +- [ ] Reject cascade: rejecting one pending request auto-rejects all pending requests in that session +- [ ] Path resolution: + - Working directory + all subdirectories: always in-scope (no prompt) + - All other paths: `external_directory` permission check + - `~` / `$HOME` expansion in config patterns + +### Backend — Config Format + +- [ ] `dispatch.yaml` permission block supporting per-permission patterns: ```yaml permissions: - auto_allow: - - /tmp - - ~/.config/dispatch + read: allow # shorthand: "read:*" = allow + edit: + "*": ask # prompt for edits by default + "src/**": allow # auto-allow edits inside src/ + "/tmp/*": allow + external_directory: + "~/projects/*": allow + "/tmp/*": allow + bash: + "npm test": allow + "git commit *": allow + "git push *": ask + "*": ask # prompt for unknown commands ``` -- [ ] Apply permission checks to existing file tools (`read_file`, `write_file`) as well +- [ ] Config hot-reload via chokidar + +### Backend — Shell Tool with Tree-Sitter Analysis + +- [ ] Dependencies: `web-tree-sitter` (WASM runtime), `tree-sitter-bash` (grammar WASM) + - ~2.5 MB total, loaded lazily on first shell command call +- [ ] `run_shell` tool: + - Executes arbitrary commands via `child_process`, captures stdout/stderr/exit code + - Streaming output for long-running commands + - Configurable timeout (default 2 minutes) + - Working directory parameter (defaults to project root) +- [ ] Tree-sitter static analysis pipeline: + 1. Parse command string into AST using `tree-sitter-bash` + 2. Walk all `command` nodes (recurses into pipelines, subshells, `&&`, `if` bodies) + 3. For file-touching commands (`rm`, `cp`, `mv`, `mkdir`, `touch`, `chmod`, `chown`, `cat`): + - Extract path arguments (skip flags like `-rf`) + - Resolve to absolute paths, check workspace boundary + - If outside workspace → add dir to `external_directory` permission ask + 4. 
Normalize command to pattern via `BashArity`: + - `git checkout main` → `"git checkout *"` (arity 2) + - `npm run dev --watch` → `"npm run dev *"` (arity 3) + - Unknown commands → fallback to command name only + 5. Fire two permission requests: + - `external_directory` — for any out-of-workspace paths detected + - `bash` — for the command pattern itself +- [ ] Known gaps (documented, no fix needed for MVP): + - `find -exec rm`, `xargs`, `sudo` — can't see through these wrappers + - `$(...)` substitution paths — dynamic, skipped (still prompts for the command) + - Variable-stored paths — skipped + - Parse failure → hard error (no fallback), tool call aborts +- [ ] Apply permission checks to existing file tools (`read_file`, `write_file`, `list_files`) ### Frontend - [ ] Permission prompt modal: - - Agent name - - Target path - - Operation type (read / write / execute) + - Agent name and AI's description of the operation + - For file ops: target path, operation type (read / write / execute) + - For shell commands: `$ command text` display with normalized "always allow" pattern preview + - For external directories: directory path and glob pattern - Buttons: Approve / Deny / Always Allow + - "Always Allow" shows secondary confirmation listing the patterns that will be permanently approved - [ ] Permission log panel: scrollable history of grants and denials - [ ] Shell output display in chat: stdout/stderr with monospace formatting, exit code indicator - [ ] Visual distinction between tool calls (file ops vs shell commands) ### Done When -Ask the agent to "run the test suite." It executes `npm test` in the project dir (allowed). Then ask it to "check what's in /etc/hosts." Permission prompt appears. You approve. It reads the file and reports back. Next time it tries `/etc/`, it remembers your per-session grant. +Ask the agent to "run the test suite." It executes `npm test` in the project dir (allowed). Then ask it to "check what's in /etc/hosts." 
Permission prompt appears showing the target path. You approve. It reads the file and reports back. Next time it tries `/etc/`, it remembers your per-session grant. + +Ask the agent to "clean up build artifacts with rm -rf dist." Permission prompt shows `$ rm -rf dist` with "always allow" pattern `"rm *"`. You click "Always Allow." Future `rm` commands auto-approve. --- @@ -397,7 +440,7 @@ An agent edits a TypeScript file and introduces a type error. You see the error | Phase | Scope | Effort | Cumulative | |---|---|---|---| | 1. Single Agent + UI | One agent, chat in browser | 2-3w | 2-3w | -| 2. Shell Permissions | Safe shell access, permission prompts | 1-2w | 3-5w | +| 2. Shell Permissions | Rule engine, tree-sitter shell analysis, permission prompts | 2-3w | 4-6w | | 3. Config + Skills + Models | YAML config, skills dirs, model groups, key fallback | 2-3w | 5-8w | | 4. Spawning + Tree | Multi-agent hierarchy, tree UI, user messaging | 2-3w | 7-11w | | 5. Sessions | Persistence, fork, resume, model switch | 1-2w | 8-13w | |
