diff options
| author | Adam Malczewski <[email protected]> | 2026-06-02 14:49:49 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-02 14:49:49 +0900 |
| commit | ecb001ec7a2e573d8dedf5064e860e5a3e7788fd (patch) | |
| tree | 531371ba6f449a144b5bb45a3d226b025983bd4b | |
| parent | 7c527b4d8a72159954405e720d5bf776802dc0ff (diff) | |
| download | dispatch-ecb001ec7a2e573d8dedf5064e860e5a3e7788fd.tar.gz dispatch-ecb001ec7a2e573d8dedf5064e860e5a3e7788fd.zip | |
feat(todo): port opencode's declarative whole-list todo tool
Replace the imperative id-based CRUD todo tool (add/update/list/get/remove)
with opencode's declarative whole-list design: a single `todos` param that
replaces the entire list each call. No model-visible ids, no delta reasoning,
no "task not found" spirals.
- core: TaskItem { id, content, status }; statuses pending|in_progress|
completed|cancelled. TaskList.setTasks/getTasks/onChange. New rich
TODO_DESCRIPTION adapted from opencode's todowrite.txt.
- api: TASK_MANAGEMENT_GUIDANCE system-prompt section (from anthropic.txt);
updated TOOL_DESCRIPTIONS.todo. Reload fix: TabStatusSnapshot now carries
per-tab tasks, so the frontend can rehydrate the Tasks panel from getAllStatuses on reconnect.
- frontend: mirror types; hydrate tasks from snapshot in both restore paths;
upgrade sidebar Tasks panel to render content + all four statuses + progress.
- tests: new core task-list.test.ts (15 tests); updated api TaskList mocks +
getAllStatuses task-snapshot coverage.
bun run check clean; 569 tests pass; all packages typecheck.
| -rw-r--r-- | notes/todo-tool-redesign-plan.md | 26 | ||||
| -rw-r--r-- | packages/api/src/agent-manager.ts | 78 | ||||
| -rw-r--r-- | packages/api/tests/agent-manager.test.ts | 61 | ||||
| -rw-r--r-- | packages/api/tests/routes.test.ts | 21 | ||||
| -rw-r--r-- | packages/core/src/index.ts | 2 | ||||
| -rw-r--r-- | packages/core/src/tools/task-list.ts | 206 | ||||
| -rw-r--r-- | packages/core/src/types/index.ts | 18 | ||||
| -rw-r--r-- | packages/core/tests/tools/task-list.test.ts | 158 | ||||
| -rw-r--r-- | packages/frontend/src/lib/components/TaskListPanel.svelte | 68 | ||||
| -rw-r--r-- | packages/frontend/src/lib/tabs.svelte.ts | 10 | ||||
| -rw-r--r-- | packages/frontend/src/lib/types.ts | 8 |
11 files changed, 453 insertions, 203 deletions
diff --git a/notes/todo-tool-redesign-plan.md b/notes/todo-tool-redesign-plan.md index 7e3af48..78963eb 100644 --- a/notes/todo-tool-redesign-plan.md +++ b/notes/todo-tool-redesign-plan.md @@ -84,3 +84,29 @@ in the allowlist/summon/loader/permission wiring and existing agent TOMLs. untouched. - Persistence to DB (opencode stores todos in SQLite) is **not** added — Dispatch keeps the existing in-memory per-tab `TaskList`; the visible/UX behaviour is what was failing, and that's what we fix. + +--- + +## As-built (implemented on branch td/todo-fix) + +Implemented the opencode-style declarative whole-list `todo` tool. **Deviations from the plan above:** + +- **No `priority`.** Dropped per product decision. `TaskItem = { id, content, status }`; + the tool param is `todos: Array<{ content, status }>`. +- **Reload reliability fix (new).** Todos previously blanked on page reload because they were + broadcast only via the `task-list-update` change event and were absent from the reconnect + snapshot. `TabStatusSnapshot` now carries an optional `tasks` field (core + frontend mirror); + `getAllStatuses()` includes each tab's `taskList.getTasks()` for ALL tabs (omitted when empty). + The frontend hydrates `tasks` from the snapshot in both restore paths (initial `GET /status` + map and the `statuses` WS handler) instead of hardcoding `tasks: []`. Still in-memory per-tab + (no DB; does not survive a server restart). +- **Statuses:** `pending | in_progress | completed | cancelled` (as planned). +- **UI:** the existing sidebar **Tasks** panel (`TaskListPanel.svelte`) was upgraded to render + `content`, all four statuses (completed→checked+strikethrough, in_progress→indeterminate+bold, + cancelled→dim+strikethrough, pending→empty) and a `completed/active` progress counter. No new + UI surfaces were added (panel only). 
+- **System prompt:** `TODO_GUIDANCE` replaced by a `TASK_MANAGEMENT_GUIDANCE` "Task Management" + section adapted from opencode's `anthropic.txt`; `TOOL_DESCRIPTIONS.todo` and the tool's own + `TODO_DESCRIPTION` adapted from `todowrite.txt`. + +Verification: `bun run check` clean; `bun run test` 569 passing; all three packages typecheck. diff --git a/packages/api/src/agent-manager.ts b/packages/api/src/agent-manager.ts index d339fbd..85dd160 100644 --- a/packages/api/src/agent-manager.ts +++ b/packages/api/src/agent-manager.ts @@ -75,7 +75,7 @@ const TOOL_DESCRIPTIONS: Record<string, string> = { write_file: "Write content to a file (creates parent directories if needed)", run_shell: "Execute shell commands in the working directory (bash). Returns stdout, stderr, and exit code. Set background=true to run in the background and get a job_id for later retrieval. Do NOT run destructive or irreversible commands unless the user explicitly requests them.", - todo: "Manage a todo list for planning and tracking work. Actions: add, update, list, get, remove. Statuses: pending, in_progress, done.", + todo: "Create/maintain a todo list to plan and track work. Declarative whole-list write: send the entire list in `todos` each call (it replaces the previous list). Statuses: pending, in_progress, completed, cancelled.", summon: "Spawn a child agent to work on a task independently. By default blocks until the child finishes. Set background=true to return immediately with an agent_id for later retrieval.", retrieve: @@ -98,44 +98,47 @@ const MAX_AGENT_AUTO_WAKES = 6; const DEFAULT_SYSTEM_PROMPT = "You are Dispatch, an agent designed to help with any task that the user asks for. Be helpful and concise."; -const TODO_GUIDANCE = ` -## Todo List +const TASK_MANAGEMENT_GUIDANCE = ` +## Task Management -The user can see your todo list in real-time. Use it to communicate your plan and progress. +You have access to the \`todo\` tool to plan and track tasks. 
Use it VERY frequently so the user can see your plan and progress in real time. It is also a powerful planning aid: breaking larger work into smaller steps keeps you from forgetting important tasks — that is unacceptable. + +The \`todo\` tool is DECLARATIVE: every call sends the ENTIRE list in the \`todos\` parameter and replaces the previous list. There are no ids and no per-item actions — to change one item, resend the whole list with that item updated. To clear the list, send an empty array. ### When to use -- Tasks that require 3 or more steps -- When the user provides multiple things to do -- Complex work that benefits from planning before starting -- After receiving new instructions, capture them as todos immediately +- A task needs 3+ distinct steps, or benefits from planning +- The user gives multiple tasks (numbered or comma-separated) or asks for a todo list +- New instructions arrive — capture them as todos +- You start a task — mark it in_progress (only one at a time) before working +- You finish a task — mark it completed and add any follow-ups discovered ### When NOT to use -- Single, straightforward tasks that need no tracking -- Purely conversational or informational responses -- Anything completable in under 3 trivial steps - -### State management -- Only ONE item should be "in_progress" at a time. Finish current work before starting the next item. -- Mark items "done" IMMEDIATELY after completing them. Do not batch completions. -- When starting work on an item, mark it "in_progress" first. -- Add new items as you discover sub-tasks during execution. 
+- A single, straightforward task (or fewer than 3 trivial steps) +- Purely informational or conversational requests +- When tracking adds no organizational value + +### States +- pending — not started +- in_progress — actively working (exactly ONE at a time) +- completed — finished successfully +- cancelled — no longer needed + +### Rules +- Send the full desired list every time; the tool replaces the stored list +- Update status in real time; do NOT batch completions +- Mark completed only after the work is actually done (including any required verification), never on intent +- Keep exactly one in_progress while work remains; if blocked, keep it in_progress and add a follow-up todo describing the blocker ### Examples User: "Run the build and fix any type errors" -Good approach: -1. Add todo: "Run the build" -> mark in_progress -> run build -> mark done -2. If 5 errors found, add 5 todos for each error -3. Work through each one sequentially, marking in_progress then done - -User: "What does the git status command do?" -No todo needed — this is a simple informational question. - -User: "Rename the function getUser to fetchUser across the project" -Good approach: -1. Add todo: "Search for all occurrences of getUser" -2. After searching, add a todo per file that needs changes -3. Work through each file sequentially +Write the list, then work it: send [{content:"Run the build", status:"in_progress"}, {content:"Fix any type errors", status:"pending"}]. Run the build. If it surfaces 10 errors, resend the whole list — the build item completed, plus one item per error — then drive each to completed one at a time. + +User: "How do I print Hello World in Python?" +No todo needed — this is a single informational question. + +User: "Rename getUser to fetchUser across the project" +Send [{content:"Search for all occurrences of getUser", status:"in_progress"}, ...]. 
After the grep reveals the files, resend the whole list with one item per file, then work through them, resending the list as each flips to completed. `.trim(); /** @@ -160,7 +163,7 @@ function buildSystemPrompt(toolNames: string[], basePrompt?: string): string { const hasSummon = toolNames.includes("summon"); let prompt = `${base}\n\nYou have access to the following tools:\n\n${toolList}\n\nWhen asked to work with files, use these tools. Always confirm what you did after completing an action.`; if (hasTodo) { - prompt += `\n\n${TODO_GUIDANCE}`; + prompt += `\n\n${TASK_MANAGEMENT_GUIDANCE}`; } if (hasSummon) { prompt += @@ -847,7 +850,11 @@ export class AgentManager { * row. The frontend aligns its local assistant message id with * this so the next `done` event lands on the right message. * - * For idle/error tabs, only `status` is present. Tabs not in + * Every tab additionally carries its `tasks` (the current todo list) when + * non-empty, so a reloaded frontend rehydrates the Tasks panel from the + * backend rather than blanking it. + * + * For idle/error tabs, only `status` (plus any `tasks`) is present. Tabs not in * `this.tabAgents` (e.g. tabs in the DB that have never been touched * since server start) are absent from the returned record — the * caller infers their status from the DB row (always "idle" at rest). @@ -856,6 +863,13 @@ export class AgentManager { const result: Record<string, TabStatusSnapshot> = {}; for (const [tabId, tabAgent] of this.tabAgents.entries()) { const snap: TabStatusSnapshot = { status: tabAgent.status }; + // Include the tab's todo list (for ALL tabs, not just running ones) + // so a reloaded frontend rehydrates the Tasks panel from the backend + // instead of blanking it. Omit when empty to keep the payload lean. 
+ const tasks = tabAgent.taskList.getTasks(); + if (tasks.length > 0) { + snap.tasks = tasks; + } if (tabAgent.status === "running") { if (tabAgent.currentChunks) { // Defensive shallow copy: callers may serialize/mutate. diff --git a/packages/api/tests/agent-manager.test.ts b/packages/api/tests/agent-manager.test.ts index 9da6a70..014022a 100644 --- a/packages/api/tests/agent-manager.test.ts +++ b/packages/api/tests/agent-manager.test.ts @@ -279,20 +279,17 @@ vi.mock("@dispatch/core", () => ({ } }, TaskList: class MockTaskList { + private tasks: Array<{ id: string; content: string; status: string }> = []; getTasks() { - return []; - } - getTask() { - return undefined; - } - addTask() { - return { id: "task-1", title: "", description: "", status: "pending" }; - } - updateTask() { - return undefined; + return this.tasks.map((t) => ({ ...t })); } - removeTask() { - return false; + setTasks(items: Array<{ content: string; status?: string }>) { + this.tasks = items.map((item, i) => ({ + id: `task-${i + 1}`, + content: item.content, + status: item.status ?? 
"pending", + })); + return this.getTasks(); } onChange(_cb: unknown) { return () => {}; @@ -907,7 +904,7 @@ describe("AgentManager", () => { status: "running" | "idle" | "error"; keyId: null; modelId: null; - taskList: { onChange: (cb: unknown) => void }; + taskList: { onChange: (cb: unknown) => void; getTasks: () => unknown[] }; messageQueue: unknown[]; queueListeners: unknown[]; shellStore: unknown; @@ -922,7 +919,7 @@ describe("AgentManager", () => { status: "running", keyId: null, modelId: null, - taskList: { onChange: () => {} }, + taskList: { onChange: () => {}, getTasks: () => [] }, messageQueue: [], queueListeners: [], shellStore: {}, @@ -954,7 +951,7 @@ describe("AgentManager", () => { status: "running"; keyId: null; modelId: null; - taskList: { onChange: (cb: unknown) => void }; + taskList: { onChange: (cb: unknown) => void; getTasks: () => unknown[] }; messageQueue: unknown[]; queueListeners: unknown[]; shellStore: unknown; @@ -970,7 +967,7 @@ describe("AgentManager", () => { status: "running", keyId: null, modelId: null, - taskList: { onChange: () => {} }, + taskList: { onChange: () => {}, getTasks: () => [] }, messageQueue: [], queueListeners: [], shellStore: {}, @@ -996,7 +993,7 @@ describe("AgentManager", () => { status: "running"; keyId: null; modelId: null; - taskList: { onChange: (cb: unknown) => void }; + taskList: { onChange: (cb: unknown) => void; getTasks: () => unknown[] }; messageQueue: unknown[]; queueListeners: unknown[]; shellStore: unknown; @@ -1011,7 +1008,7 @@ describe("AgentManager", () => { status: "running", keyId: null, modelId: null, - taskList: { onChange: () => {} }, + taskList: { onChange: () => {}, getTasks: () => [] }, messageQueue: [], queueListeners: [], shellStore: {}, @@ -1026,6 +1023,30 @@ describe("AgentManager", () => { expect(snap["tab-early"]).not.toHaveProperty("currentAssistantId"); }); + it("getAllStatuses includes a tab's todo list (for reload rehydration)", () => { + const manager = new AgentManager(); + // 
Public API: getTaskList creates+returns the tab's list. setTasks is + // the declarative whole-list write. + const list = manager.getTaskList("tab-todos"); + list.setTasks([ + { content: "plan", status: "completed" }, + { content: "build", status: "in_progress" }, + ]); + const snap = manager.getAllStatuses(); + expect(snap["tab-todos"]?.tasks).toEqual([ + { id: "task-1", content: "plan", status: "completed" }, + { id: "task-2", content: "build", status: "in_progress" }, + ]); + }); + + it("getAllStatuses omits tasks for a tab with an empty todo list", () => { + const manager = new AgentManager(); + manager.getTaskList("tab-empty"); + const snap = manager.getAllStatuses(); + expect(snap["tab-empty"]).toBeDefined(); + expect(snap["tab-empty"]).not.toHaveProperty("tasks"); + }); + // ─── Tab-to-tab communication ───────────────────────────────── describe("deliverMessage", () => { @@ -1054,7 +1075,7 @@ describe("AgentManager", () => { status: "running", keyId: null, modelId: null, - taskList: { onChange: () => {} }, + taskList: { onChange: () => {}, getTasks: () => [] }, messageQueue: [], queueListeners: [], shellStore: {}, @@ -1175,7 +1196,7 @@ describe("AgentManager", () => { status: "running", keyId: null, modelId: null, - taskList: { onChange: () => {} }, + taskList: { onChange: () => {}, getTasks: () => [] }, messageQueue: [], queueListeners: [], shellStore: {}, diff --git a/packages/api/tests/routes.test.ts b/packages/api/tests/routes.test.ts index c1971b0..a8db5ce 100644 --- a/packages/api/tests/routes.test.ts +++ b/packages/api/tests/routes.test.ts @@ -140,20 +140,17 @@ vi.mock("@dispatch/core", () => ({ } }, TaskList: class MockTaskList { + private tasks: Array<{ id: string; content: string; status: string }> = []; getTasks() { - return []; - } - getTask() { - return undefined; - } - addTask() { - return { id: "task-1", title: "", description: "", status: "pending" }; + return this.tasks.map((t) => ({ ...t })); } - updateTask() { - return undefined; - } - 
removeTask() { - return false; + setTasks(items: Array<{ content: string; status?: string }>) { + this.tasks = items.map((item, i) => ({ + id: `task-${i + 1}`, + content: item.content, + status: item.status ?? "pending", + })); + return this.getTasks(); } onChange(_cb: unknown) { return () => {}; diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 7818024..a7b1cad 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -107,7 +107,7 @@ export { toAvailableSubagents, toAvailableUserAgents, } from "./tools/summon.js"; -export { createTaskListTool, TaskList } from "./tools/task-list.js"; +export { createTaskListTool, TaskList, TODO_DESCRIPTION } from "./tools/task-list.js"; export { clearSpillForTab } from "./tools/truncate.js"; export { createWebSearchTool } from "./tools/web-search.js"; export { createWriteFileTool } from "./tools/write-file.js"; diff --git a/packages/core/src/tools/task-list.ts b/packages/core/src/tools/task-list.ts index 29f1543..98dcf01 100644 --- a/packages/core/src/tools/task-list.ts +++ b/packages/core/src/tools/task-list.ts @@ -1,9 +1,37 @@ import { z } from "zod"; import type { TaskItem, TaskStatus, ToolDefinition } from "../types/index.js"; +/** + * Valid task statuses. Matches opencode's todo lifecycle: + * - pending not started + * - in_progress actively working (exactly ONE at a time) + * - completed finished successfully + * - cancelled no longer needed + */ +const VALID_STATUSES: ReadonlySet<TaskStatus> = new Set<TaskStatus>([ + "pending", + "in_progress", + "completed", + "cancelled", +]); + +function normalizeStatus(value: unknown): TaskStatus { + return typeof value === "string" && VALID_STATUSES.has(value as TaskStatus) + ? (value as TaskStatus) + : "pending"; +} + +/** + * Declarative, whole-list task store (ported from opencode's `todowrite`). + * + * The model never sees ids and never issues per-item mutations. 
Instead it + * sends the ENTIRE desired list on every call and {@link setTasks} rebuilds the + * stored list, assigning fresh positional ids. This is idempotent and + * eliminates the id-bookkeeping / "task not found" / delta-reasoning failure + * modes of the old imperative CRUD interface. + */ export class TaskList { private tasks: TaskItem[] = []; - private counter = 0; private listeners: Array<(tasks: TaskItem[]) => void> = []; private notify(): void { @@ -14,40 +42,22 @@ export class TaskList { } getTasks(): TaskItem[] { - return [...this.tasks]; - } - - getTask(id: string): TaskItem | undefined { - return this.tasks.find((t) => t.id === id); - } - - addTask(title: string, description: string): TaskItem { - this.counter++; - const task: TaskItem = { - id: `task-${this.counter}`, - title, - description, - status: "pending", - }; - this.tasks.push(task); - this.notify(); - return task; - } - - updateTask(id: string, status: TaskStatus): TaskItem | undefined { - const task = this.tasks.find((t) => t.id === id); - if (!task) return undefined; - task.status = status; - this.notify(); - return { ...task }; + return this.tasks.map((t) => ({ ...t })); } - removeTask(id: string): boolean { - const index = this.tasks.findIndex((t) => t.id === id); - if (index === -1) return false; - this.tasks.splice(index, 1); + /** + * Replace the entire list. Each item is assigned a fresh positional id + * (`task-1`, `task-2`, …). Invalid/missing statuses fall back to + * `pending`; an empty array clears the list. Always notifies listeners. 
+ */ + setTasks(items: Array<{ content: string; status?: unknown }>): TaskItem[] { + this.tasks = items.map((item, index) => ({ + id: `task-${index + 1}`, + content: item.content, + status: normalizeStatus(item.status), + })); this.notify(); - return true; + return this.getTasks(); } onChange(callback: (tasks: TaskItem[]) => void): () => void { @@ -58,86 +68,82 @@ export class TaskList { } } +/** + * Rich tool description adapted from opencode's `todowrite.txt`. Teaches the + * declarative whole-list cadence and the status lifecycle. + */ +export const TODO_DESCRIPTION = `Create and maintain a structured todo list for the current session to track progress and surface your plan to the user. + +This is a DECLARATIVE, whole-list tool. There are no ids and no per-item actions: every call sends the ENTIRE list in the \`todos\` parameter and REPLACES the previous list. To change one item, resend the whole list with that item changed. To clear the list, send an empty array. + +## When to use +- The task requires 3+ distinct steps and benefits from planning +- The user provides multiple tasks (numbered or comma-separated) or asks for a todo list +- New instructions arrive — capture them as todos +- You start a task — mark it in_progress (only one at a time) before working +- You finish a task — mark it completed and add any follow-ups discovered + +## When NOT to use +- A single, straightforward task (or fewer than 3 trivial steps) +- Purely informational or conversational requests +- When tracking adds no organizational value + +## States +- pending — not started +- in_progress — actively working (exactly ONE at a time) +- completed — finished successfully +- cancelled — no longer needed + +## Rules +- Send the full desired list every time; the tool replaces the stored list +- Update status in real time; do not batch completions +- Mark completed only after the work is actually done (including any required verification), never on intent +- Keep exactly one in_progress 
while work remains +- If blocked or partial, keep it in_progress and add a follow-up todo describing the blocker +- Items should be specific and actionable; break large work into smaller steps`; + export function createTaskListTool(taskList: TaskList): ToolDefinition { return { name: "todo", - description: - "Manage a todo list for planning and tracking work. Add items, update their status, list all items, or get details on a specific item.", + description: TODO_DESCRIPTION, parameters: z.object({ - action: z.enum(["add", "update", "list", "get", "remove"]).describe("The action to perform"), - title: z.string().optional().describe("Task title (required for 'add')"), - description: z - .string() - .optional() - .describe("Task description (for 'add', defaults to empty)"), - task_id: z.string().optional().describe("Task ID (required for 'update', 'get', 'remove')"), - status: z - .enum(["pending", "in_progress", "done"]) - .optional() - .describe("New status (required for 'update')"), + todos: z + .array( + z.object({ + content: z.string().describe("Brief, actionable description of the task"), + status: z + .enum(["pending", "in_progress", "completed", "cancelled"]) + .describe("Current status of the task"), + }), + ) + .describe("The complete, updated todo list. Replaces the previous list entirely."), }), execute: async (args: Record<string, unknown>): Promise<string> => { - const action = args.action as string; - - if (action === "add") { - const title = args.title as string | undefined; - if (!title) { - return "Error: 'title' is required for the 'add' action."; - } - const description = (args.description as string | undefined) ?? 
""; - const task = taskList.addTask(title, description); - return JSON.stringify(task); - } - - if (action === "update") { - const task_id = args.task_id as string | undefined; - const status = args.status as TaskStatus | undefined; - if (!task_id) { - return "Error: 'task_id' is required for the 'update' action."; - } - if (!status) { - return "Error: 'status' is required for the 'update' action."; - } - const updated = taskList.updateTask(task_id, status); - if (!updated) { - return `Error: Task with ID '${task_id}' not found.`; - } - return JSON.stringify(updated); - } - - if (action === "get") { - const task_id = args.task_id as string | undefined; - if (!task_id) { - return "Error: 'task_id' is required for the 'get' action."; - } - const task = taskList.getTask(task_id); - if (!task) { - return `Error: Task with ID '${task_id}' not found.`; - } - return JSON.stringify(task); - } - - if (action === "list") { - const tasks = taskList.getTasks(); - if (tasks.length === 0) { - return "No tasks."; - } - return JSON.stringify(tasks); + const rawTodos = args.todos; + if (!Array.isArray(rawTodos)) { + return "Error: 'todos' must be an array of { content, status } items (send the whole list)."; } - if (action === "remove") { - const task_id = args.task_id as string | undefined; - if (!task_id) { - return "Error: 'task_id' is required for the 'remove' action."; + const items: Array<{ content: string; status?: unknown }> = []; + for (const entry of rawTodos) { + if (!entry || typeof entry !== "object") { + return "Error: each todo must be an object with a 'content' string and a 'status'."; } - const removed = taskList.removeTask(task_id); - if (!removed) { - return `Error: Task with ID '${task_id}' not found.`; + const content = (entry as Record<string, unknown>).content; + if (typeof content !== "string" || content.trim() === "") { + return "Error: each todo requires a non-empty 'content' string."; } - return `Task '${task_id}' removed successfully.`; + items.push({ + 
content, + status: (entry as Record<string, unknown>).status, + }); } - return `Error: Unknown action '${action}'.`; + const stored = taskList.setTasks(items); + // Echo the canonical stored list back WITHOUT ids — the model must + // never start tracking ids; it always resends the whole list. + const echo = stored.map((t) => ({ content: t.content, status: t.status })); + return JSON.stringify(echo); }, }; } diff --git a/packages/core/src/types/index.ts b/packages/core/src/types/index.ts index 30afbd9..a22b2b7 100644 --- a/packages/core/src/types/index.ts +++ b/packages/core/src/types/index.ts @@ -252,6 +252,12 @@ export interface TabStatusSnapshot { * way `turn-start` would, so they reconcile cleanly when the turn seals. */ currentTurnId?: string; + /** + * The tab's current todo list. Included for ALL tabs (not just running + * ones) so a freshly-reloaded frontend rehydrates the Tasks panel from the + * backend instead of blanking it. Omitted when the list is empty. + */ + tasks?: TaskItem[]; } export type AgentEvent = @@ -473,12 +479,18 @@ export interface AgentSkillMapping { // ─── Task List Types ───────────────────────────────────────────── -export type TaskStatus = "pending" | "in_progress" | "done" | "blocked"; +export type TaskStatus = "pending" | "in_progress" | "completed" | "cancelled"; export interface TaskItem { + /** + * Stable positional id used purely for UI keying and the + * `task-list-update` event contract. It is NEVER exposed to the model: + * the `todo` tool is a declarative whole-list write (the model sends the + * entire desired list every call), so there are no ids for the model to + * track. Ids are reassigned positionally on every `setTasks`. 
+ */ id: string; - title: string; - description: string; + content: string; status: TaskStatus; } diff --git a/packages/core/tests/tools/task-list.test.ts b/packages/core/tests/tools/task-list.test.ts new file mode 100644 index 0000000..5903fec --- /dev/null +++ b/packages/core/tests/tools/task-list.test.ts @@ -0,0 +1,158 @@ +import { describe, expect, it, vi } from "vitest"; +import { createTaskListTool, TaskList } from "../../src/tools/task-list.js"; +import type { TaskItem } from "../../src/types/index.js"; + +describe("TaskList (declarative store)", () => { + it("starts empty", () => { + const list = new TaskList(); + expect(list.getTasks()).toEqual([]); + }); + + it("setTasks replaces the whole list and assigns positional ids", () => { + const list = new TaskList(); + const result = list.setTasks([ + { content: "first", status: "in_progress" }, + { content: "second", status: "pending" }, + ]); + expect(result).toEqual([ + { id: "task-1", content: "first", status: "in_progress" }, + { id: "task-2", content: "second", status: "pending" }, + ]); + expect(list.getTasks()).toEqual(result); + }); + + it("a second setTasks fully replaces the previous list (no append)", () => { + const list = new TaskList(); + list.setTasks([ + { content: "a", status: "completed" }, + { content: "b", status: "completed" }, + { content: "c", status: "pending" }, + ]); + const next = list.setTasks([{ content: "only", status: "in_progress" }]); + expect(next).toEqual([{ id: "task-1", content: "only", status: "in_progress" }]); + expect(list.getTasks()).toHaveLength(1); + }); + + it("preserves all four statuses", () => { + const list = new TaskList(); + const result = list.setTasks([ + { content: "p", status: "pending" }, + { content: "i", status: "in_progress" }, + { content: "c", status: "completed" }, + { content: "x", status: "cancelled" }, + ]); + expect(result.map((t) => t.status)).toEqual([ + "pending", + "in_progress", + "completed", + "cancelled", + ]); + }); + + it("defaults 
missing/invalid status to pending", () => { + const list = new TaskList(); + const result = list.setTasks([ + { content: "no status" }, + { content: "bogus", status: "done" }, + { content: "junk", status: 42 }, + ]); + expect(result.map((t) => t.status)).toEqual(["pending", "pending", "pending"]); + }); + + it("an empty array clears the list", () => { + const list = new TaskList(); + list.setTasks([{ content: "x", status: "pending" }]); + expect(list.setTasks([])).toEqual([]); + expect(list.getTasks()).toEqual([]); + }); + + it("getTasks returns copies (no external mutation leaks in)", () => { + const list = new TaskList(); + list.setTasks([{ content: "x", status: "pending" }]); + const snapshot = list.getTasks(); + snapshot[0].content = "mutated"; + expect(list.getTasks()[0].content).toBe("x"); + }); + + it("onChange fires on every setTasks with the new snapshot", () => { + const list = new TaskList(); + const seen: TaskItem[][] = []; + const unsubscribe = list.onChange((tasks) => seen.push(tasks)); + list.setTasks([{ content: "a", status: "pending" }]); + list.setTasks([{ content: "b", status: "completed" }]); + expect(seen).toHaveLength(2); + expect(seen[0]).toEqual([{ id: "task-1", content: "a", status: "pending" }]); + expect(seen[1]).toEqual([{ id: "task-1", content: "b", status: "completed" }]); + unsubscribe(); + list.setTasks([{ content: "c", status: "pending" }]); + expect(seen).toHaveLength(2); + }); +}); + +describe("createTaskListTool", () => { + it("exposes a single declarative `todos` parameter and the name `todo`", () => { + const tool = createTaskListTool(new TaskList()); + expect(tool.name).toBe("todo"); + // One top-level param: the whole-list `todos` array. 
+ const shape = (tool.parameters as { shape: Record<string, unknown> }).shape; + expect(Object.keys(shape)).toEqual(["todos"]); + }); + + it("execute updates the store and echoes the list WITHOUT ids", async () => { + const list = new TaskList(); + const tool = createTaskListTool(list); + const out = await tool.execute({ + todos: [ + { content: "plan", status: "completed" }, + { content: "build", status: "in_progress" }, + ], + }); + expect(JSON.parse(out)).toEqual([ + { content: "plan", status: "completed" }, + { content: "build", status: "in_progress" }, + ]); + // Store has ids; the echo does not. + expect(list.getTasks()).toEqual([ + { id: "task-1", content: "plan", status: "completed" }, + { id: "task-2", content: "build", status: "in_progress" }, + ]); + }); + + it("execute fires onChange so the UI broadcast is wired", async () => { + const list = new TaskList(); + const cb = vi.fn(); + list.onChange(cb); + const tool = createTaskListTool(list); + await tool.execute({ todos: [{ content: "x", status: "pending" }] }); + expect(cb).toHaveBeenCalledTimes(1); + }); + + it("execute with an empty array clears the store", async () => { + const list = new TaskList(); + list.setTasks([{ content: "x", status: "pending" }]); + const tool = createTaskListTool(list); + const out = await tool.execute({ todos: [] }); + expect(JSON.parse(out)).toEqual([]); + expect(list.getTasks()).toEqual([]); + }); + + it("execute defaults invalid status to pending in both store and echo", async () => { + const list = new TaskList(); + const tool = createTaskListTool(list); + const out = await tool.execute({ todos: [{ content: "x", status: "done" }] }); + expect(JSON.parse(out)).toEqual([{ content: "x", status: "pending" }]); + expect(list.getTasks()[0].status).toBe("pending"); + }); + + it("execute rejects a non-array todos param", async () => { + const tool = createTaskListTool(new TaskList()); + const out = await tool.execute({ todos: "nope" }); + expect(out).toMatch(/Error/); + }); + + 
it("execute rejects items missing a content string", async () => { + const tool = createTaskListTool(new TaskList()); + const out = await tool.execute({ todos: [{ status: "pending" }] }); + expect(out).toMatch(/Error/); + }); +}); diff --git a/packages/frontend/src/lib/components/TaskListPanel.svelte b/packages/frontend/src/lib/components/TaskListPanel.svelte index 17ade55..1f84bb8 100644 --- a/packages/frontend/src/lib/components/TaskListPanel.svelte +++ b/packages/frontend/src/lib/components/TaskListPanel.svelte @@ -1,34 +1,55 @@ <script lang="ts"> -interface TaskItem { - id: string; - title: string; - description: string; - status: "pending" | "in_progress" | "done"; -} +import type { TaskItem } from "../types.js"; const { tasks }: { tasks: TaskItem[] } = $props(); -const doneCount = $derived(tasks.filter((t) => t.status === "done").length); +type Status = TaskItem["status"]; + +const completedCount = $derived(tasks.filter((t) => t.status === "completed").length); const inProgressCount = $derived(tasks.filter((t) => t.status === "in_progress").length); +const cancelledCount = $derived(tasks.filter((t) => t.status === "cancelled").length); +// "Active" total excludes cancelled items, so progress reads as work that still counts. 
+const activeTotal = $derived(tasks.length - cancelledCount); -function checkboxClass(status: TaskItem["status"]): string { +function checkboxClass(status: Status): string { switch (status) { case "pending": return "checkbox checkbox-sm rounded-sm checkbox-secondary"; case "in_progress": return "checkbox checkbox-sm rounded-sm checkbox-info"; - case "done": + case "completed": return "checkbox checkbox-sm rounded-sm checkbox-success"; + case "cancelled": + return "checkbox checkbox-sm rounded-sm checkbox-neutral"; } } -function isChecked(status: TaskItem["status"]): boolean { - return status === "done"; +function isChecked(status: Status): boolean { + return status === "completed"; } -function isIndeterminate(status: TaskItem["status"]): boolean { +function isIndeterminate(status: Status): boolean { return status === "in_progress"; } + +function rowClass(status: Status): string { + if (status === "completed") return "opacity-60"; + if (status === "cancelled") return "opacity-40"; + return ""; +} + +function textClass(status: Status): string { + switch (status) { + case "completed": + return "line-through text-base-content/50"; + case "cancelled": + return "line-through text-base-content/40"; + case "in_progress": + return "font-semibold"; + default: + return ""; + } +} </script> <div class="flex flex-col gap-2"> @@ -36,13 +57,11 @@ function isIndeterminate(status: TaskItem["status"]): boolean { <p class="text-xs text-base-content/50">No tasks yet.</p> {:else} <p class="text-xs text-base-content/60"> - {doneCount}/{tasks.length} done{#if inProgressCount > 0}, {inProgressCount} in progress{/if} + {completedCount}/{activeTotal} completed{#if inProgressCount > 0}, {inProgressCount} in progress{/if}{#if cancelledCount > 0}, {cancelledCount} cancelled{/if} </p> <ul class="flex flex-col gap-0.5"> {#each tasks as task (task.id)} - <li - class="flex items-start gap-2 rounded p-1.5 transition-colors {task.status === 'done' ? 
'opacity-60' : ''}" - > + <li class="flex items-start gap-2 rounded p-1.5 transition-colors {rowClass(task.status)}"> <input type="checkbox" class={checkboxClass(task.status)} @@ -51,20 +70,9 @@ function isIndeterminate(status: TaskItem["status"]): boolean { disabled tabindex="-1" /> - <div class="flex flex-col gap-0.5 min-w-0"> - <span - class="text-xs leading-tight {task.status === 'done' - ? 'line-through text-base-content/50' - : task.status === 'in_progress' - ? 'font-semibold' - : ''}" - > - {task.title} - </span> - {#if task.description} - <p class="text-xs text-base-content/50 line-clamp-2">{task.description}</p> - {/if} - </div> + <span class="text-xs leading-tight min-w-0 {textClass(task.status)}"> + {task.content} + </span> </li> {/each} </ul> diff --git a/packages/frontend/src/lib/tabs.svelte.ts b/packages/frontend/src/lib/tabs.svelte.ts index ec718bd..875287b 100644 --- a/packages/frontend/src/lib/tabs.svelte.ts +++ b/packages/frontend/src/lib/tabs.svelte.ts @@ -844,7 +844,9 @@ export function createTabStore() { modelId: row.modelId ?? null, reasoningEffort: DEFAULT_REASONING_EFFORT, currentAssistantId, - tasks: [], + // Rehydrate the todo list from the backend snapshot so a reload + // doesn't blank the Tasks panel mid-task. + tasks: snap?.tasks ?? [], injectedSkills: [], parentTabId: row.parentTabId ?? null, persistent: true, @@ -974,6 +976,10 @@ export function createTabStore() { updateTab(t.id, { agentStatus: backendStatus }); } + // Rehydrate the todo list from the snapshot (backend truth) + // so a reconnect/reload doesn't blank the Tasks panel. + updateTab(t.id, { tasks: snap?.tasks ?? [] }); + if (backendStatus === "running") { // Seed the in-flight assistant message from the snapshot. // This handles the "browser just reopened mid-stream" @@ -1940,7 +1946,7 @@ export function createTabStore() { `Persistent: ${tab.persistent}`, `Working directory: ${tab.workingDirectory ?? 
"default"}`, `Reasoning effort: ${tab.reasoningEffort}`, - `Pending tasks: ${tab.tasks.length}`, + `Todos: ${tab.tasks.length} (${tab.tasks.filter((t) => t.status === "completed").length} completed)`, "", ]; const TOOL_RESULT_MAX = 300; diff --git a/packages/frontend/src/lib/types.ts b/packages/frontend/src/lib/types.ts index 173f68c..384028c 100644 --- a/packages/frontend/src/lib/types.ts +++ b/packages/frontend/src/lib/types.ts @@ -131,6 +131,8 @@ export interface TabStatusSnapshot { currentAssistantId?: string; /** turn_id of the in-flight turn; present iff status === "running". */ currentTurnId?: string; + /** The tab's todo list, for rehydrating the Tasks panel on reload. */ + tasks?: TaskItem[]; } export type AgentEvent = @@ -221,10 +223,10 @@ export type AgentEvent = | { type: "message-cancelled"; tabId: string; messageId: string }; export interface TaskItem { + /** Stable positional id for Svelte keying only; never shown to the model. */ id: string; - title: string; - description: string; - status: "pending" | "in_progress" | "done"; + content: string; + status: "pending" | "in_progress" | "completed" | "cancelled"; } export interface PermissionPrompt { |
