diff options
| author | Adam Malczewski <[email protected]> | 2026-06-28 21:24:18 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-28 21:24:18 +0900 |
| commit | 6dd9ea9b935e5011c16faed6c869c976cf5ff172 (patch) | |
| tree | 4702e1ba6a58cbf630831deb24bcd1d6d261a75c | |
| parent | 7f1381c4452846e5a2689d868ab0ee2bc90042c9 (diff) | |
| download | dispatch-6dd9ea9b935e5011c16faed6c869c976cf5ff172.tar.gz dispatch-6dd9ea9b935e5011c16faed6c869c976cf5ff172.zip | |
fix(vision): tell vision agents not to use tools, just describe images directly
Kimi was trying to use Python tools to analyze images rather than just
describing them. Updated both vision system prompts (consult_vision and
image compaction) to explicitly instruct: do not use any tools unless
specifically asked to — just use your vision to see the image and describe
it directly.
| -rw-r--r-- | packages/vision-handoff/src/service.ts | 9 |
1 file changed, 7 insertions, 2 deletions
```diff
diff --git a/packages/vision-handoff/src/service.ts b/packages/vision-handoff/src/service.ts
index 01245df..397d81a 100644
--- a/packages/vision-handoff/src/service.ts
+++ b/packages/vision-handoff/src/service.ts
@@ -354,7 +354,9 @@ export function createVisionHandoffService(deps: VisionHandoffDeps): VisionHando
  };
  const stream = vision.provider.stream([userMessage], [], {
  model: vision.model,
- systemPrompt: "You are a vision assistant. Describe images faithfully and thoroughly.",
+ systemPrompt:
+ "You are a vision assistant. Describe images faithfully and thoroughly. " +
+ "Do not use any tools — just use your vision to see the image and describe it directly.",
  });
  const description = (await collectTextFromStream(stream)).trim();
  const text =
@@ -657,7 +659,10 @@ export function createVisionHandoffService(deps: VisionHandoffDeps): VisionHando
  modelName: vision.modelName,
  ...(opts.cwd !== undefined ? { cwd: opts.cwd } : {}),
  systemPrompt:
- "You are a vision assistant. A developer who cannot see images is asking you specific questions about an image they attached. Answer their question precisely and thoroughly.",
+ "You are a vision assistant. A developer who cannot see images is asking you specific " +
+ "questions about an image they attached. Answer their question precisely and thoroughly. " +
+ "Do not use any tools unless specifically asked to — just use your vision to see the " +
+ "image and describe it directly.",
  onEvent: (event: AgentEvent) => {
  if (event.type === "text-delta") {
  responseText += event.delta;
```
