summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Adam Malczewski <[email protected]> 2026-06-28 21:24:18 +0900
committer: Adam Malczewski <[email protected]> 2026-06-28 21:24:18 +0900
commit: 6dd9ea9b935e5011c16faed6c869c976cf5ff172 (patch)
tree: 4702e1ba6a58cbf630831deb24bcd1d6d261a75c
parent: 7f1381c4452846e5a2689d868ab0ee2bc90042c9 (diff)
download: dispatch-6dd9ea9b935e5011c16faed6c869c976cf5ff172.tar.gz
download: dispatch-6dd9ea9b935e5011c16faed6c869c976cf5ff172.zip
fix(vision): tell vision agents not to use tools, just describe images directly
Kimi was trying to use Python tools to analyze images rather than just describing them. Updated both vision system prompts (consult_vision and image compaction) to tell the model explicitly not to use tools and instead to describe the image directly using its own vision. The question-answering prompt still allows tools when the model is specifically asked to use them; the plain image-description prompt forbids tools outright.
-rw-r--r-- packages/vision-handoff/src/service.ts 9
1 file changed, 7 insertions, 2 deletions
diff --git a/packages/vision-handoff/src/service.ts b/packages/vision-handoff/src/service.ts
index 01245df..397d81a 100644
--- a/packages/vision-handoff/src/service.ts
+++ b/packages/vision-handoff/src/service.ts
@@ -354,7 +354,9 @@ export function createVisionHandoffService(deps: VisionHandoffDeps): VisionHando
};
const stream = vision.provider.stream([userMessage], [], {
model: vision.model,
- systemPrompt: "You are a vision assistant. Describe images faithfully and thoroughly.",
+ systemPrompt:
+ "You are a vision assistant. Describe images faithfully and thoroughly. " +
+ "Do not use any tools — just use your vision to see the image and describe it directly.",
});
const description = (await collectTextFromStream(stream)).trim();
const text =
@@ -657,7 +659,10 @@ export function createVisionHandoffService(deps: VisionHandoffDeps): VisionHando
modelName: vision.modelName,
...(opts.cwd !== undefined ? { cwd: opts.cwd } : {}),
systemPrompt:
- "You are a vision assistant. A developer who cannot see images is asking you specific questions about an image they attached. Answer their question precisely and thoroughly.",
+ "You are a vision assistant. A developer who cannot see images is asking you specific " +
+ "questions about an image they attached. Answer their question precisely and thoroughly. " +
+ "Do not use any tools unless specifically asked to — just use your vision to see the " +
+ "image and describe it directly.",
onEvent: (event: AgentEvent) => {
if (event.type === "text-delta") {
responseText += event.delta;