summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Aiden Cline <[email protected]> 2025-10-09 09:05:11 -0500
committer GitHub <[email protected]> 2025-10-09 09:05:11 -0500
commit 225adc46ba2cdcf744f3344181c71ae90a76ae9c (patch)
tree fd8e2219e3063215d8b9d811776074cd6b2c9bf3
parent eb4b5721cd115ecba5652545df93b2c385c5afe3 (diff)
download opencode-225adc46ba2cdcf744f3344181c71ae90a76ae9c.tar.gz
opencode-225adc46ba2cdcf744f3344181c71ae90a76ae9c.zip
feat: allow read tool to handle images (#3052)
-rw-r--r-- packages/opencode/src/provider/models.ts | 6
-rw-r--r-- packages/opencode/src/provider/provider.ts | 5
-rw-r--r-- packages/opencode/src/session/message-v2.ts | 174
-rw-r--r-- packages/opencode/src/session/prompt.ts | 5
-rw-r--r-- packages/opencode/src/tool/read.ts | 45
-rw-r--r-- packages/opencode/src/tool/read.txt | 4
-rw-r--r-- packages/opencode/src/tool/tool.ts | 3
7 files changed, 159 insertions, 83 deletions
diff --git a/packages/opencode/src/provider/models.ts b/packages/opencode/src/provider/models.ts
index 514203e91..97310dd19 100644
--- a/packages/opencode/src/provider/models.ts
+++ b/packages/opencode/src/provider/models.ts
@@ -28,6 +28,12 @@ export namespace ModelsDev {
context: z.number(),
output: z.number(),
}),
+ modalities: z
+ .object({
+ input: z.array(z.enum(["text", "audio", "image", "video", "pdf"])),
+ output: z.array(z.enum(["text", "audio", "image", "video", "pdf"])),
+ })
+ .optional(),
experimental: z.boolean().optional(),
options: z.record(z.string(), z.any()),
provider: z.object({ npm: z.string() }).optional(),
diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts
index c18bc4898..e0fe4be23 100644
--- a/packages/opencode/src/provider/provider.ts
+++ b/packages/opencode/src/provider/provider.ts
@@ -279,6 +279,11 @@ export namespace Provider {
context: 0,
output: 0,
},
+ modalities: model.modalities ??
+ existing?.modalities ?? {
+ input: ["text"],
+ output: ["text"],
+ },
provider: model.provider ?? existing?.provider,
}
parsed.models[modelID] = parsedModel
diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts
index 7704a6155..8dc059ca1 100644
--- a/packages/opencode/src/session/message-v2.ts
+++ b/packages/opencode/src/session/message-v2.ts
@@ -17,71 +17,6 @@ export namespace MessageV2 {
}),
)
- export const ToolStatePending = z
- .object({
- status: z.literal("pending"),
- })
- .meta({
- ref: "ToolStatePending",
- })
-
- export type ToolStatePending = z.infer<typeof ToolStatePending>
-
- export const ToolStateRunning = z
- .object({
- status: z.literal("running"),
- input: z.any(),
- title: z.string().optional(),
- metadata: z.record(z.string(), z.any()).optional(),
- time: z.object({
- start: z.number(),
- }),
- })
- .meta({
- ref: "ToolStateRunning",
- })
- export type ToolStateRunning = z.infer<typeof ToolStateRunning>
-
- export const ToolStateCompleted = z
- .object({
- status: z.literal("completed"),
- input: z.record(z.string(), z.any()),
- output: z.string(),
- title: z.string(),
- metadata: z.record(z.string(), z.any()),
- time: z.object({
- start: z.number(),
- end: z.number(),
- compacted: z.number().optional(),
- }),
- })
- .meta({
- ref: "ToolStateCompleted",
- })
- export type ToolStateCompleted = z.infer<typeof ToolStateCompleted>
-
- export const ToolStateError = z
- .object({
- status: z.literal("error"),
- input: z.record(z.string(), z.any()),
- error: z.string(),
- metadata: z.record(z.string(), z.any()).optional(),
- time: z.object({
- start: z.number(),
- end: z.number(),
- }),
- })
- .meta({
- ref: "ToolStateError",
- })
- export type ToolStateError = z.infer<typeof ToolStateError>
-
- export const ToolState = z
- .discriminatedUnion("status", [ToolStatePending, ToolStateRunning, ToolStateCompleted, ToolStateError])
- .meta({
- ref: "ToolState",
- })
-
const PartBase = z.object({
id: z.string(),
sessionID: z.string(),
@@ -134,17 +69,6 @@ export namespace MessageV2 {
})
export type ReasoningPart = z.infer<typeof ReasoningPart>
- export const ToolPart = PartBase.extend({
- type: z.literal("tool"),
- callID: z.string(),
- tool: z.string(),
- state: ToolState,
- metadata: z.record(z.string(), z.any()).optional(),
- }).meta({
- ref: "ToolPart",
- })
- export type ToolPart = z.infer<typeof ToolPart>
-
const FilePartSourceBase = z.object({
text: z
.object({
@@ -228,6 +152,83 @@ export namespace MessageV2 {
})
export type StepFinishPart = z.infer<typeof StepFinishPart>
+ export const ToolStatePending = z
+ .object({
+ status: z.literal("pending"),
+ })
+ .meta({
+ ref: "ToolStatePending",
+ })
+
+ export type ToolStatePending = z.infer<typeof ToolStatePending>
+
+ export const ToolStateRunning = z
+ .object({
+ status: z.literal("running"),
+ input: z.any(),
+ title: z.string().optional(),
+ metadata: z.record(z.string(), z.any()).optional(),
+ time: z.object({
+ start: z.number(),
+ }),
+ })
+ .meta({
+ ref: "ToolStateRunning",
+ })
+ export type ToolStateRunning = z.infer<typeof ToolStateRunning>
+
+ export const ToolStateCompleted = z
+ .object({
+ status: z.literal("completed"),
+ input: z.record(z.string(), z.any()),
+ output: z.string(),
+ title: z.string(),
+ metadata: z.record(z.string(), z.any()),
+ time: z.object({
+ start: z.number(),
+ end: z.number(),
+ compacted: z.number().optional(),
+ }),
+ attachments: FilePart.array().optional(),
+ })
+ .meta({
+ ref: "ToolStateCompleted",
+ })
+ export type ToolStateCompleted = z.infer<typeof ToolStateCompleted>
+
+ export const ToolStateError = z
+ .object({
+ status: z.literal("error"),
+ input: z.record(z.string(), z.any()),
+ error: z.string(),
+ metadata: z.record(z.string(), z.any()).optional(),
+ time: z.object({
+ start: z.number(),
+ end: z.number(),
+ }),
+ })
+ .meta({
+ ref: "ToolStateError",
+ })
+ export type ToolStateError = z.infer<typeof ToolStateError>
+
+ export const ToolState = z
+ .discriminatedUnion("status", [ToolStatePending, ToolStateRunning, ToolStateCompleted, ToolStateError])
+ .meta({
+ ref: "ToolState",
+ })
+
+ export const ToolPart = PartBase.extend({
+ type: z.literal("tool"),
+ callID: z.string(),
+ tool: z.string(),
+ state: ToolState,
+ metadata: z.record(z.string(), z.any()).optional(),
+ }).meta({
+ ref: "ToolPart",
+ })
+ export type ToolPart = z.infer<typeof ToolPart>
+
const Base = z.object({
id: z.string(),
sessionID: z.string(),
@@ -531,7 +532,25 @@ export namespace MessageV2 {
},
]
if (part.type === "tool") {
- if (part.state.status === "completed")
+ if (part.state.status === "completed") {
+ if (part.state.attachments?.length) {
+ result.push({
+ id: Identifier.ascending("message"),
+ role: "user",
+ parts: [
+ {
+ type: "text",
+ text: `Tool ${part.tool} returned an attachment:`,
+ },
+ ...part.state.attachments.map((attachment) => ({
+ type: "file" as const,
+ url: attachment.url,
+ mediaType: attachment.mime,
+ filename: attachment.filename,
+ })),
+ ],
+ })
+ }
return [
{
type: ("tool-" + part.tool) as `tool-${string}`,
@@ -542,6 +561,7 @@ export namespace MessageV2 {
callProviderMetadata: part.metadata,
},
]
+ }
if (part.state.status === "error")
return [
{
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
index b65309d9e..0ccb208c6 100644
--- a/packages/opencode/src/session/prompt.ts
+++ b/packages/opencode/src/session/prompt.ts
@@ -457,6 +457,10 @@ export namespace SessionPrompt {
abort: options.abortSignal!,
messageID: input.processor.message.id,
callID: options.toolCallId,
+ extra: {
+ modelID: input.modelID,
+ providerID: input.providerID,
+ },
agent: input.agent.name,
metadata: async (val) => {
const match = input.processor.partFromToolCall(options.toolCallId)
@@ -989,6 +993,7 @@ export namespace SessionPrompt {
start: match.state.time.start,
end: Date.now(),
},
+ attachments: value.output.attachments,
},
})
delete toolcalls[value.toolCallId]
diff --git a/packages/opencode/src/tool/read.ts b/packages/opencode/src/tool/read.ts
index 2ed3accbd..5e8cecaf2 100644
--- a/packages/opencode/src/tool/read.ts
+++ b/packages/opencode/src/tool/read.ts
@@ -7,6 +7,8 @@ import { FileTime } from "../file/time"
import DESCRIPTION from "./read.txt"
import { Filesystem } from "../util/filesystem"
import { Instance } from "../project/instance"
+import { Provider } from "../provider/provider"
+import { Identifier } from "../id/id"
const DEFAULT_READ_LIMIT = 2000
const MAX_LINE_LENGTH = 2000
@@ -23,6 +25,8 @@ export const ReadTool = Tool.define("read", {
if (!path.isAbsolute(filepath)) {
filepath = path.join(process.cwd(), filepath)
}
+ const title = path.relative(Instance.worktree, filepath)
+
if (!ctx.extra?.["bypassCwdCheck"] && !Filesystem.contains(Instance.directory, filepath)) {
throw new Error(`File ${filepath} is not in the current working directory`)
}
@@ -48,12 +52,45 @@ export const ReadTool = Tool.define("read", {
throw new Error(`File not found: ${filepath}`)
}
- const limit = params.limit ?? DEFAULT_READ_LIMIT
- const offset = params.offset || 0
const isImage = isImageFile(filepath)
- if (isImage) throw new Error(`This is an image file of type: ${isImage}\nUse a different tool to process images`)
+ const supportsImages = await (async () => {
+ if (!ctx.extra?.["providerID"] || !ctx.extra?.["modelID"]) return false
+ const providerID = ctx.extra["providerID"] as string
+ const modelID = ctx.extra["modelID"] as string
+ const model = await Provider.getModel(providerID, modelID).catch(() => undefined)
+ if (!model) return false
+ return model.info.modalities?.input?.includes("image") ?? false
+ })()
+ if (isImage) {
+ if (!supportsImages) {
+ throw new Error(`Failed to read image: ${filepath}, model may not be able to read images`)
+ }
+ const mime = file.type
+ const msg = "Image read successfully"
+ return {
+ title,
+ output: msg,
+ metadata: {
+ preview: msg,
+ },
+ attachments: [
+ {
+ id: Identifier.ascending("part"),
+ sessionID: ctx.sessionID,
+ messageID: ctx.messageID,
+ type: "file",
+ mime,
+ url: `data:${mime};base64,${Buffer.from(await file.bytes()).toString("base64")}`,
+ },
+ ],
+ }
+ }
+
const isBinary = await isBinaryFile(filepath, file)
if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)
+
+ const limit = params.limit ?? DEFAULT_READ_LIMIT
+ const offset = params.offset || 0
const lines = await file.text().then((text) => text.split("\n"))
const raw = lines.slice(offset, offset + limit).map((line) => {
return line.length > MAX_LINE_LENGTH ? line.substring(0, MAX_LINE_LENGTH) + "..." : line
@@ -76,7 +113,7 @@ export const ReadTool = Tool.define("read", {
FileTime.read(ctx.sessionID, filepath)
return {
- title: path.relative(Instance.worktree, filepath),
+ title,
output,
metadata: {
preview,
diff --git a/packages/opencode/src/tool/read.txt b/packages/opencode/src/tool/read.txt
index 3904c0939..b5bffee26 100644
--- a/packages/opencode/src/tool/read.txt
+++ b/packages/opencode/src/tool/read.txt
@@ -7,6 +7,6 @@ Usage:
- You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters
- Any lines longer than 2000 characters will be truncated
- Results are returned using cat -n format, with line numbers starting at 1
-- This tool cannot read binary files, including images
-- You have the capability to call multiple tools in a single response. It is always better to speculatively read multiple files as a batch that are potentially useful.
+- You have the capability to call multiple tools in a single response. It is always better to speculatively read multiple files as a batch that are potentially useful.
- If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents.
+- You can read image files using this tool.
diff --git a/packages/opencode/src/tool/tool.ts b/packages/opencode/src/tool/tool.ts
index a372a69d7..2fc72274d 100644
--- a/packages/opencode/src/tool/tool.ts
+++ b/packages/opencode/src/tool/tool.ts
@@ -1,9 +1,11 @@
import z from "zod/v4"
+import type { MessageV2 } from "../session/message-v2"
export namespace Tool {
interface Metadata {
[key: string]: any
}
+
export type Context<M extends Metadata = Metadata> = {
sessionID: string
messageID: string
@@ -25,6 +27,7 @@ export namespace Tool {
title: string
metadata: M
output: string
+ attachments?: MessageV2.FilePart[]
}>
}>
}