summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Aiden Cline <[email protected]> 2025-08-17 17:59:51 -0500
committer: GitHub <[email protected]> 2025-08-17 17:59:51 -0500
commit: ebd1b18b70ff7446314223693a00ff12d6d5616c (patch)
tree: c218975851700da5ed0d3566e3b871479097e71a
parent: de1764841cd079d041b3dbf5744277bd999c0c5d (diff)
download: opencode-ebd1b18b70ff7446314223693a00ff12d6d5616c.tar.gz
download: opencode-ebd1b18b70ff7446314223693a00ff12d6d5616c.zip
fix: better binary file detection (#2025)
-rw-r--r-- packages/opencode/src/tool/read.ts 58
1 file changed, 52 insertions, 6 deletions
diff --git a/packages/opencode/src/tool/read.ts b/packages/opencode/src/tool/read.ts
index 8b3cd97bd..8ebbb7fd8 100644
--- a/packages/opencode/src/tool/read.ts
+++ b/packages/opencode/src/tool/read.ts
@@ -53,7 +53,7 @@ export const ReadTool = Tool.define("read", {
const offset = params.offset || 0
const isImage = isImageFile(filepath)
if (isImage) throw new Error(`This is an image file of type: ${isImage}\nUse a different tool to process images`)
- const isBinary = await isBinaryFile(file)
+ const isBinary = await isBinaryFile(filepath, file)
if (isBinary) throw new Error(`Cannot read binary file: ${filepath}`)
const lines = await file.text().then((text) => text.split("\n"))
const raw = lines.slice(offset, offset + limit).map((line) => {
@@ -105,13 +105,59 @@ function isImageFile(filePath: string): string | false {
}
}
-async function isBinaryFile(file: Bun.BunFile): Promise<boolean> {
+async function isBinaryFile(filepath: string, file: Bun.BunFile): Promise<boolean> {
+ const ext = path.extname(filepath).toLowerCase()
+ // binary check for common non-text extensions
+ switch (ext) {
+ case ".zip":
+ case ".tar":
+ case ".gz":
+ case ".exe":
+ case ".dll":
+ case ".so":
+ case ".class":
+ case ".jar":
+ case ".war":
+ case ".7z":
+ case ".doc":
+ case ".docx":
+ case ".xls":
+ case ".xlsx":
+ case ".ppt":
+ case ".pptx":
+ case ".odt":
+ case ".ods":
+ case ".odp":
+ case ".bin":
+ case ".dat":
+ case ".obj":
+ case ".o":
+ case ".a":
+ case ".lib":
+ case ".wasm":
+ case ".pyc":
+ case ".pyo":
+ return true
+ default:
+ break
+ }
+
+ const stat = await file.stat()
+ const fileSize = stat.size
+ if (fileSize === 0) return false
+
+ const bufferSize = Math.min(4096, fileSize)
const buffer = await file.arrayBuffer()
- const bytes = new Uint8Array(buffer.slice(0, 512)) // Check first 512 bytes
+ if (buffer.byteLength === 0) return false
+ const bytes = new Uint8Array(buffer.slice(0, bufferSize))
+ let nonPrintableCount = 0
for (let i = 0; i < bytes.length; i++) {
- if (bytes[i] === 0) return true // Null byte indicates binary
+ if (bytes[i] === 0) return true
+ if (bytes[i] < 9 || (bytes[i] > 13 && bytes[i] < 32)) {
+ nonPrintableCount++
+ }
}
-
- return false
+ // If >30% non-printable characters, consider it binary
+ return nonPrintableCount / bytes.length > 0.3
}