import { readFile } from "node:fs/promises";
import { z } from "zod";
import type { ToolDefinition } from "../types/index.js";
import { canonicalize } from "./path-utils.js";
import { MAX_LINES, SPILL_ROOT } from "./truncate.js";

// Per-line truncation: any single line longer than MAX_LINE_CHARS is cut and
// replaced with a marker indicating the total length. This protects against
// minified files / base64 blobs / massive single-line JSON. The AI can use
// `read_file_slice` to inspect a specific char range within a long line.
const MAX_LINE_CHARS = 2000;

// Aligned with the universal truncator's MAX_LINES so a default-args read
// returns a response that fits under the truncator's line ceiling. Without
// this alignment, every default read of a file longer than MAX_LINES lines
// got returned by the tool and then immediately spilled by the truncator —
// wasted work. Char-dense files can still spill via MAX_CHARS, but that's
// content-dependent rather than guaranteed.
const DEFAULT_LIMIT = MAX_LINES;

// Hard cap on lines per request even if `limit` is larger.
// Prevents a `read_file(huge.log, limit=1000000)` from returning a million
// lines. The universal truncator at the agent level will spill anything
// over its own threshold, but this is a tighter first line of defense
// scoped to the read tool itself.
const HARD_LIMIT = 5000;

/**
 * Coerces an optional numeric tool argument to an integer >= 1.
 *
 * Non-numbers and `NaN` yield `fallback`; `NaN` is rejected explicitly because
 * it passes a `typeof === "number"` check and would otherwise poison every
 * index computed from it. Other numbers are floored and clamped to at least 1.
 */
function toPositiveInt(value: unknown, fallback: number): number {
  if (typeof value !== "number" || Number.isNaN(value)) {
    return fallback;
  }
  return Math.max(1, Math.floor(value));
}

/**
 * Returns true when `candidate` is `root` itself or lies below it.
 *
 * The comparison is purely textual on "/"-separated paths, so both arguments
 * must already be canonical for the result to be meaningful.
 */
function isWithin(candidate: string, root: string): boolean {
  return candidate === root || candidate.startsWith(`${root}/`);
}

/**
 * Applies per-line truncation. Lines of at most MAX_LINE_CHARS chars are
 * returned unchanged; longer lines are cut and tagged with the line number and
 * total char count so the AI knows how to call `read_file_slice`.
 */
function renderLine(line: string, lineNumber: number, filePath: string): string {
  if (line.length <= MAX_LINE_CHARS) {
    return line;
  }
  const visible = line.slice(0, MAX_LINE_CHARS);
  // Pin the locale so the marker is identical regardless of the host's locale.
  const totalChars = line.length.toLocaleString("en-US");
  return `${visible}...[line ${lineNumber} truncated, total ${totalChars} chars; use read_file_slice with path="${filePath}" line=${lineNumber} to read more]`;
}

/**
 * Builds the `read_file` tool.
 *
 * The tool reads a UTF-8 text file and returns a window of its lines, prefixed
 * with a `[file: … — lines A-B of N]` header. Failures (path outside the
 * allowed roots, missing file, read error, offset past the end) are reported
 * as `Error: …` strings in the return value rather than thrown.
 *
 * @param workingDirectory Root that relative `path` arguments are resolved
 *   against. Reads are allowed only under this directory or under SPILL_ROOT.
 * @returns The tool definition (name, description, zod parameter schema and
 *   `execute` implementation).
 */
export function createReadFileTool(workingDirectory: string): ToolDefinition {
  return {
    name: "read_file",
    description:
      "Read a file relative to the working directory. Returns up to `limit` lines starting at line `offset` (1-indexed). " +
      `Lines longer than ${MAX_LINE_CHARS} chars are truncated mid-line with a marker showing the total length — ` +
      "use the `read_file_slice` tool to read a specific char range within a long line. " +
      "If the response is still too large, the dispatch tool-output truncator may spill the full content to /tmp/dispatch/tool-results/.",
    parameters: z.object({
      path: z.string().describe("Path to the file, relative to the working directory"),
      offset: z
        .number()
        .int()
        .min(1)
        .optional()
        .describe("1-indexed start line. Default: 1 (start of file)."),
      limit: z
        .number()
        .int()
        .min(1)
        .optional()
        .describe(
          `Max lines to return. Default: ${DEFAULT_LIMIT}. Hard cap: ${HARD_LIMIT}. Use a small limit when exploring a large file.`,
        ),
    }),
    execute: async (args: Record<string, unknown>): Promise<string> => {
      const filePath = args.path as string;
      const offset = toPositiveInt(args.offset, 1);
      const limit = Math.min(toPositiveInt(args.limit, DEFAULT_LIMIT), HARD_LIMIT);

      // Canonicalize all three so symlink-in-workdir escapes are detected:
      // a workdir-relative path that resolves through symlinks to /etc must
      // fail the containment check below. The three lookups are independent,
      // so they run concurrently.
      const [absolutePath, absoluteWorkDir, absoluteSpillRoot] = await Promise.all([
        canonicalize(workingDirectory, filePath),
        canonicalize(workingDirectory),
        canonicalize(SPILL_ROOT),
      ]);

      // Files under the spill root are readable too, so content spilled by the
      // truncator can be read back through this tool.
      const isUnderWorkdir = isWithin(absolutePath, absoluteWorkDir);
      const isSpillFile = isWithin(absolutePath, absoluteSpillRoot);
      if (!isUnderWorkdir && !isSpillFile) {
        return `Error: Path "${filePath}" is outside the working directory.`;
      }

      let raw: string;
      try {
        raw = await readFile(absolutePath, "utf8");
      } catch (err) {
        const code = (err as NodeJS.ErrnoException).code;
        if (code === "ENOENT") {
          return `Error: File "${filePath}" not found.`;
        }
        return `Error reading file: ${err instanceof Error ? err.message : String(err)}`;
      }

      // A truly empty file (0 bytes) would otherwise slip through the
      // line-counting below: `"".split("\n")` is `[""]` and there's no
      // trailing newline, yielding a spurious `totalLines === 1`.
      if (raw === "") {
        return `(empty file: ${filePath})`;
      }

      const allLines = raw.split("\n");

      // `split("\n")` produces an extra empty entry when the file ends with a
      // newline. The total line count we report to the caller should match
      // the human-visible line count (lines that have content or terminate
      // with \n). Because `raw` is non-empty here, `totalLines` is always >= 1.
      const totalLines = raw.endsWith("\n") ? allLines.length - 1 : allLines.length;

      if (offset > totalLines) {
        return `Error: offset ${offset} exceeds file length (${totalLines} lines).`;
      }

      const startIdx = offset - 1; // 0-indexed
      const endIdx = Math.min(startIdx + limit, totalLines); // exclusive; doubles as the last 1-indexed line shown
      const rendered = allLines
        .slice(startIdx, endIdx)
        .map((line, i) => renderLine(line, startIdx + i + 1, filePath));

      const header = `[file: ${filePath} — lines ${offset}-${endIdx} of ${totalLines}]`;
      return `${header}\n${rendered.join("\n")}`;
    },
  };
}