// packages/core/src/tools/read-file.ts

import { readFile } from "node:fs/promises";
import { z } from "zod";
import type { ToolDefinition } from "../types/index.js";
import { canonicalize } from "./path-utils.js";
import { MAX_LINES, SPILL_ROOT } from "./truncate.js";

// Per-line truncation threshold, measured in `String.length` units. Any single
// line longer than MAX_LINE_CHARS is cut to at most this many characters and
// suffixed with a marker giving the line number and the line's total length.
// This protects against minified files / base64 blobs / massive single-line
// JSON. The AI can use `read_file_slice` to inspect a specific char range
// within a long line.
const MAX_LINE_CHARS = 2000;

// Number of lines returned when the caller does not pass `limit` (the value is
// still clamped to HARD_LIMIT before use).
// Aligned with the universal truncator's MAX_LINES so a default-args read
// returns a response that fits under the truncator's line ceiling. Without
// this alignment, every default read of a >500-line file got returned by
// the tool and then immediately spilled by the truncator — wasted work.
// Char-dense files can still spill via MAX_CHARS, but that's content-
// dependent rather than guaranteed.
const DEFAULT_LIMIT = MAX_LINES;
// Hard cap on lines per request: the effective limit is
// min(requested-or-default limit, HARD_LIMIT). Prevents a read of a huge file
// (e.g. `huge.log`) with an oversized `limit` from returning a million lines.
// The universal truncator at the agent level will spill anything over its own
// threshold, but this is a tighter first line of defense scoped to the read
// tool itself.
const HARD_LIMIT = 5000;

/**
 * Build the `read_file` tool bound to a working directory.
 *
 * The returned tool reads a UTF-8 text file and returns a window of its lines
 * (`offset`/`limit`, 1-indexed) prefixed by a `[file: … — lines A-B of N]`
 * header. Lines longer than `MAX_LINE_CHARS` are cut and tagged with a marker.
 * Only paths that canonicalize to somewhere under the working directory or
 * under `SPILL_ROOT` are readable; anything else yields an error string.
 *
 * Failures of the read itself are reported as `Error: …` strings in the
 * returned value rather than thrown.
 *
 * @param workingDirectory - Directory that relative `path` arguments are
 *   resolved against and that reads are confined to.
 * @returns The tool definition (name, description, zod parameters, executor).
 */
export function createReadFileTool(workingDirectory: string): ToolDefinition {
	return {
		name: "read_file",
		// Interpolates MAX_LINE_CHARS so the advertised threshold cannot drift
		// from the one actually enforced below.
		description: `Read a file relative to the working directory. Returns up to \`limit\` lines starting at line \`offset\` (1-indexed). Lines longer than ${MAX_LINE_CHARS} chars are truncated mid-line with a marker showing the total length — use the \`read_file_slice\` tool to read a specific char range within a long line. If the response is still too large, the dispatch tool-output truncator may spill the full content to /tmp/dispatch/tool-results/.`,
		parameters: z.object({
			path: z.string().describe("Path to the file, relative to the working directory"),
			offset: z
				.number()
				.int()
				.min(1)
				.optional()
				.describe("1-indexed start line. Default: 1 (start of file)."),
			limit: z
				.number()
				.int()
				.min(1)
				.optional()
				.describe(
					`Max lines to return. Default: ${DEFAULT_LIMIT}. Hard cap: ${HARD_LIMIT}. Use a small limit when exploring a large file.`,
				),
		}),
		execute: async (args: Record<string, unknown>): Promise<string> => {
			// `args` is untyped at this boundary, so narrow at runtime instead of
			// asserting; offset/limit get the same defensive treatment below.
			const filePath = args.path;
			if (typeof filePath !== "string") {
				return 'Error: "path" must be a string.';
			}
			const offset = positiveIntOr(args.offset, 1);
			const limit = Math.min(positiveIntOr(args.limit, DEFAULT_LIMIT), HARD_LIMIT);

			// Canonicalize all three so symlink-in-workdir escapes are detected:
			// a workdir-relative path that resolves through symlinks to /etc must
			// fail the containment check below. The three lookups are independent,
			// so they run concurrently.
			const [absolutePath, absoluteWorkDir, absoluteSpillRoot] = await Promise.all([
				canonicalize(workingDirectory, filePath),
				canonicalize(workingDirectory),
				canonicalize(SPILL_ROOT),
			]);
			// NOTE(review): containment is tested with a literal "/" separator, so
			// this assumes canonicalize() returns POSIX-style paths — confirm before
			// running on Windows.
			const isUnderWorkdir =
				absolutePath === absoluteWorkDir || absolutePath.startsWith(`${absoluteWorkDir}/`);
			const isSpillFile =
				absolutePath === absoluteSpillRoot || absolutePath.startsWith(`${absoluteSpillRoot}/`);

			if (!isUnderWorkdir && !isSpillFile) {
				return `Error: Path "${filePath}" is outside the working directory.`;
			}

			let raw: string;
			try {
				raw = await readFile(absolutePath, "utf8");
			} catch (err: unknown) {
				const code = (err as NodeJS.ErrnoException).code;
				if (code === "ENOENT") {
					return `Error: File "${filePath}" not found.`;
				}
				return `Error reading file: ${err instanceof Error ? err.message : String(err)}`;
			}

			// A truly empty file (0 bytes) would otherwise slip through the
			// line-counting below: `"".split("\n")` is `[""]` and there's no
			// trailing newline, yielding a spurious line count of 1.
			if (raw === "") {
				return `(empty file: ${filePath})`;
			}

			const allLines = raw.split("\n");
			// `split("\n")` produces an extra empty entry when the file ends with a
			// newline; drop it from the count so the reported total matches the
			// human-visible line count. Because `raw` is non-empty here, the total
			// is always >= 1, so no second empty-file check is needed.
			const totalLines = raw.endsWith("\n") ? allLines.length - 1 : allLines.length;

			if (offset > totalLines) {
				return `Error: offset ${offset} exceeds file length (${totalLines} lines).`;
			}

			const startIdx = offset - 1; // 0-indexed
			const endIdx = Math.min(startIdx + limit, totalLines);

			// Apply per-line truncation. Truncated lines are tagged with the line
			// number and total chars so the AI knows how to call read_file_slice.
			const rendered = allLines.slice(startIdx, endIdx).map((line, i) => {
				if (line.length <= MAX_LINE_CHARS) {
					return line;
				}
				const lineNumber = offset + i;
				const visible = cutAtCharBoundary(line, MAX_LINE_CHARS);
				// Locale is pinned so the thousands separator does not vary with the
				// host environment.
				const totalChars = line.length.toLocaleString("en-US");
				return `${visible}...[line ${lineNumber} truncated, total ${totalChars} chars; use read_file_slice with path="${filePath}" line=${lineNumber} to read more]`;
			});

			const header = `[file: ${filePath} — lines ${offset}-${endIdx} of ${totalLines}]`;
			return `${header}\n${rendered.join("\n")}`;
		},
	};
}

/**
 * Coerce an optional numeric tool argument to an integer >= 1.
 *
 * Returns `fallback` when `value` is not a number or is NaN; NaN would
 * otherwise propagate through `Math.max`/`Math.min` into the slice bounds.
 */
function positiveIntOr(value: unknown, fallback: number): number {
	if (typeof value !== "number" || Number.isNaN(value)) {
		return fallback;
	}
	return Math.max(1, Math.floor(value));
}

/**
 * Return the first `maxChars` UTF-16 code units of `line`, backing off by one
 * unit when the cut would fall between the two halves of a surrogate pair, so
 * the result never ends in a lone high surrogate.
 *
 * Intended for lines whose length exceeds `maxChars`.
 */
function cutAtCharBoundary(line: string, maxChars: number): string {
	const last = line.charCodeAt(maxChars - 1);
	const next = line.charCodeAt(maxChars);
	const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
	return line.slice(0, splitsPair ? maxChars - 1 : maxChars);
}