1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
import { readFile } from "node:fs/promises";
import { z } from "zod";
import type { ToolDefinition } from "../types/index.js";
import { canonicalize } from "./path-utils.js";
import { MAX_LINES, SPILL_ROOT } from "./truncate.js";
// Per-line truncation: any single line longer than MAX_LINE_CHARS is cut and
// replaced with a marker indicating the total length. This protects against
// minified files / base64 blobs / massive single-line JSON. The AI can use
// `read_file_slice` to inspect a specific char range within a long line.
// The threshold is compared against `String.prototype.length`, so it is
// measured in UTF-16 code units rather than bytes or grapheme clusters.
const MAX_LINE_CHARS = 2000;
// Number of lines returned when the caller does not pass `limit`.
// Aligned with the universal truncator's MAX_LINES so a default-args read
// returns a response that fits under the truncator's line ceiling. Without
// this alignment, every default read of a >500-line file got returned by
// the tool and then immediately spilled by the truncator — wasted work.
// Char-dense files can still spill via MAX_CHARS, but that's content-
// dependent rather than guaranteed.
const DEFAULT_LIMIT = MAX_LINES;
// Hard cap on lines per request even if `limit` is larger or omitted.
// Prevents a `read_file(huge.log)` with no params from returning a million
// lines. The universal truncator at the agent level will spill anything
// over its own threshold, but this is a tighter first line of defense
// scoped to the read tool itself.
// The cap is applied with `Math.min` to whichever limit is in effect, so it
// also clamps DEFAULT_LIMIT should MAX_LINES ever exceed it.
const HARD_LIMIT = 5000;
/**
 * Coerces an untrusted tool argument to an integer >= 1.
 *
 * Non-numbers and `NaN` yield `fallback`. `NaN` must be rejected explicitly:
 * `typeof NaN === "number"` and `Math.max(1, NaN)` is `NaN`, which would
 * otherwise poison every downstream index computation. `Infinity` is passed
 * through so callers can still clamp it or report it as out of range.
 */
function coercePositiveInt(value: unknown, fallback: number): number {
  if (typeof value !== "number" || Number.isNaN(value)) {
    return fallback;
  }
  return Math.max(1, Math.floor(value));
}

/**
 * Returns `line` unchanged when it fits within MAX_LINE_CHARS; otherwise
 * returns its leading portion followed by a marker carrying the line number
 * and total length, so the caller knows how to request the remainder.
 */
function renderLineWithCap(line: string, lineNumber: number, filePath: string): string {
  if (line.length <= MAX_LINE_CHARS) {
    return line;
  }
  // Never cut between the two halves of a UTF-16 surrogate pair: a lone high
  // surrogate is not valid text and degrades to U+FFFD (or worse) once the
  // response is serialized. Back off by one code unit in that case.
  const lastKept = line.charCodeAt(MAX_LINE_CHARS - 1);
  const firstDropped = line.charCodeAt(MAX_LINE_CHARS);
  const splitsPair =
    lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  const visible = line.slice(0, splitsPair ? MAX_LINE_CHARS - 1 : MAX_LINE_CHARS);
  return `${visible}...[line ${lineNumber} truncated, total ${line.length.toLocaleString()} chars; use read_file_slice with path="${filePath}" line=${lineNumber} to read more]`;
}

/**
 * Builds the `read_file` tool bound to `workingDirectory`.
 *
 * The tool reads a UTF-8 text file and returns a window of its lines preceded
 * by a `[file: … — lines A-B of N]` header. Failures are reported as
 * `Error: …` strings in the return value rather than thrown, except for
 * whatever `canonicalize` itself may throw.
 *
 * @param workingDirectory Directory that relative paths are resolved against
 *   and that reads are confined to (the spill root is the one exception).
 * @returns The tool definition (name, description, zod parameter schema, and
 *   `execute` handler).
 */
export function createReadFileTool(workingDirectory: string): ToolDefinition {
  return {
    name: "read_file",
    // The per-line cap is interpolated so the text cannot drift from
    // MAX_LINE_CHARS, mirroring how the `limit` description is built.
    description:
      "Read a file relative to the working directory. Returns up to `limit` lines starting at line `offset` (1-indexed). " +
      `Lines longer than ${MAX_LINE_CHARS} chars are truncated mid-line with a marker showing the total length — ` +
      "use the `read_file_slice` tool to read a specific char range within a long line. " +
      "If the response is still too large, the dispatch tool-output truncator may spill the full content to /tmp/dispatch/tool-results/.",
    parameters: z.object({
      path: z.string().describe("Path to the file, relative to the working directory"),
      offset: z
        .number()
        .int()
        .min(1)
        .optional()
        .describe("1-indexed start line. Default: 1 (start of file)."),
      limit: z
        .number()
        .int()
        .min(1)
        .optional()
        .describe(
          `Max lines to return. Default: ${DEFAULT_LIMIT}. Hard cap: ${HARD_LIMIT}. Use a small limit when exploring a large file.`,
        ),
    }),
    execute: async (args: Record<string, unknown>): Promise<string> => {
      // `args` is untyped at this boundary, so validate instead of asserting.
      const filePath = args.path;
      if (typeof filePath !== "string") {
        return 'Error: "path" must be a string.';
      }
      const offset = coercePositiveInt(args.offset, 1);
      const limit = Math.min(coercePositiveInt(args.limit, DEFAULT_LIMIT), HARD_LIMIT);

      // Canonicalize all three so symlink-in-workdir escapes are detected:
      // a workdir-relative path that resolves through symlinks to /etc must
      // fail the containment check below. The three lookups are independent,
      // so they run concurrently.
      const [absolutePath, absoluteWorkDir, absoluteSpillRoot] = await Promise.all([
        canonicalize(workingDirectory, filePath),
        canonicalize(workingDirectory),
        canonicalize(SPILL_ROOT),
      ]);
      // The trailing "/" in the prefix test stops "/work-other" from being
      // accepted as living under "/work".
      const isUnderWorkdir =
        absolutePath === absoluteWorkDir || absolutePath.startsWith(`${absoluteWorkDir}/`);
      const isSpillFile =
        absolutePath === absoluteSpillRoot || absolutePath.startsWith(`${absoluteSpillRoot}/`);
      if (!isUnderWorkdir && !isSpillFile) {
        return `Error: Path "${filePath}" is outside the working directory.`;
      }

      let raw: string;
      try {
        raw = await readFile(absolutePath, "utf8");
      } catch (err) {
        const code = (err as NodeJS.ErrnoException).code;
        if (code === "ENOENT") {
          return `Error: File "${filePath}" not found.`;
        }
        return `Error reading file: ${err instanceof Error ? err.message : String(err)}`;
      }

      // A truly empty file (0 bytes) would otherwise be counted as one line:
      // `"".split("\n")` is `[""]` and there is no trailing newline to discount.
      if (raw === "") {
        return `(empty file: ${filePath})`;
      }

      const allLines = raw.split("\n");
      // `split("\n")` produces an extra empty entry when the file ends with a
      // newline. The total line count we report to the caller should match
      // the human-visible line count (lines that have content or terminate
      // with \n). For non-empty `raw` this is always >= 1, so no further
      // empty-file check is needed.
      const totalLines = raw.endsWith("\n") ? allLines.length - 1 : allLines.length;
      if (offset > totalLines) {
        return `Error: offset ${offset} exceeds file length (${totalLines} lines).`;
      }

      const startIdx = offset - 1; // 0-indexed
      const endIdx = Math.min(startIdx + limit, totalLines);
      // Apply per-line truncation. Truncated lines are tagged with the line
      // number and total chars so the AI knows how to call read_file_slice.
      const rendered = allLines
        .slice(startIdx, endIdx)
        .map((line, i) => renderLineWithCap(line, startIdx + i + 1, filePath));

      const header = `[file: ${filePath} — lines ${offset}-${endIdx} of ${totalLines}]`;
      return `${header}\n${rendered.join("\n")}`;
    },
  };
}
|