summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Kit Langton <[email protected]> 2026-04-03 21:05:23 -0400
committer: GitHub <[email protected]> 2026-04-04 01:05:23 +0000
commit: 288eb044cb6f222d06d8e75dc51c9d59c40f86ba (patch)
tree: da5aaa5dfdd1122df57d34646da9c25b48d1db37
parent: 59ca4543d80148122127666c22a47e0b24180549 (diff)
download: opencode-288eb044cb6f222d06d8e75dc51c9d59c40f86ba.tar.gz
opencode-288eb044cb6f222d06d8e75dc51c9d59c40f86ba.zip
perf(opencode): batch snapshot diffFull blob reads (#20752)
Co-authored-by: Nate Williams <[email protected]>
-rw-r--r-- packages/opencode/src/snapshot/index.ts | 201
-rw-r--r-- packages/opencode/test/snapshot/snapshot.test.ts | 92
2 files changed, 270 insertions, 23 deletions
diff --git a/packages/opencode/src/snapshot/index.ts b/packages/opencode/src/snapshot/index.ts
index 7c952bc54..2db67695f 100644
--- a/packages/opencode/src/snapshot/index.ts
+++ b/packages/opencode/src/snapshot/index.ts
@@ -437,6 +437,146 @@ export namespace Snapshot {
const diffFull = Effect.fnUntraced(function* (from: string, to: string) {
return yield* locked(
Effect.gen(function* () {
+ type Row = { // one changed file, as parsed from a single line of `numstat.text`
+ file: string
+ status: "added" | "deleted" | "modified"
+ binary: boolean // true when both numstat counts are "-"; such rows never have text loaded
+ additions: number
+ deletions: number
+ }
+
+ type Ref = { // one blob requested from `git cat-file --batch`
+ file: string
+ side: "before" | "after" // "before" refs are built from `from`, "after" refs from `to`
+ ref: string // object name of the form `<from|to>:<file>`, written to the batch process stdin
+ }
+
+ const show = Effect.fnUntraced(function* (row: Row) { // per-file fallback: yields [before, after] text for one row via `git show`
+ if (row.binary) return ["", ""] // binary rows carry no text on either side
+ if (row.status === "added") { // added: only the `to` side is read, before stays ""
+ return [
+ "",
+ yield* git([...cfg, ...args(["show", `${to}:${row.file}`])]).pipe(
+ Effect.map((item) => item.text),
+ ),
+ ]
+ }
+ if (row.status === "deleted") { // deleted: only the `from` side is read, after stays ""
+ return [
+ yield* git([...cfg, ...args(["show", `${from}:${row.file}`])]).pipe(
+ Effect.map((item) => item.text),
+ ),
+ "",
+ ]
+ }
+ return yield* Effect.all( // modified: read both sides, two `git show` calls at once
+ [
+ git([...cfg, ...args(["show", `${from}:${row.file}`])]).pipe(Effect.map((item) => item.text)),
+ git([...cfg, ...args(["show", `${to}:${row.file}`])]).pipe(Effect.map((item) => item.text)),
+ ],
+ { concurrency: 2 },
+ )
+ })
+
+ const load = Effect.fnUntraced( // batch loader: resolves before/after text for `rows` with one `git cat-file --batch` process; yields a Map keyed by file, or undefined when the caller should fall back to `show`
+ function* (rows: Row[]) {
+ const refs = rows.flatMap((row) => { // blobs to request; this order is also the order the output is parsed in below
+ if (row.binary) return [] // binary rows request nothing
+ if (row.status === "added")
+ return [{ file: row.file, side: "after", ref: `${to}:${row.file}` } satisfies Ref]
+ if (row.status === "deleted") {
+ return [{ file: row.file, side: "before", ref: `${from}:${row.file}` } satisfies Ref]
+ }
+ return [
+ { file: row.file, side: "before", ref: `${from}:${row.file}` } satisfies Ref,
+ { file: row.file, side: "after", ref: `${to}:${row.file}` } satisfies Ref,
+ ]
+ })
+ if (!refs.length) return new Map<string, { before: string; after: string }>() // nothing to read: empty map, no process spawned
+
+ const proc = ChildProcess.make("git", [...cfg, ...args(["cat-file", "--batch"])], {
+ cwd: state.directory,
+ extendEnv: true,
+ stdin: Stream.make(new TextEncoder().encode(refs.map((item) => item.ref).join("\n") + "\n")), // one object name per line, newline-terminated
+ })
+ const handle = yield* spawner.spawn(proc)
+ const [out, err] = yield* Effect.all( // collect stdout (raw bytes) and stderr (text) concurrently
+ [Stream.mkUint8Array(handle.stdout), Stream.mkString(Stream.decodeText(handle.stderr))],
+ { concurrency: 2 },
+ )
+ const code = yield* handle.exitCode
+ if (code !== 0) { // non-zero exit: log and yield undefined so the caller uses `show`
+ log.info("git cat-file --batch failed during snapshot diff, falling back to per-file git show", {
+ stderr: err,
+ refs: refs.length,
+ })
+ return
+ }
+
+ const fail = (msg: string, extra?: Record<string, string>) => { // log a parse failure and produce the undefined "fall back" result
+ log.info(msg, { ...extra, refs: refs.length })
+ return undefined
+ }
+
+ const map = new Map<string, { before: string; after: string }>()
+ const dec = new TextDecoder()
+ let i = 0 // byte offset of the next unread byte in `out`
+ // Parse the default `git cat-file --batch` stream: one header line,
+ // then exactly `size` bytes of blob content, then a trailing newline.
+ for (const ref of refs) {
+ let end = i
+ while (end < out.length && out[end] !== 10) end += 1 // 10 is "\n": scan to the end of the header line
+ if (end >= out.length) { // ran off the buffer without finding the header's newline
+ return fail(
+ "git cat-file --batch returned a truncated header during snapshot diff, falling back to per-file git show",
+ )
+ }
+
+ const head = dec.decode(out.slice(i, end))
+ i = end + 1 // step past the header newline
+ const hit = map.get(ref.file) ?? { before: "", after: "" } // reuse the entry if the other side of this file was already parsed
+ if (head.endsWith(" missing")) { // object not found: leave this side as "" and move to the next ref
+ map.set(ref.file, hit)
+ continue
+ }
+
+ const match = head.match(/^[0-9a-f]+ blob (\d+)$/) // only `<hex id> blob <size>` headers are accepted
+ if (!match) {
+ return fail(
+ "git cat-file --batch returned an unexpected header during snapshot diff, falling back to per-file git show",
+ { head },
+ )
+ }
+
+ const size = Number(match[1])
+ if (!Number.isInteger(size) || size < 0 || i + size >= out.length || out[i + size] !== 10) { // content must fit in the buffer and be followed by "\n"
+ return fail(
+ "git cat-file --batch returned truncated content during snapshot diff, falling back to per-file git show",
+ { head },
+ )
+ }
+
+ const text = dec.decode(out.slice(i, i + size)) // decode exactly `size` bytes, so multi-byte characters are sliced by byte length
+ if (ref.side === "before") hit.before = text
+ if (ref.side === "after") hit.after = text
+ map.set(ref.file, hit)
+ i += size + 1 // skip the content and its trailing newline
+ }
+
+ if (i !== out.length) { // every output byte must be accounted for; leftovers mean the stream did not match `refs`
+ return fail(
+ "git cat-file --batch returned trailing data during snapshot diff, falling back to per-file git show",
+ )
+ }
+
+ return map
+ },
+ Effect.scoped,
+ Effect.catch(() => // errors from the steps above are replaced with undefined, which the caller treats as "use `show`"
+ Effect.succeed<Map<string, { before: string; after: string }> | undefined>(undefined),
+ ),
+ )
+
const result: Snapshot.FileDiff[] = []
const status = new Map<string, "added" | "deleted" | "modified">()
@@ -459,30 +599,45 @@ export namespace Snapshot {
},
)
- for (const line of numstat.text.trim().split("\n")) {
- if (!line) continue
- const [adds, dels, file] = line.split("\t")
- if (!file) continue
- const binary = adds === "-" && dels === "-"
- const [before, after] = binary
- ? ["", ""]
- : yield* Effect.all(
- [
- git([...cfg, ...args(["show", `${from}:${file}`])]).pipe(Effect.map((item) => item.text)),
- git([...cfg, ...args(["show", `${to}:${file}`])]).pipe(Effect.map((item) => item.text)),
- ],
- { concurrency: 2 },
- )
- const additions = binary ? 0 : parseInt(adds)
- const deletions = binary ? 0 : parseInt(dels)
- result.push({
- file,
- before,
- after,
- additions: Number.isFinite(additions) ? additions : 0,
- deletions: Number.isFinite(deletions) ? deletions : 0,
- status: status.get(file) ?? "modified",
+ const rows = numstat.text // parse the numstat output into Row records, one per non-empty line
+ .trim()
+ .split("\n")
+ .filter(Boolean)
+ .flatMap((line) => {
+ const [adds, dels, file] = line.split("\t") // tab-separated: additions, deletions, path
+ if (!file) return [] // drop lines without a path field
+ const binary = adds === "-" && dels === "-" // "-" for both counts marks the row as binary
+ const additions = binary ? 0 : parseInt(adds)
+ const deletions = binary ? 0 : parseInt(dels)
+ return [
+ {
+ file,
+ status: status.get(file) ?? "modified", // files absent from the `status` map default to "modified"
+ binary,
+ additions: Number.isFinite(additions) ? additions : 0, // unparseable counts become 0
+ deletions: Number.isFinite(deletions) ? deletions : 0,
+ } satisfies Row,
+ ]
})
+ const step = 100 // number of rows handed to `load` per batch
+
+ // Keep batches bounded so a large diff does not buffer every blob at once.
+ for (let i = 0; i < rows.length; i += step) {
+ const run = rows.slice(i, i + step)
+ const text = yield* load(run) // undefined when the batch read failed or could not be parsed
+
+ for (const row of run) {
+ const hit = text?.get(row.file) ?? { before: "", after: "" }
+ const [before, after] = row.binary ? ["", ""] : text ? [hit.before, hit.after] : yield* show(row) // binary: empty text; batch result available: use it; otherwise read this file with `show`
+ result.push({
+ file: row.file,
+ before,
+ after,
+ additions: row.additions,
+ deletions: row.deletions,
+ status: row.status,
+ })
+ }
}
return result
diff --git a/packages/opencode/test/snapshot/snapshot.test.ts b/packages/opencode/test/snapshot/snapshot.test.ts
index 8dc80721d..0cd9366a5 100644
--- a/packages/opencode/test/snapshot/snapshot.test.ts
+++ b/packages/opencode/test/snapshot/snapshot.test.ts
@@ -982,6 +982,98 @@ test("diffFull with new file additions", async () => {
})
})
+test("diffFull with a large interleaved mixed diff", async () => { // checks diffFull content and status for modified, deleted, added and binary files in one diff
+ await using tmp = await bootstrap()
+ await Instance.provide({
+ directory: tmp.path,
+ fn: async () => {
+ const ids = Array.from({ length: 60 }, (_, i) => i.toString().padStart(3, "0")) // 60 zero-padded ids, used for each of the four file kinds
+ const mod = ids.map((id) => fwd(tmp.path, "mix", `${id}-mod.txt`))
+ const del = ids.map((id) => fwd(tmp.path, "mix", `${id}-del.txt`))
+ const add = ids.map((id) => fwd(tmp.path, "mix", `${id}-add.txt`))
+ const bin = ids.map((id) => fwd(tmp.path, "mix", `${id}-bin.bin`))
+
+ await $`mkdir -p ${tmp.path}/mix`.quiet()
+ await Promise.all([ // initial state: `add` files are not written yet
+ ...mod.map((file, i) => Filesystem.write(file, `before-${ids[i]}-é\n🙂\nline`)), // non-ASCII text with no trailing newline
+ ...del.map((file, i) => Filesystem.write(file, `gone-${ids[i]}\n你好`)),
+ ...bin.map((file, i) => Filesystem.write(file, new Uint8Array([0, i, 255, i % 251]))), // raw bytes including a 0 byte
+ ])
+
+ const before = await Snapshot.track()
+ expect(before).toBeTruthy()
+
+ await Promise.all([ // second state: rewrite mod and bin files, create add files, remove del files
+ ...mod.map((file, i) => Filesystem.write(file, `after-${ids[i]}-é\n🚀\nline`)),
+ ...add.map((file, i) => Filesystem.write(file, `new-${ids[i]}\nこんにちは`)),
+ ...bin.map((file, i) => Filesystem.write(file, new Uint8Array([9, i, 8, i % 251]))),
+ ...del.map((file) => fs.rm(file)),
+ ])
+
+ const after = await Snapshot.track()
+ expect(after).toBeTruthy()
+
+ const diffs = await Snapshot.diffFull(before!, after!)
+ expect(diffs).toHaveLength(ids.length * 4) // one entry per file: 60 ids x 4 kinds
+
+ const map = new Map(diffs.map((item) => [item.file, item])) // index results by path so lookups do not depend on order
+ for (let i = 0; i < ids.length; i++) {
+ const m = map.get(fwd("mix", `${ids[i]}-mod.txt`))
+ expect(m).toBeDefined()
+ expect(m!.before).toBe(`before-${ids[i]}-é\n🙂\nline`)
+ expect(m!.after).toBe(`after-${ids[i]}-é\n🚀\nline`)
+ expect(m!.status).toBe("modified")
+
+ const d = map.get(fwd("mix", `${ids[i]}-del.txt`))
+ expect(d).toBeDefined()
+ expect(d!.before).toBe(`gone-${ids[i]}\n你好`)
+ expect(d!.after).toBe("") // deleted files have no "after" text
+ expect(d!.status).toBe("deleted")
+
+ const a = map.get(fwd("mix", `${ids[i]}-add.txt`))
+ expect(a).toBeDefined()
+ expect(a!.before).toBe("") // added files have no "before" text
+ expect(a!.after).toBe(`new-${ids[i]}\nこんにちは`)
+ expect(a!.status).toBe("added")
+
+ const b = map.get(fwd("mix", `${ids[i]}-bin.bin`))
+ expect(b).toBeDefined()
+ expect(b!.before).toBe("") // binary files are expected to report empty text and zero counts
+ expect(b!.after).toBe("")
+ expect(b!.additions).toBe(0)
+ expect(b!.deletions).toBe(0)
+ expect(b!.status).toBe("modified")
+ }
+ },
+ })
+})
+
+test("diffFull preserves git diff order across batch boundaries", async () => { // checks that diffFull returns files in the expected order when 140 files change
+ await using tmp = await bootstrap()
+ await Instance.provide({
+ directory: tmp.path,
+ fn: async () => {
+ const ids = Array.from({ length: 140 }, (_, i) => i.toString().padStart(3, "0")) // 140 zero-padded ids, in ascending order
+
+ await $`mkdir -p ${tmp.path}/order`.quiet()
+ await Promise.all(ids.map((id) => Filesystem.write(`${tmp.path}/order/${id}.txt`, `before-${id}`)))
+
+ const before = await Snapshot.track()
+ expect(before).toBeTruthy()
+
+ await Promise.all(ids.map((id) => Filesystem.write(`${tmp.path}/order/${id}.txt`, `after-${id}`))) // modify every file
+
+ const after = await Snapshot.track()
+ expect(after).toBeTruthy()
+
+ const expected = ids.map((id) => `order/${id}.txt`) // paths relative to the tracked directory, in id order
+
+ const diffs = await Snapshot.diffFull(before!, after!)
+ expect(diffs.map((item) => item.file)).toEqual(expected) // result order must match exactly, not just as a set
+ },
+ })
+})
+
test("diffFull with file modifications", async () => {
await using tmp = await bootstrap()
await Instance.provide({