fix(lsp): prevent server crash from malformed LSP messages

Two bugs caused the dispatch server to crash (15 times since Jun 24) when chat cc6c edited packages/transport-http/src/app.ts — a 40KB file in which 23 lines contain multi-byte UTF-8 characters. The edit_file diagnostics hook sends the file to tsserver, which sends back a large publishDiagnostics response. When the response was split across stdout chunks in the middle of a multi-byte character, the server crashed. Layer 1 — rpc.ts handleMessage: JSON.parse had no try/catch. A corrupted message threw an uncaught SyntaxError → unhandled rejection → process exit. Wrapped in try/catch; malformed messages are now skipped. Also hardened client.ts handleBytes: the async handleMessage Promise was fire-and-forget. Added .catch(() => {}) as defence-in-depth so no rejection from the RPC layer can ever crash the server. Layer 2 — framing.ts FrameDecoder: used a string buffer with new TextDecoder().decode(chunk) (no { stream: true }), corrupting multi-byte characters split across chunks. Worse, Content-Length counts bytes but the buffer was sliced by character count — for multi-byte content byte length ≠ char length, so the decoder extracted the wrong slice as a message. Rewrote to use a Uint8Array byte buffer: header separator search is byte-level, Content-Length comparison is byte-level, and the body is decoded only after all bytes are confirmed present. Tests: 5 new multi-byte framing tests (round-trip, split across two chunks, byte-vs-char Content-Length, two messages in one chunk, three-way split) + 1 rpc test (malformed JSON does not throw). All 1545 tests pass.
author: Adam Malczewski <[email protected]> 2026-06-25 07:24:47 +0900
committer: Adam Malczewski <[email protected]> 2026-06-25 07:24:47 +0900
commit: 4bc062c21a830dd58535252fd24ddb392d262c79 (patch)
tree: c8fa16b63660f466fefdf0c957f2cd734cd42519
parent: 1b2a13e29e98da04d55c061c2dcadb8c36d783cd (diff)
download: dispatch-4bc062c21a830dd58535252fd24ddb392d262c79.tar.gz
dispatch-4bc062c21a830dd58535252fd24ddb392d262c79.zip
5 files changed, 161 insertions, 17 deletions
diff --git a/packages/lsp/src/client.ts b/packages/lsp/src/client.ts
index 743fcb4..677a22a 100644
--- a/packages/lsp/src/client.ts
+++ b/packages/lsp/src/client.ts
@@ -175,7 +175,11 @@ export class LanguageServerClient {
 	private handleBytes(chunk: Uint8Array): void {
 		const messages = this.decoder.decode(chunk);
 		for (const msg of messages) {
-			this.rpc?.handleMessage(msg);
+			// handleMessage is async — catch rejections so a malformed
+			// message never becomes an unhandled rejection that crashes
+			// the server. (handleMessage also has its own try/catch around
+			// JSON.parse, but this is the defence-in-depth boundary.)
+			void this.rpc?.handleMessage(msg).catch(() => {});
 		}
 	}
 
diff --git a/packages/lsp/src/framing.test.ts b/packages/lsp/src/framing.test.ts
index 7c51a16..721665c 100644
--- a/packages/lsp/src/framing.test.ts
+++ b/packages/lsp/src/framing.test.ts
@@ -44,3 +44,99 @@ describe("framing", () => {
 		expect(messages[1]).toBe(msg2);
 	});
 });
+
+describe("multi-byte UTF-8", () => {
+	it("round-trips a message with multi-byte characters", () => {
+		const msg = JSON.stringify({
+			jsonrpc: "2.0",
+			method: "textDocument/publishDiagnostics",
+			params: { message: "Type '漢字' is not assignable to type 'number'. 🚫" },
+		});
+		const encoded = encode(msg);
+		const decoder = new FrameDecoder();
+		const messages = decoder.decode(encoded);
+		expect(messages).toHaveLength(1);
+		expect(messages[0]).toBe(msg);
+	});
+
+	it("reassembles a multi-byte message split inside a multi-byte character", () => {
+		// A message whose JSON body contains 3-byte UTF-8 characters (漢字).
+		// We split the encoded frame so the boundary falls INSIDE a multi-byte
+		// sequence — the old string-based decoder would corrupt this.
+		const msg = JSON.stringify({
+			jsonrpc: "2.0",
+			method: "test",
+			params: { text: "漢字テスト" },
+		});
+		const encoded = encode(msg);
+
+		// Locate the body (skip the ASCII header), then split one byte past the
+		// first non-ASCII lead byte so the boundary is INSIDE a multi-byte char
+		// (a midpoint split would land in the ASCII `"params"` key instead).
+		const bodyStart = encoded.indexOf(0x0d) + 4; // first \r, then skip \r\n\r\n
+		const splitPoint = encoded.findIndex((b, i) => i >= bodyStart && b >= 0x80) + 1;
+		const chunk1 = encoded.slice(0, splitPoint);
+		const chunk2 = encoded.slice(splitPoint);
+
+		const decoder = new FrameDecoder();
+		expect(decoder.decode(chunk1)).toHaveLength(0); // incomplete
+		const result = decoder.decode(chunk2);
+		expect(result).toHaveLength(1);
+		expect(result[0]).toBe(msg);
+	});
+
+	it("handles Content-Length in bytes (not characters)", () => {
+		// Content-Length counts bytes. For multi-byte content, byte length
+		// > character length. The decoder must slice by bytes, not chars.
+		const unicode = "🎉分段測試";
+		const msg = JSON.stringify({ jsonrpc: "2.0", method: "test", params: { text: unicode } });
+		const encoded = encode(msg);
+
+		// Verify the Content-Length header matches the byte length of the body.
+		const headerStr = new TextDecoder().decode(encoded.slice(0, encoded.indexOf(0x0d)));
+		const contentLengthMatch = /Content-Length:\s*(\d+)/i.exec(headerStr);
+		expect(contentLengthMatch).not.toBeNull();
+		const declaredLength = Number.parseInt(contentLengthMatch?.[1] ?? "", 10); // NaN if absent
+		const bodyBytes = new TextEncoder().encode(msg);
+		expect(declaredLength).toBe(bodyBytes.length);
+
+		const decoder = new FrameDecoder();
+		const messages = decoder.decode(encoded);
+		expect(messages).toHaveLength(1);
+		expect(messages[0]).toBe(msg);
+	});
+
+	it("reassembles two multi-byte messages from one chunk", () => {
+		const msg1 = JSON.stringify({ jsonrpc: "2.0", method: "a", params: { t: "日本語" } });
+		const msg2 = JSON.stringify({ jsonrpc: "2.0", method: "b", params: { t: "한국어" } });
+		const encoded1 = encode(msg1);
+		const encoded2 = encode(msg2);
+
+		const combined = new Uint8Array(encoded1.length + encoded2.length);
+		combined.set(encoded1);
+		combined.set(encoded2, encoded1.length);
+
+		const decoder = new FrameDecoder();
+		const messages = decoder.decode(combined);
+		expect(messages).toHaveLength(2);
+		expect(messages[0]).toBe(msg1);
+		expect(messages[1]).toBe(msg2);
+	});
+
+	it("reassembles a multi-byte message split across three chunks", () => {
+		const msg = JSON.stringify({
+			jsonrpc: "2.0",
+			method: "test",
+			params: { text: "𝕳𝖊𝖑𝖑𝖔, 世界! Привет! 🌍" },
+		});
+		const encoded = encode(msg);
+
+		const third = Math.floor(encoded.length / 3);
+		const decoder = new FrameDecoder();
+		expect(decoder.decode(encoded.slice(0, third))).toHaveLength(0);
+		expect(decoder.decode(encoded.slice(third, third * 2))).toHaveLength(0);
+		const result = decoder.decode(encoded.slice(third * 2));
+		expect(result).toHaveLength(1);
+		expect(result[0]).toBe(msg);
+	});
+});
diff --git a/packages/lsp/src/framing.ts b/packages/lsp/src/framing.ts
index 3a8ab3a..88e60c1 100644
--- a/packages/lsp/src/framing.ts
+++ b/packages/lsp/src/framing.ts
@@ -5,10 +5,15 @@
  * `encode` wraps a JSON message with headers; `FrameDecoder` reassembles
  * complete messages from streaming byte chunks (handles partial frames and
  * multiple frames per chunk).
+ *
+ * The buffer is a `Uint8Array` (not a string) because `Content-Length` counts
+ * **bytes** — slicing by JavaScript character count corrupts messages whose
+ * JSON body contains multi-byte UTF-8 characters (byte length ≠ char length).
  */
 
-const HEADER_SEP = "\r\n\r\n";
-const CONTENT_LENGTH_RE = /^Content-Length:\s*(\d+)/i;
+const CR = 0x0d; // \r
+const LF = 0x0a; // \n
+const CONTENT_LENGTH_RE = /Content-Length:\s*(\d+)/i;
 
 export function encode(msg: string): Uint8Array {
 	const body = new TextEncoder().encode(msg);
@@ -20,43 +25,65 @@ export function encode(msg: string): Uint8Array {
 	return result;
 }
 
+/**
+ * Find the first occurrence of the 4-byte sequence \r\n\r\n in `buf`,
+ * starting at offset `from`. Returns the index of the first byte, or -1.
+ */
+function findHeaderSep(buf: Uint8Array, from: number): number {
+	const limit = buf.length - 3;
+	for (let i = from; i < limit; i++) {
+		if (buf[i] === CR && buf[i + 1] === LF && buf[i + 2] === CR && buf[i + 3] === LF) {
+			return i;
+		}
+	}
+	return -1;
+}
+
 export class FrameDecoder {
-	private buffer = "";
+	private buffer: Uint8Array = new Uint8Array(0);
 	private expectedLength: number | null = null;
-	private headerEnd = -1;
+	private headerEndByte = -1;
 
 	/**
 	 * Feed raw bytes into the decoder. Returns all complete JSON messages
 	 * that can be extracted from the accumulated buffer.
 	 */
 	decode(chunk: Uint8Array): string[] {
-		this.buffer += new TextDecoder().decode(chunk);
+		// Append the new chunk to the internal byte buffer.
+		const newBuf = new Uint8Array(this.buffer.length + chunk.length);
+		newBuf.set(this.buffer);
+		newBuf.set(chunk, this.buffer.length);
+		this.buffer = newBuf;
+
 		const messages: string[] = [];
 
 		while (true) {
 			if (this.expectedLength === null) {
-				const headerEnd = this.buffer.indexOf(HEADER_SEP);
-				if (headerEnd === -1) break;
+				const sepIdx = findHeaderSep(this.buffer, 0);
+				if (sepIdx === -1) break;
 
-				const headerPart = this.buffer.slice(0, headerEnd);
-				const match = CONTENT_LENGTH_RE.exec(headerPart);
+				// Decode only the header bytes (always ASCII) to read Content-Length.
+				const headerStr = new TextDecoder().decode(this.buffer.slice(0, sepIdx));
+				const match = CONTENT_LENGTH_RE.exec(headerStr);
 				if (!match?.[1]) {
-					this.buffer = this.buffer.slice(headerEnd + HEADER_SEP.length);
+					// Not a Content-Length header — skip past this separator and retry.
+					this.buffer = this.buffer.slice(sepIdx + 4);
 					continue;
 				}
 				this.expectedLength = Number.parseInt(match[1], 10);
-				this.headerEnd = headerEnd;
+				this.headerEndByte = sepIdx + 4; // skip \r\n\r\n
 			}
 
-			const bodyStart = this.headerEnd + HEADER_SEP.length;
+			const bodyStart = this.headerEndByte;
 			const available = this.buffer.length - bodyStart;
 
 			if (available >= this.expectedLength) {
-				const body = this.buffer.slice(bodyStart, bodyStart + this.expectedLength);
-				messages.push(body);
+				// Extract exactly `expectedLength` bytes (Content-Length is in bytes).
+				const bodyBytes = this.buffer.slice(bodyStart, bodyStart + this.expectedLength);
+				messages.push(new TextDecoder().decode(bodyBytes));
 				this.buffer = this.buffer.slice(bodyStart + this.expectedLength);
 				this.expectedLength = null;
-				this.headerEnd = -1;
+				this.headerEndByte = -1;
 			} else {
 				break;
 			}
diff --git a/packages/lsp/src/rpc.test.ts b/packages/lsp/src/rpc.test.ts
index a03870f..05ce924 100644
--- a/packages/lsp/src/rpc.test.ts
+++ b/packages/lsp/src/rpc.test.ts
@@ -76,3 +76,11 @@ describe("rpc", () => {
 		expect(response.result).toEqual([{ setting: true }]);
 	});
 });
+
+it("handleMessage does not throw on malformed JSON", async () => {
+	const { conn } = makeConnection();
+	// A corrupted/truncated LSP message — must not throw or reject.
+	await expect(conn.handleMessage("{ broken json")).resolves.toBeUndefined();
+	await expect(conn.handleMessage("")).resolves.toBeUndefined();
+	await expect(conn.handleMessage("not json at all")).resolves.toBeUndefined();
+});
diff --git a/packages/lsp/src/rpc.ts b/packages/lsp/src/rpc.ts
index 45adf42..6b82624 100644
--- a/packages/lsp/src/rpc.ts
+++ b/packages/lsp/src/rpc.ts
@@ -62,7 +62,16 @@ export class JsonRpcConnection {
 	}
 
 	async handleMessage(json: string): Promise<void> {
-		const msg = JSON.parse(json) as JsonRpcMessage;
+		let msg: JsonRpcMessage;
+		try {
+			msg = JSON.parse(json) as JsonRpcMessage;
+		} catch {
+			// A malformed LSP message must never crash the server. The most
+			// common cause is a multi-byte UTF-8 character split across stdout
+			// chunks (see FrameDecoder). Skip it silently — the language server
+			// will re-send diagnostics on the next file change.
+			return;
+		}
 		const { id, method } = msg;
 
 		if (id !== undefined && method !== undefined) {
author	Adam Malczewski <[email protected]>	2026-06-25 07:24:47 +0900
committer	Adam Malczewski <[email protected]>	2026-06-25 07:24:47 +0900
commit	4bc062c21a830dd58535252fd24ddb392d262c79 (patch)
tree	c8fa16b63660f466fefdf0c957f2cd734cd42519
parent	1b2a13e29e98da04d55c061c2dcadb8c36d783cd (diff)
download	dispatch-4bc062c21a830dd58535252fd24ddb392d262c79.tar.gz dispatch-4bc062c21a830dd58535252fd24ddb392d262c79.zip