summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAdam Malczewski <[email protected]>2026-06-25 07:24:47 +0900
committerAdam Malczewski <[email protected]>2026-06-25 07:24:47 +0900
commit4bc062c21a830dd58535252fd24ddb392d262c79 (patch)
treec8fa16b63660f466fefdf0c957f2cd734cd42519
parent1b2a13e29e98da04d55c061c2dcadb8c36d783cd (diff)
downloaddispatch-4bc062c21a830dd58535252fd24ddb392d262c79.tar.gz
dispatch-4bc062c21a830dd58535252fd24ddb392d262c79.zip
fix(lsp): prevent server crash from malformed LSP messages
Two bugs caused the dispatch server to crash (15 times since Jun 24) when chat cc6c edited packages/transport-http/src/app.ts — a 40KB file with 23 lines containing multi-byte UTF-8 characters. The edit_file diagnostics hook sends the file to tsserver, which sends back a large publishDiagnostics response. When the response was split across stdout chunks in the middle of a multi-byte character, the server crashed. Layer 1 — rpc.ts handleMessage: JSON.parse had no try/catch. A corrupted message threw an unhandled SyntaxError → unhandled rejection → process exit. Wrapped in try/catch; malformed messages are now skipped. Also hardened client.ts handleBytes: the async handleMessage Promise was fire-and-forget. Added .catch(() => {}) as defence-in-depth so no rejection from the RPC layer can ever crash the server. Layer 2 — framing.ts FrameDecoder: used a string buffer with new TextDecoder().decode(chunk) (no { stream: true }), corrupting multi-byte characters split across chunks. Worse, Content-Length counts bytes but the buffer was sliced by character count — for multi-byte content byte length ≠ char length, so the decoder extracted the wrong slice as a message. Rewrote to use a Uint8Array byte buffer: header separator search is byte-level, Content-Length comparison is byte-level, and the body is decoded only after all bytes are confirmed present. Tests: 5 new multi-byte framing tests (round-trip, split inside a multi-byte character, byte-vs-char Content-Length, two messages in one chunk, three-way split) + 1 rpc test (malformed JSON does not throw). All 1545 tests pass.
-rw-r--r--packages/lsp/src/client.ts6
-rw-r--r--packages/lsp/src/framing.test.ts96
-rw-r--r--packages/lsp/src/framing.ts57
-rw-r--r--packages/lsp/src/rpc.test.ts8
-rw-r--r--packages/lsp/src/rpc.ts11
5 files changed, 161 insertions, 17 deletions
diff --git a/packages/lsp/src/client.ts b/packages/lsp/src/client.ts
index 743fcb4..677a22a 100644
--- a/packages/lsp/src/client.ts
+++ b/packages/lsp/src/client.ts
@@ -175,7 +175,11 @@ export class LanguageServerClient {
private handleBytes(chunk: Uint8Array): void {
const messages = this.decoder.decode(chunk);
for (const msg of messages) {
- this.rpc?.handleMessage(msg);
+ // handleMessage is async — catch rejections so a malformed
+ // message never becomes an unhandled rejection that crashes
+ // the server. (handleMessage also has its own try/catch around
+ // JSON.parse, but this is the defence-in-depth boundary.)
+ void this.rpc?.handleMessage(msg).catch(() => {});
}
}
diff --git a/packages/lsp/src/framing.test.ts b/packages/lsp/src/framing.test.ts
index 7c51a16..721665c 100644
--- a/packages/lsp/src/framing.test.ts
+++ b/packages/lsp/src/framing.test.ts
@@ -44,3 +44,99 @@ describe("framing", () => {
expect(messages[1]).toBe(msg2);
});
});
+
+describe("multi-byte UTF-8", () => {
+ it("round-trips a message with multi-byte characters", () => {
+ const msg = JSON.stringify({
+ jsonrpc: "2.0",
+ method: "textDocument/publishDiagnostics",
+ params: { message: "Type '漢字' is not assignable to type 'number'. 🚫" },
+ });
+ const encoded = encode(msg);
+ const decoder = new FrameDecoder();
+ const messages = decoder.decode(encoded);
+ expect(messages).toHaveLength(1);
+ expect(messages[0]).toBe(msg);
+ });
+
+ it("reassembles a multi-byte message split at a character boundary", () => {
+ // A message whose JSON body contains 3-byte UTF-8 characters (漢字).
+ // We split the encoded frame so the boundary falls INSIDE a multi-byte
+ // sequence — the old string-based decoder would corrupt this.
+ const msg = JSON.stringify({
+ jsonrpc: "2.0",
+ method: "test",
+ params: { text: "漢字テスト" },
+ });
+ const encoded = encode(msg);
+
+ // Find a split point inside the body (skip the ASCII header).
+ const headerEnd = encoded.indexOf(0x0d, 0); // first \r
+ const bodyStart = headerEnd + 4; // skip \r\n\r\n
+ // Split in the middle of the body — likely inside a multi-byte char.
+ const splitPoint = bodyStart + Math.floor((encoded.length - bodyStart) / 2);
+ const chunk1 = encoded.slice(0, splitPoint);
+ const chunk2 = encoded.slice(splitPoint);
+
+ const decoder = new FrameDecoder();
+ expect(decoder.decode(chunk1)).toHaveLength(0); // incomplete
+ const result = decoder.decode(chunk2);
+ expect(result).toHaveLength(1);
+ expect(result[0]).toBe(msg);
+ });
+
+ it("handles Content-Length in bytes (not characters)", () => {
+ // Content-Length counts bytes. For multi-byte content, byte length
+ // > character length. The decoder must slice by bytes, not chars.
+ const unicode = "🎉分段測試";
+ const msg = JSON.stringify({ jsonrpc: "2.0", method: "test", params: { text: unicode } });
+ const encoded = encode(msg);
+
+ // Verify the Content-Length header matches the byte length of the body.
+ const headerStr = new TextDecoder().decode(encoded.slice(0, encoded.indexOf(0x0d)));
+ const contentLengthMatch = /Content-Length:\s*(\d+)/i.exec(headerStr);
+ expect(contentLengthMatch).not.toBeNull();
+ const declaredLength = Number.parseInt(contentLengthMatch?.[1], 10);
+ const bodyBytes = new TextEncoder().encode(msg);
+ expect(declaredLength).toBe(bodyBytes.length);
+
+ const decoder = new FrameDecoder();
+ const messages = decoder.decode(encoded);
+ expect(messages).toHaveLength(1);
+ expect(messages[0]).toBe(msg);
+ });
+
+ it("reassembles two multi-byte messages from one chunk", () => {
+ const msg1 = JSON.stringify({ jsonrpc: "2.0", method: "a", params: { t: "日本語" } });
+ const msg2 = JSON.stringify({ jsonrpc: "2.0", method: "b", params: { t: "한국어" } });
+ const encoded1 = encode(msg1);
+ const encoded2 = encode(msg2);
+
+ const combined = new Uint8Array(encoded1.length + encoded2.length);
+ combined.set(encoded1);
+ combined.set(encoded2, encoded1.length);
+
+ const decoder = new FrameDecoder();
+ const messages = decoder.decode(combined);
+ expect(messages).toHaveLength(2);
+ expect(messages[0]).toBe(msg1);
+ expect(messages[1]).toBe(msg2);
+ });
+
+ it("reassembles a multi-byte message split across three chunks", () => {
+ const msg = JSON.stringify({
+ jsonrpc: "2.0",
+ method: "test",
+ params: { text: "𝕳𝖊𝖑𝖑𝖔, 世界! Привет! 🌍" },
+ });
+ const encoded = encode(msg);
+
+ const third = Math.floor(encoded.length / 3);
+ const decoder = new FrameDecoder();
+ expect(decoder.decode(encoded.slice(0, third))).toHaveLength(0);
+ expect(decoder.decode(encoded.slice(third, third * 2))).toHaveLength(0);
+ const result = decoder.decode(encoded.slice(third * 2));
+ expect(result).toHaveLength(1);
+ expect(result[0]).toBe(msg);
+ });
+});
diff --git a/packages/lsp/src/framing.ts b/packages/lsp/src/framing.ts
index 3a8ab3a..88e60c1 100644
--- a/packages/lsp/src/framing.ts
+++ b/packages/lsp/src/framing.ts
@@ -5,10 +5,15 @@
* `encode` wraps a JSON message with headers; `FrameDecoder` reassembles
* complete messages from streaming byte chunks (handles partial frames and
* multiple frames per chunk).
+ *
+ * The buffer is a `Uint8Array` (not a string) because `Content-Length` counts
+ * **bytes** — slicing by JavaScript character count corrupts messages whose
+ * JSON body contains multi-byte UTF-8 characters (byte length ≠ char length).
*/
-const HEADER_SEP = "\r\n\r\n";
-const CONTENT_LENGTH_RE = /^Content-Length:\s*(\d+)/i;
+const CR = 0x0d; // \r
+const LF = 0x0a; // \n
+const CONTENT_LENGTH_RE = /Content-Length:\s*(\d+)/i;
export function encode(msg: string): Uint8Array {
const body = new TextEncoder().encode(msg);
@@ -20,43 +25,65 @@ export function encode(msg: string): Uint8Array {
return result;
}
+/**
+ * Find the first occurrence of the 4-byte sequence \r\n\r\n in `buf`,
+ * starting at offset `from`. Returns the index of the first byte, or -1.
+ */
+function findHeaderSep(buf: Uint8Array, from: number): number {
+ const limit = buf.length - 3;
+ for (let i = from; i < limit; i++) {
+ if (buf[i] === CR && buf[i + 1] === LF && buf[i + 2] === CR && buf[i + 3] === LF) {
+ return i;
+ }
+ }
+ return -1;
+}
+
export class FrameDecoder {
- private buffer = "";
+ private buffer: Uint8Array = new Uint8Array(0);
private expectedLength: number | null = null;
- private headerEnd = -1;
+ private headerEndByte = -1;
/**
* Feed raw bytes into the decoder. Returns all complete JSON messages
* that can be extracted from the accumulated buffer.
*/
decode(chunk: Uint8Array): string[] {
- this.buffer += new TextDecoder().decode(chunk);
+ // Append the new chunk to the internal byte buffer.
+ const newBuf = new Uint8Array(this.buffer.length + chunk.length);
+ newBuf.set(this.buffer);
+ newBuf.set(chunk, this.buffer.length);
+ this.buffer = newBuf;
+
const messages: string[] = [];
while (true) {
if (this.expectedLength === null) {
- const headerEnd = this.buffer.indexOf(HEADER_SEP);
- if (headerEnd === -1) break;
+ const sepIdx = findHeaderSep(this.buffer, 0);
+ if (sepIdx === -1) break;
- const headerPart = this.buffer.slice(0, headerEnd);
- const match = CONTENT_LENGTH_RE.exec(headerPart);
+ // Decode only the header bytes (always ASCII) to read Content-Length.
+ const headerStr = new TextDecoder().decode(this.buffer.slice(0, sepIdx));
+ const match = CONTENT_LENGTH_RE.exec(headerStr);
if (!match?.[1]) {
- this.buffer = this.buffer.slice(headerEnd + HEADER_SEP.length);
+ // Not a Content-Length header — skip past this separator and retry.
+ this.buffer = this.buffer.slice(sepIdx + 4);
continue;
}
this.expectedLength = Number.parseInt(match[1], 10);
- this.headerEnd = headerEnd;
+ this.headerEndByte = sepIdx + 4; // skip \r\n\r\n
}
- const bodyStart = this.headerEnd + HEADER_SEP.length;
+ const bodyStart = this.headerEndByte;
const available = this.buffer.length - bodyStart;
if (available >= this.expectedLength) {
- const body = this.buffer.slice(bodyStart, bodyStart + this.expectedLength);
- messages.push(body);
+ // Extract exactly `expectedLength` bytes (Content-Length is in bytes).
+ const bodyBytes = this.buffer.slice(bodyStart, bodyStart + this.expectedLength);
+ messages.push(new TextDecoder().decode(bodyBytes));
this.buffer = this.buffer.slice(bodyStart + this.expectedLength);
this.expectedLength = null;
- this.headerEnd = -1;
+ this.headerEndByte = -1;
} else {
break;
}
diff --git a/packages/lsp/src/rpc.test.ts b/packages/lsp/src/rpc.test.ts
index a03870f..05ce924 100644
--- a/packages/lsp/src/rpc.test.ts
+++ b/packages/lsp/src/rpc.test.ts
@@ -76,3 +76,11 @@ describe("rpc", () => {
expect(response.result).toEqual([{ setting: true }]);
});
});
+
+it("handleMessage does not throw on malformed JSON", async () => {
+ const { conn } = makeConnection();
+ // A corrupted/truncated LSP message — must not throw or reject.
+ await expect(conn.handleMessage("{ broken json")).resolves.toBeUndefined();
+ await expect(conn.handleMessage("")).resolves.toBeUndefined();
+ await expect(conn.handleMessage("not json at all")).resolves.toBeUndefined();
+});
diff --git a/packages/lsp/src/rpc.ts b/packages/lsp/src/rpc.ts
index 45adf42..6b82624 100644
--- a/packages/lsp/src/rpc.ts
+++ b/packages/lsp/src/rpc.ts
@@ -62,7 +62,16 @@ export class JsonRpcConnection {
}
async handleMessage(json: string): Promise<void> {
- const msg = JSON.parse(json) as JsonRpcMessage;
+ let msg: JsonRpcMessage;
+ try {
+ msg = JSON.parse(json) as JsonRpcMessage;
+ } catch {
+ // A malformed LSP message must never crash the server. The most
+ // common cause is a multi-byte UTF-8 character split across stdout
+ // chunks (see FrameDecoder). Skip silently — the language server
+ // will re-send diagnostics on the next file change.
+ return;
+ }
const { id, method } = msg;
if (id !== undefined && method !== undefined) {