diff options
| author | Adam Malczewski <[email protected]> | 2026-06-25 07:24:47 +0900 |
|---|---|---|
| committer | Adam Malczewski <[email protected]> | 2026-06-25 07:24:47 +0900 |
| commit | 4bc062c21a830dd58535252fd24ddb392d262c79 (patch) | |
| tree | c8fa16b63660f466fefdf0c957f2cd734cd42519 | |
| parent | 1b2a13e29e98da04d55c061c2dcadb8c36d783cd (diff) | |
| download | dispatch-4bc062c21a830dd58535252fd24ddb392d262c79.tar.gz dispatch-4bc062c21a830dd58535252fd24ddb392d262c79.zip | |
fix(lsp): prevent server crash from malformed LSP messages
Two bugs caused the dispatch server to crash (15 times since Jun 24)
when chat cc6c edited packages/transport-http/src/app.ts — a 40KB file
with 23 multi-byte UTF-8 lines. The edit_file diagnostics hook sends the
file to tsserver, which sends back a large publishDiagnostics response.
When the response was split across stdout chunks in the middle of a
multi-byte character, the server crashed.
Layer 1 — rpc.ts handleMessage: JSON.parse had no try/catch. A corrupted
message threw an unhandled SyntaxError → unhandled rejection → process
exit. Wrapped in try/catch; malformed messages are now skipped.
Also hardened client.ts handleBytes: the async handleMessage Promise was
fire-and-forget. Added .catch(() => {}) as defence-in-depth so no
rejection from the RPC layer can ever crash the server.
Layer 2 — framing.ts FrameDecoder: used a string buffer with
new TextDecoder().decode(chunk) (no { stream: true }), corrupting
multi-byte characters split across chunks. Worse, Content-Length counts
bytes but the buffer was sliced by character count — for multi-byte
content byte length ≠ char length, so the decoder extracted the wrong
slice as a message. Rewrote to use a Uint8Array byte buffer: header
separator search is byte-level, Content-Length comparison is byte-level,
and the body is decoded only after all bytes are confirmed present.
Tests: 5 new multi-byte framing tests (round-trip, split inside a multi-byte
character, byte-vs-char Content-Length, two messages in one chunk, three-way
split) + 1 rpc test (malformed JSON does not throw). All 1545 tests pass.
| -rw-r--r-- | packages/lsp/src/client.ts | 6 | ||||
| -rw-r--r-- | packages/lsp/src/framing.test.ts | 96 | ||||
| -rw-r--r-- | packages/lsp/src/framing.ts | 57 | ||||
| -rw-r--r-- | packages/lsp/src/rpc.test.ts | 8 | ||||
| -rw-r--r-- | packages/lsp/src/rpc.ts | 11 |
5 files changed, 161 insertions, 17 deletions
diff --git a/packages/lsp/src/client.ts b/packages/lsp/src/client.ts index 743fcb4..677a22a 100644 --- a/packages/lsp/src/client.ts +++ b/packages/lsp/src/client.ts @@ -175,7 +175,11 @@ export class LanguageServerClient { private handleBytes(chunk: Uint8Array): void { const messages = this.decoder.decode(chunk); for (const msg of messages) { - this.rpc?.handleMessage(msg); + // handleMessage is async — catch rejections so a malformed + // message never becomes an unhandled rejection that crashes + // the server. (handleMessage also has its own try/catch around + // JSON.parse, but this is the defence-in-depth boundary.) + void this.rpc?.handleMessage(msg).catch(() => {}); } } diff --git a/packages/lsp/src/framing.test.ts b/packages/lsp/src/framing.test.ts index 7c51a16..721665c 100644 --- a/packages/lsp/src/framing.test.ts +++ b/packages/lsp/src/framing.test.ts @@ -44,3 +44,99 @@ describe("framing", () => { expect(messages[1]).toBe(msg2); }); }); + +describe("multi-byte UTF-8", () => { + it("round-trips a message with multi-byte characters", () => { + const msg = JSON.stringify({ + jsonrpc: "2.0", + method: "textDocument/publishDiagnostics", + params: { message: "Type '漢字' is not assignable to type 'number'. 🚫" }, + }); + const encoded = encode(msg); + const decoder = new FrameDecoder(); + const messages = decoder.decode(encoded); + expect(messages).toHaveLength(1); + expect(messages[0]).toBe(msg); + }); + + it("reassembles a multi-byte message split at a character boundary", () => { + // A message whose JSON body contains 3-byte UTF-8 characters (漢字). + // We split the encoded frame so the boundary falls INSIDE a multi-byte + // sequence — the old string-based decoder would corrupt this. + const msg = JSON.stringify({ + jsonrpc: "2.0", + method: "test", + params: { text: "漢字テスト" }, + }); + const encoded = encode(msg); + + // Find a split point inside the body (skip the ASCII header). 
+ const headerEnd = encoded.indexOf(0x0d, 0); // first \r + const bodyStart = headerEnd + 4; // skip \r\n\r\n + // Split in the middle of the body — likely inside a multi-byte char. + const splitPoint = bodyStart + Math.floor((encoded.length - bodyStart) / 2); + const chunk1 = encoded.slice(0, splitPoint); + const chunk2 = encoded.slice(splitPoint); + + const decoder = new FrameDecoder(); + expect(decoder.decode(chunk1)).toHaveLength(0); // incomplete + const result = decoder.decode(chunk2); + expect(result).toHaveLength(1); + expect(result[0]).toBe(msg); + }); + + it("handles Content-Length in bytes (not characters)", () => { + // Content-Length counts bytes. For multi-byte content, byte length + // > character length. The decoder must slice by bytes, not chars. + const unicode = "🎉分段測試"; + const msg = JSON.stringify({ jsonrpc: "2.0", method: "test", params: { text: unicode } }); + const encoded = encode(msg); + + // Verify the Content-Length header matches the byte length of the body. 
+ const headerStr = new TextDecoder().decode(encoded.slice(0, encoded.indexOf(0x0d))); + const contentLengthMatch = /Content-Length:\s*(\d+)/i.exec(headerStr); + expect(contentLengthMatch).not.toBeNull(); + const declaredLength = Number.parseInt(contentLengthMatch?.[1], 10); + const bodyBytes = new TextEncoder().encode(msg); + expect(declaredLength).toBe(bodyBytes.length); + + const decoder = new FrameDecoder(); + const messages = decoder.decode(encoded); + expect(messages).toHaveLength(1); + expect(messages[0]).toBe(msg); + }); + + it("reassembles two multi-byte messages from one chunk", () => { + const msg1 = JSON.stringify({ jsonrpc: "2.0", method: "a", params: { t: "日本語" } }); + const msg2 = JSON.stringify({ jsonrpc: "2.0", method: "b", params: { t: "한국어" } }); + const encoded1 = encode(msg1); + const encoded2 = encode(msg2); + + const combined = new Uint8Array(encoded1.length + encoded2.length); + combined.set(encoded1); + combined.set(encoded2, encoded1.length); + + const decoder = new FrameDecoder(); + const messages = decoder.decode(combined); + expect(messages).toHaveLength(2); + expect(messages[0]).toBe(msg1); + expect(messages[1]).toBe(msg2); + }); + + it("reassembles a multi-byte message split across three chunks", () => { + const msg = JSON.stringify({ + jsonrpc: "2.0", + method: "test", + params: { text: "𝕳𝖊𝖑𝖑𝖔, 世界! Привет! 
🌍" }, + }); + const encoded = encode(msg); + + const third = Math.floor(encoded.length / 3); + const decoder = new FrameDecoder(); + expect(decoder.decode(encoded.slice(0, third))).toHaveLength(0); + expect(decoder.decode(encoded.slice(third, third * 2))).toHaveLength(0); + const result = decoder.decode(encoded.slice(third * 2)); + expect(result).toHaveLength(1); + expect(result[0]).toBe(msg); + }); +}); diff --git a/packages/lsp/src/framing.ts b/packages/lsp/src/framing.ts index 3a8ab3a..88e60c1 100644 --- a/packages/lsp/src/framing.ts +++ b/packages/lsp/src/framing.ts @@ -5,10 +5,15 @@ * `encode` wraps a JSON message with headers; `FrameDecoder` reassembles * complete messages from streaming byte chunks (handles partial frames and * multiple frames per chunk). + * + * The buffer is a `Uint8Array` (not a string) because `Content-Length` counts + * **bytes** — slicing by JavaScript character count corrupts messages whose + * JSON body contains multi-byte UTF-8 characters (byte length ≠ char length). */ -const HEADER_SEP = "\r\n\r\n"; -const CONTENT_LENGTH_RE = /^Content-Length:\s*(\d+)/i; +const CR = 0x0d; // \r +const LF = 0x0a; // \n +const CONTENT_LENGTH_RE = /Content-Length:\s*(\d+)/i; export function encode(msg: string): Uint8Array { const body = new TextEncoder().encode(msg); @@ -20,43 +25,65 @@ export function encode(msg: string): Uint8Array { return result; } +/** + * Find the first occurrence of the 4-byte sequence \r\n\r\n in `buf`, + * starting at offset `from`. Returns the index of the first byte, or -1. 
+ */ +function findHeaderSep(buf: Uint8Array, from: number): number { + const limit = buf.length - 3; + for (let i = from; i < limit; i++) { + if (buf[i] === CR && buf[i + 1] === LF && buf[i + 2] === CR && buf[i + 3] === LF) { + return i; + } + } + return -1; +} + export class FrameDecoder { - private buffer = ""; + private buffer: Uint8Array = new Uint8Array(0); private expectedLength: number | null = null; - private headerEnd = -1; + private headerEndByte = -1; /** * Feed raw bytes into the decoder. Returns all complete JSON messages * that can be extracted from the accumulated buffer. */ decode(chunk: Uint8Array): string[] { - this.buffer += new TextDecoder().decode(chunk); + // Append the new chunk to the internal byte buffer. + const newBuf = new Uint8Array(this.buffer.length + chunk.length); + newBuf.set(this.buffer); + newBuf.set(chunk, this.buffer.length); + this.buffer = newBuf; + const messages: string[] = []; while (true) { if (this.expectedLength === null) { - const headerEnd = this.buffer.indexOf(HEADER_SEP); - if (headerEnd === -1) break; + const sepIdx = findHeaderSep(this.buffer, 0); + if (sepIdx === -1) break; - const headerPart = this.buffer.slice(0, headerEnd); - const match = CONTENT_LENGTH_RE.exec(headerPart); + // Decode only the header bytes (always ASCII) to read Content-Length. + const headerStr = new TextDecoder().decode(this.buffer.slice(0, sepIdx)); + const match = CONTENT_LENGTH_RE.exec(headerStr); if (!match?.[1]) { - this.buffer = this.buffer.slice(headerEnd + HEADER_SEP.length); + // Not a Content-Length header — skip past this separator and retry. 
+ this.buffer = this.buffer.slice(sepIdx + 4); continue; } this.expectedLength = Number.parseInt(match[1], 10); - this.headerEnd = headerEnd; + this.headerEndByte = sepIdx + 4; // skip \r\n\r\n } - const bodyStart = this.headerEnd + HEADER_SEP.length; + const bodyStart = this.headerEndByte; const available = this.buffer.length - bodyStart; if (available >= this.expectedLength) { - const body = this.buffer.slice(bodyStart, bodyStart + this.expectedLength); - messages.push(body); + // Extract exactly `expectedLength` bytes (Content-Length is in bytes). + const bodyBytes = this.buffer.slice(bodyStart, bodyStart + this.expectedLength); + messages.push(new TextDecoder().decode(bodyBytes)); this.buffer = this.buffer.slice(bodyStart + this.expectedLength); this.expectedLength = null; - this.headerEnd = -1; + this.headerEndByte = -1; } else { break; } diff --git a/packages/lsp/src/rpc.test.ts b/packages/lsp/src/rpc.test.ts index a03870f..05ce924 100644 --- a/packages/lsp/src/rpc.test.ts +++ b/packages/lsp/src/rpc.test.ts @@ -76,3 +76,11 @@ describe("rpc", () => { expect(response.result).toEqual([{ setting: true }]); }); }); + +it("handleMessage does not throw on malformed JSON", async () => { + const { conn } = makeConnection(); + // A corrupted/truncated LSP message — must not throw or reject. + await expect(conn.handleMessage("{ broken json")).resolves.toBeUndefined(); + await expect(conn.handleMessage("")).resolves.toBeUndefined(); + await expect(conn.handleMessage("not json at all")).resolves.toBeUndefined(); +}); diff --git a/packages/lsp/src/rpc.ts b/packages/lsp/src/rpc.ts index 45adf42..6b82624 100644 --- a/packages/lsp/src/rpc.ts +++ b/packages/lsp/src/rpc.ts @@ -62,7 +62,16 @@ export class JsonRpcConnection { } async handleMessage(json: string): Promise<void> { - const msg = JSON.parse(json) as JsonRpcMessage; + let msg: JsonRpcMessage; + try { + msg = JSON.parse(json) as JsonRpcMessage; + } catch { + // A malformed LSP message must never crash the server. 
The most + // common cause is a multi-byte UTF-8 character split across stdout + // chunks (see FrameDecoder). Log and skip — the language server + // will re-send diagnostics on the next file change. + return; + } const { id, method } = msg; if (id !== undefined && method !== undefined) { |
