From 0351bd5f28fa2a473636a3fcda06665245ca3b0d Mon Sep 17 00:00:00 2001
From: robobun
Date: Mon, 15 Sep 2025 14:24:03 -0700
Subject: [PATCH] Fix zstd decompression truncation for multi-frame responses
 (#22680)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

Fixes #20053

When a server sends zstd-compressed data with chunked transfer encoding, each chunk may be compressed as a separate zstd frame. Previously, Bun's zstd decompressor would stop after the first frame, causing responses to be truncated at 16KB.

## The Fix

The fix modifies the zstd decompressor (`src/deps/zstd.zig`) to continue decompression when a frame completes but input data remains. When `ZSTD_decompressStream` returns 0 (frame complete), we now check if there's more input data and reinitialize the decompressor to handle the next frame.

## Testing

Added regression tests in `test/regression/issue/20053.test.ts` that:

1. Test multi-frame zstd decompression where two frames need to be concatenated
2. Simulate the exact Hono + compression middleware scenario from the original issue

Both tests fail without the fix (truncating at 16KB) and pass with the fix.

## Verification

```bash
# Without fix (regular bun):
$ bun test test/regression/issue/20053.test.ts
0 pass
2 fail

# With fix (debug build):
$ bun bd test test/regression/issue/20053.test.ts
2 pass
0 fail
```

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude Bot
Co-authored-by: Claude
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
---
 src/deps/zstd.zig                   |  52 +++++++++++--
 test/regression/issue/20053.test.ts | 114 ++++++++++++++++++++++++++++
 2 files changed, 161 insertions(+), 5 deletions(-)
 create mode 100644 test/regression/issue/20053.test.ts

diff --git a/src/deps/zstd.zig b/src/deps/zstd.zig
index 53523bdf1b..e800a83ae5 100644
--- a/src/deps/zstd.zig
+++ b/src/deps/zstd.zig
@@ -136,13 +136,27 @@ pub const ZstdReaderArrayList = struct {
         if (this.state == .End or this.state == .Error) return;
 
         while (this.state == .Uninitialized or this.state == .Inflating) {
+            const next_in = this.input[this.total_in..];
+
+            // If we have no input to process
+            if (next_in.len == 0) {
+                if (is_done) {
+                    // If we're in the middle of inflating and stream is done, it's truncated
+                    if (this.state == .Inflating) {
+                        this.state = .Error;
+                        return error.ZstdDecompressionError;
+                    }
+                    // No more input and stream is done, we can end
+                    this.end();
+                }
+                return;
+            }
+
             var unused = this.list.unusedCapacitySlice();
             if (unused.len < 4096) {
                 try this.list.ensureUnusedCapacity(this.list_allocator, 4096);
                 unused = this.list.unusedCapacitySlice();
             }
-
-            const next_in = this.input[this.total_in..];
             var in_buf: c.ZSTD_inBuffer = .{
                 .src = if (next_in.len > 0) next_in.ptr else null,
                 .size = next_in.len,
@@ -167,12 +181,40 @@
             this.total_out += bytes_written;
 
             if (rc == 0) {
-                this.end();
-                return;
+                // Frame is complete
+                this.state = .Uninitialized; // Reset state since frame is complete
+
+                // Check if there's more input (multiple frames)
+                if (this.total_in >= this.input.len) {
+                    // We've consumed all available input
+                    if (is_done) {
+                        // No more data coming, we can end the stream
+                        this.end();
+                        return;
+                    }
+                    // Frame is complete and no more input available right now.
+                    // Just return normally - the caller can provide more data later if they have it.
+                    return;
+                }
+                // More input available, reset for the next frame
+                // ZSTD_initDStream() safely resets the stream state without needing cleanup
+                // It's designed to be called multiple times on the same DStream object
+                _ = c.ZSTD_initDStream(this.zstd);
+                continue;
+            }
+
+            // If rc > 0, decompressor needs more data
+            if (rc > 0) {
+                this.state = .Inflating;
             }
 
             if (bytes_read == next_in.len) {
-                this.state = .Inflating;
+                // We've consumed all available input
+                if (bytes_written > 0) {
+                    // We wrote some output, continue to see if we need more output space
+                    continue;
+                }
+
                 if (is_done) {
                     // Stream is truncated - we're at EOF but need more data
                     this.state = .Error;
diff --git a/test/regression/issue/20053.test.ts b/test/regression/issue/20053.test.ts
new file mode 100644
index 0000000000..071352d006
--- /dev/null
+++ b/test/regression/issue/20053.test.ts
@@ -0,0 +1,114 @@
+import { expect, test } from "bun:test";
+import { zstdCompressSync } from "node:zlib";
+
+test("issue #20053 - multi-frame zstd responses should be fully decompressed", async () => {
+  // Create multiple zstd frames that when concatenated form a single large response
+  // This simulates what happens with chunked encoding where each chunk might be
+  // compressed as a separate frame
+  const part1 = "A".repeat(16384); // Exactly 16KB
+  const part2 = "B".repeat(3627); // Remaining data to total ~20KB
+
+  const compressed1 = zstdCompressSync(Buffer.from(part1));
+  const compressed2 = zstdCompressSync(Buffer.from(part2));
+
+  using server = Bun.serve({
+    port: 0,
+    async fetch(req) {
+      // Concatenate two zstd frames (simulating chunked response with multiple frames)
+      const combined = Buffer.concat([compressed1, compressed2]);
+
+      return new Response(combined, {
+        headers: {
+          "content-type": "text/plain",
+          "content-encoding": "zstd",
+          "transfer-encoding": "chunked",
+        },
+      });
+    },
+  });
+
+  // Make a request to the server
+  const response = await fetch(`http://localhost:${server.port}/`);
+  const text = await response.text();
+
+  // Both frames should be decompressed and concatenated
+  expect(text.length).toBe(part1.length + part2.length);
+  expect(text.substring(0, 16384)).toBe("A".repeat(16384));
+  expect(text.substring(16384)).toBe("B".repeat(3627));
+});
+
+test("issue #20053 - zstd with chunked encoding splits JSON into multiple frames", async () => {
+  // This test simulates the exact scenario from the original issue
+  // where Hono with compression middleware sends multiple zstd frames
+  const largeData = { data: "A".repeat(20000) };
+  const jsonString = JSON.stringify(largeData);
+
+  using server = Bun.serve({
+    port: 0,
+    async fetch(req) {
+      // Simulate chunked encoding by compressing in parts
+      // This is what happens when the server uses chunked transfer encoding
+      // with compression - each chunk might be compressed separately
+      const part1 = jsonString.slice(0, 16384);
+      const part2 = jsonString.slice(16384);
+
+      const compressed1 = zstdCompressSync(Buffer.from(part1));
+      const compressed2 = zstdCompressSync(Buffer.from(part2));
+
+      // Server sends multiple zstd frames as would happen with chunked encoding
+      const combined = Buffer.concat([compressed1, compressed2]);
+
+      return new Response(combined, {
+        headers: {
+          "content-type": "application/json",
+          "content-encoding": "zstd",
+          "transfer-encoding": "chunked",
+        },
+      });
+    },
+  });
+
+  const response = await fetch(`http://localhost:${server.port}/`);
+  const text = await response.text();
+
+  // The decompressed response should be the concatenation of all frames
+  expect(text.length).toBe(jsonString.length);
+  expect(text).toBe(jsonString);
+
+  // Verify it can be parsed as JSON
+  const parsed = JSON.parse(text);
+  expect(parsed.data.length).toBe(20000);
+  expect(parsed.data).toBe("A".repeat(20000));
+});
+
+test("issue #20053 - streaming zstd decompression handles frame boundaries correctly", async () => {
+  // Test that the decompressor correctly handles the case where a frame completes
+  // but more data might arrive later (streaming scenario)
+  const part1 = "First frame content";
+  const part2 = "Second frame content";
+
+  const compressed1 = zstdCompressSync(Buffer.from(part1));
+  const compressed2 = zstdCompressSync(Buffer.from(part2));
+
+  using server = Bun.serve({
+    port: 0,
+    async fetch(req) {
+      // Simulate streaming by sending frames separately
+      const combined = Buffer.concat([compressed1, compressed2]);
+
+      return new Response(combined, {
+        headers: {
+          "content-type": "text/plain",
+          "content-encoding": "zstd",
+          "transfer-encoding": "chunked",
+        },
+      });
+    },
+  });
+
+  const response = await fetch(`http://localhost:${server.port}/`);
+  const text = await response.text();
+
+  // Both frames should be decompressed
+  expect(text).toBe(part1 + part2);
});
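
For a quick manual check outside the test runner, the scenario can also be reproduced with a short standalone script. The following is a minimal sketch distilled from the first regression test above, using the same `zstdCompressSync` helper from `node:zlib` and the same 16384 + 3627 byte parts; the filename and overall structure are illustrative and not part of the patch.

```ts
// repro-20053.ts: illustrative standalone repro (not part of the patch above).
// Serves two concatenated zstd frames, then fetches them back through Bun's
// decompressor. With the fix, all 20011 bytes arrive; without it, the body
// is truncated to the first frame (16384 bytes).
import { zstdCompressSync } from "node:zlib";

// Two independently compressed zstd frames, as a server that compresses each
// chunk separately would produce.
const frame1 = zstdCompressSync(Buffer.from("A".repeat(16384)));
const frame2 = zstdCompressSync(Buffer.from("B".repeat(3627)));

const server = Bun.serve({
  port: 0,
  fetch() {
    return new Response(Buffer.concat([frame1, frame2]), {
      headers: {
        "content-encoding": "zstd",
        "transfer-encoding": "chunked",
      },
    });
  },
});

const text = await fetch(`http://localhost:${server.port}/`).then(r => r.text());
console.log(text.length); // expect 20011 (16384 + 3627) with the fix
server.stop();
```

Run with `bun repro-20053.ts`; against a debug build, use the repo's debug runner as in the verification section above.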