Fix zstd decompression truncation for multi-frame responses (#22680)

## Summary

Fixes #20053

When a server sends zstd-compressed data with chunked transfer encoding,
each chunk may be compressed as a separate zstd frame. Previously, Bun's
zstd decompressor stopped after the first frame, so responses were
truncated at the first frame boundary (16KB in the original report).
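
To see why, note that each independently compressed chunk is a complete, self-delimiting zstd frame. A minimal sketch of the situation, with sizes taken from the regression test below (this assumes `zstdDecompressSync` is available alongside the `zstdCompressSync` the tests already use):

```ts
import { zstdCompressSync, zstdDecompressSync } from "node:zlib";

// Two chunks compressed independently become two complete zstd frames.
const frame1 = zstdCompressSync(Buffer.from("A".repeat(16384)));
const frame2 = zstdCompressSync(Buffer.from("B".repeat(3627)));

// The client receives their concatenation as one response body.
const body = Buffer.concat([frame1, frame2]);

// A decoder that stops at the end of the first frame yields exactly 16KB:
console.log(zstdDecompressSync(frame1).length); // 16384 — the truncation seen in #20053
```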

## The Fix

The fix modifies the zstd decompressor (`src/deps/zstd.zig`) to continue
decompressing when a frame completes while input data remains. When
`ZSTD_decompressStream` returns 0 (frame complete), we now check whether
more input is available and, if so, reinitialize the decompressor to
handle the next frame.
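
For intuition, here is the same restart-at-frame-boundary idea sketched at the JavaScript level. The real fix relies on `ZSTD_decompressStream` returning 0 as the authoritative frame-boundary signal; this sketch instead splits the body naively on the zstd frame magic bytes (`28 B5 2F FD`), which is fine for illustration but not a robust parser (the magic can legally occur inside compressed payloads, and skippable frames use different magics):

```ts
import { zstdCompressSync, zstdDecompressSync } from "node:zlib";

const MAGIC = Buffer.from([0x28, 0xb5, 0x2f, 0xfd]); // zstd frame magic, little-endian

// Naively split a body into frames at each magic-number occurrence
// (illustration only — libzstd's rc == 0 is the real boundary signal).
function splitFrames(body: Buffer): Buffer[] {
  const starts: number[] = [];
  for (let i = 0; i <= body.length - 4; i++) {
    if (body.subarray(i, i + 4).equals(MAGIC)) starts.push(i);
  }
  return starts.map((s, j) => body.subarray(s, starts[j + 1] ?? body.length));
}

const body = Buffer.concat([
  zstdCompressSync(Buffer.from("A".repeat(16384))),
  zstdCompressSync(Buffer.from("B".repeat(3627))),
]);

// "Reinitialize and continue" at each frame boundary:
const text = Buffer.concat(splitFrames(body).map(f => zstdDecompressSync(f))).toString();
console.log(text.length); // 20011, not 16384
```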

## Testing

Added regression tests in `test/regression/issue/20053.test.ts` that:
1. Test multi-frame zstd decompression where two frames need to be
concatenated
2. Simulate the exact Hono + compression middleware scenario from the
original issue

Both tests fail without the fix (truncating at 16KB) and pass with the
fix.

## Verification

```bash
# Without fix (regular bun):
$ bun test test/regression/issue/20053.test.ts
 0 pass
 2 fail

# With fix (debug build):
$ bun bd test test/regression/issue/20053.test.ts  
 2 pass
 0 fail
```

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude Bot <claude-bot@bun.sh>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
robobun committed 2025-09-15 14:24:03 -07:00 (via GitHub)
commit 0351bd5f28, parent 0ec153ee1c
2 changed files with 161 additions and 5 deletions

### `src/deps/zstd.zig`

```diff
@@ -136,13 +136,27 @@ pub const ZstdReaderArrayList = struct {
         if (this.state == .End or this.state == .Error) return;

         while (this.state == .Uninitialized or this.state == .Inflating) {
+            const next_in = this.input[this.total_in..];
+            // If we have no input to process
+            if (next_in.len == 0) {
+                if (is_done) {
+                    // If we're in the middle of inflating and stream is done, it's truncated
+                    if (this.state == .Inflating) {
+                        this.state = .Error;
+                        return error.ZstdDecompressionError;
+                    }
+                    // No more input and stream is done, we can end
+                    this.end();
+                }
+                return;
+            }
+
             var unused = this.list.unusedCapacitySlice();
             if (unused.len < 4096) {
                 try this.list.ensureUnusedCapacity(this.list_allocator, 4096);
                 unused = this.list.unusedCapacitySlice();
             }

-            const next_in = this.input[this.total_in..];
             var in_buf: c.ZSTD_inBuffer = .{
                 .src = if (next_in.len > 0) next_in.ptr else null,
                 .size = next_in.len,
@@ -167,12 +181,40 @@
             this.total_out += bytes_written;

             if (rc == 0) {
-                this.end();
-                return;
+                // Frame is complete
+                this.state = .Uninitialized; // Reset state since frame is complete
+
+                // Check if there's more input (multiple frames)
+                if (this.total_in >= this.input.len) {
+                    // We've consumed all available input
+                    if (is_done) {
+                        // No more data coming, we can end the stream
+                        this.end();
+                        return;
+                    }
+                    // Frame is complete and no more input available right now.
+                    // Just return normally - the caller can provide more data later if they have it.
+                    return;
+                }
+
+                // More input available, reset for the next frame
+                // ZSTD_initDStream() safely resets the stream state without needing cleanup
+                // It's designed to be called multiple times on the same DStream object
+                _ = c.ZSTD_initDStream(this.zstd);
+                continue;
             }

+            // If rc > 0, decompressor needs more data
+            if (rc > 0) {
+                this.state = .Inflating;
+            }
+
             if (bytes_read == next_in.len) {
-                this.state = .Inflating;
+                // We've consumed all available input
+                if (bytes_written > 0) {
+                    // We wrote some output, continue to see if we need more output space
+                    continue;
+                }
                 if (is_done) {
                     // Stream is truncated - we're at EOF but need more data
                     this.state = .Error;
```
### `test/regression/issue/20053.test.ts` (new file)

```ts
import { expect, test } from "bun:test";
import { zstdCompressSync } from "node:zlib";

test("issue #20053 - multi-frame zstd responses should be fully decompressed", async () => {
  // Create multiple zstd frames that when concatenated form a single large response.
  // This simulates what happens with chunked encoding where each chunk might be
  // compressed as a separate frame.
  const part1 = "A".repeat(16384); // Exactly 16KB
  const part2 = "B".repeat(3627); // Remaining data to total ~20KB

  const compressed1 = zstdCompressSync(Buffer.from(part1));
  const compressed2 = zstdCompressSync(Buffer.from(part2));

  using server = Bun.serve({
    port: 0,
    async fetch(req) {
      // Concatenate two zstd frames (simulating chunked response with multiple frames)
      const combined = Buffer.concat([compressed1, compressed2]);
      return new Response(combined, {
        headers: {
          "content-type": "text/plain",
          "content-encoding": "zstd",
          "transfer-encoding": "chunked",
        },
      });
    },
  });

  // Make a request to the server
  const response = await fetch(`http://localhost:${server.port}/`);
  const text = await response.text();

  // Both frames should be decompressed and concatenated
  expect(text.length).toBe(part1.length + part2.length);
  expect(text.substring(0, 16384)).toBe("A".repeat(16384));
  expect(text.substring(16384)).toBe("B".repeat(3627));
});

test("issue #20053 - zstd with chunked encoding splits JSON into multiple frames", async () => {
  // This test simulates the exact scenario from the original issue
  // where Hono with compression middleware sends multiple zstd frames.
  const largeData = { data: "A".repeat(20000) };
  const jsonString = JSON.stringify(largeData);

  using server = Bun.serve({
    port: 0,
    async fetch(req) {
      // Simulate chunked encoding by compressing in parts.
      // This is what happens when the server uses chunked transfer encoding
      // with compression - each chunk might be compressed separately.
      const part1 = jsonString.slice(0, 16384);
      const part2 = jsonString.slice(16384);
      const compressed1 = zstdCompressSync(Buffer.from(part1));
      const compressed2 = zstdCompressSync(Buffer.from(part2));

      // Server sends multiple zstd frames as would happen with chunked encoding
      const combined = Buffer.concat([compressed1, compressed2]);
      return new Response(combined, {
        headers: {
          "content-type": "application/json",
          "content-encoding": "zstd",
          "transfer-encoding": "chunked",
        },
      });
    },
  });

  const response = await fetch(`http://localhost:${server.port}/`);
  const text = await response.text();

  // The decompressed response should be the concatenation of all frames
  expect(text.length).toBe(jsonString.length);
  expect(text).toBe(jsonString);

  // Verify it can be parsed as JSON
  const parsed = JSON.parse(text);
  expect(parsed.data.length).toBe(20000);
  expect(parsed.data).toBe("A".repeat(20000));
});

test("issue #20053 - streaming zstd decompression handles frame boundaries correctly", async () => {
  // Test that the decompressor correctly handles the case where a frame completes
  // but more data might arrive later (streaming scenario)
  const part1 = "First frame content";
  const part2 = "Second frame content";
  const compressed1 = zstdCompressSync(Buffer.from(part1));
  const compressed2 = zstdCompressSync(Buffer.from(part2));

  using server = Bun.serve({
    port: 0,
    async fetch(req) {
      // Simulate streaming by sending frames separately
      const combined = Buffer.concat([compressed1, compressed2]);
      return new Response(combined, {
        headers: {
          "content-type": "text/plain",
          "content-encoding": "zstd",
          "transfer-encoding": "chunked",
        },
      });
    },
  });

  const response = await fetch(`http://localhost:${server.port}/`);
  const text = await response.text();

  // Both frames should be decompressed
  expect(text).toBe(part1 + part2);
});
```
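
For completeness, the same multi-frame body can also be consumed incrementally through `response.body`, which exercises the frame-boundary handling under streaming delivery more directly. A minimal sketch against the server from the last test (not part of the committed tests; `server`, `part1`, and `part2` refer to that test's locals):

```ts
const res = await fetch(`http://localhost:${server.port}/`);
const reader = res.body!.getReader();
const chunks: Uint8Array[] = [];
for (;;) {
  const { value, done } = await reader.read();
  if (done) break;
  if (value) chunks.push(value);
}
// With the fix, the concatenated chunks contain both decompressed frames.
expect(Buffer.concat(chunks).toString()).toBe(part1 + part2);
```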