From 0351bd5f28fa2a473636a3fcda06665245ca3b0d Mon Sep 17 00:00:00 2001
From: robobun
Date: Mon, 15 Sep 2025 14:24:03 -0700
Subject: [PATCH] Fix zstd decompression truncation for multi-frame responses
 (#22680)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

Fixes #20053

When a server sends zstd-compressed data with chunked transfer encoding, each chunk may be compressed as a separate zstd frame. Previously, Bun's zstd decompressor would stop after the first frame, causing responses to be truncated at 16KB.

## The Fix

The fix modifies the zstd decompressor (`src/deps/zstd.zig`) to continue decompression when a frame completes but input data remains. When `ZSTD_decompressStream` returns 0 (frame complete), we now check if there's more input data and reinitialize the decompressor to handle the next frame.

## Testing

Added regression tests in `test/regression/issue/20053.test.ts` that:

1. Test multi-frame zstd decompression where two frames need to be concatenated
2. Simulate the exact Hono + compression middleware scenario from the original issue

Both tests fail without the fix (truncating at 16KB) and pass with the fix.

## Verification

```bash
# Without fix (regular bun):
$ bun test test/regression/issue/20053.test.ts
0 pass
2 fail

# With fix (debug build):
$ bun bd test test/regression/issue/20053.test.ts
2 pass
0 fail
```

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude Bot
Co-authored-by: Claude
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
---
 src/deps/zstd.zig                   |  52 +++++++++++--
 test/regression/issue/20053.test.ts | 114 ++++++++++++++++++++++++++++
 2 files changed, 161 insertions(+), 5 deletions(-)
 create mode 100644 test/regression/issue/20053.test.ts

diff --git a/src/deps/zstd.zig b/src/deps/zstd.zig
index 53523bdf1b..e800a83ae5 100644
--- a/src/deps/zstd.zig
+++ b/src/deps/zstd.zig
@@ -136,13 +136,27 @@ pub const ZstdReaderArrayList = struct {
         if (this.state == .End or this.state == .Error) return;
 
         while (this.state == .Uninitialized or this.state == .Inflating) {
+            const next_in = this.input[this.total_in..];
+
+            // If we have no input to process
+            if (next_in.len == 0) {
+                if (is_done) {
+                    // If we're in the middle of inflating and stream is done, it's truncated
+                    if (this.state == .Inflating) {
+                        this.state = .Error;
+                        return error.ZstdDecompressionError;
+                    }
+                    // No more input and stream is done, we can end
+                    this.end();
+                }
+                return;
+            }
+
             var unused = this.list.unusedCapacitySlice();
             if (unused.len < 4096) {
                 try this.list.ensureUnusedCapacity(this.list_allocator, 4096);
                 unused = this.list.unusedCapacitySlice();
             }
-
-            const next_in = this.input[this.total_in..];
             var in_buf: c.ZSTD_inBuffer = .{
                 .src = if (next_in.len > 0) next_in.ptr else null,
                 .size = next_in.len,
@@ -167,12 +181,40 @@
             this.total_out += bytes_written;
 
             if (rc == 0) {
-                this.end();
-                return;
+                // Frame is complete
+                this.state = .Uninitialized; // Reset state since frame is complete
+
+                // Check if there's more input (multiple frames)
+                if (this.total_in >= this.input.len) {
+                    // We've consumed all available input
+                    if (is_done) {
+                        // No more data coming, we can end the stream
+                        this.end();
+                        return;
+                    }
+                    // Frame is complete and no more input available right now.
+                    // Just return normally - the caller can provide more data later if they have it.
+                    return;
+                }
+                // More input available, reset for the next frame
+                // ZSTD_initDStream() safely resets the stream state without needing cleanup
+                // It's designed to be called multiple times on the same DStream object
+                _ = c.ZSTD_initDStream(this.zstd);
+                continue;
+            }
+
+            // If rc > 0, decompressor needs more data
+            if (rc > 0) {
+                this.state = .Inflating;
             }
 
             if (bytes_read == next_in.len) {
-                this.state = .Inflating;
+                // We've consumed all available input
+                if (bytes_written > 0) {
+                    // We wrote some output, continue to see if we need more output space
+                    continue;
+                }
+
                 if (is_done) {
                     // Stream is truncated - we're at EOF but need more data
                     this.state = .Error;
diff --git a/test/regression/issue/20053.test.ts b/test/regression/issue/20053.test.ts
new file mode 100644
index 0000000000..071352d006
--- /dev/null
+++ b/test/regression/issue/20053.test.ts
@@ -0,0 +1,114 @@
+import { expect, test } from "bun:test";
+import { zstdCompressSync } from "node:zlib";
+
+test("issue #20053 - multi-frame zstd responses should be fully decompressed", async () => {
+  // Create multiple zstd frames that when concatenated form a single large response
+  // This simulates what happens with chunked encoding where each chunk might be
+  // compressed as a separate frame
+  const part1 = "A".repeat(16384); // Exactly 16KB
+  const part2 = "B".repeat(3627); // Remaining data to total ~20KB
+
+  const compressed1 = zstdCompressSync(Buffer.from(part1));
+  const compressed2 = zstdCompressSync(Buffer.from(part2));
+
+  using server = Bun.serve({
+    port: 0,
+    async fetch(req) {
+      // Concatenate two zstd frames (simulating chunked response with multiple frames)
+      const combined = Buffer.concat([compressed1, compressed2]);
+
+      return new Response(combined, {
+        headers: {
+          "content-type": "text/plain",
+          "content-encoding": "zstd",
+          "transfer-encoding": "chunked",
+        },
+      });
+    },
+  });
+
+  // Make a request to the server
+  const response = await fetch(`http://localhost:${server.port}/`);
+  const text = await response.text();
+
+  // Both frames should be decompressed and concatenated
+  expect(text.length).toBe(part1.length + part2.length);
+  expect(text.substring(0, 16384)).toBe("A".repeat(16384));
+  expect(text.substring(16384)).toBe("B".repeat(3627));
+});
+
+test("issue #20053 - zstd with chunked encoding splits JSON into multiple frames", async () => {
+  // This test simulates the exact scenario from the original issue
+  // where Hono with compression middleware sends multiple zstd frames
+  const largeData = { data: "A".repeat(20000) };
+  const jsonString = JSON.stringify(largeData);
+
+  using server = Bun.serve({
+    port: 0,
+    async fetch(req) {
+      // Simulate chunked encoding by compressing in parts
+      // This is what happens when the server uses chunked transfer encoding
+      // with compression - each chunk might be compressed separately
+      const part1 = jsonString.slice(0, 16384);
+      const part2 = jsonString.slice(16384);
+
+      const compressed1 = zstdCompressSync(Buffer.from(part1));
+      const compressed2 = zstdCompressSync(Buffer.from(part2));
+
+      // Server sends multiple zstd frames as would happen with chunked encoding
+      const combined = Buffer.concat([compressed1, compressed2]);
+
+      return new Response(combined, {
+        headers: {
+          "content-type": "application/json",
+          "content-encoding": "zstd",
+          "transfer-encoding": "chunked",
+        },
+      });
+    },
+  });
+
+  const response = await fetch(`http://localhost:${server.port}/`);
+  const text = await response.text();
+
+  // The decompressed response should be the concatenation of all frames
+  expect(text.length).toBe(jsonString.length);
+  expect(text).toBe(jsonString);
+
+  // Verify it can be parsed as JSON
+  const parsed = JSON.parse(text);
+  expect(parsed.data.length).toBe(20000);
+  expect(parsed.data).toBe("A".repeat(20000));
+});
+
+test("issue #20053 - streaming zstd decompression handles frame boundaries correctly", async () => {
+  // Test that the decompressor correctly handles the case where a frame completes
+  // but more data might arrive later (streaming scenario)
+  const part1 = "First frame content";
+  const part2 = "Second frame content";
+
+  const compressed1 = zstdCompressSync(Buffer.from(part1));
+  const compressed2 = zstdCompressSync(Buffer.from(part2));
+
+  using server = Bun.serve({
+    port: 0,
+    async fetch(req) {
+      // Simulate streaming by sending frames separately
+      const combined = Buffer.concat([compressed1, compressed2]);
+
+      return new Response(combined, {
+        headers: {
+          "content-type": "text/plain",
+          "content-encoding": "zstd",
+          "transfer-encoding": "chunked",
+        },
+      });
+    },
+  });
+
+  const response = await fetch(`http://localhost:${server.port}/`);
+  const text = await response.text();
+
+  // Both frames should be decompressed
+  expect(text).toBe(part1 + part2);
});
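
For a quick manual check outside the test runner, the scenario can also be reproduced with a short standalone script. The following is a minimal sketch distilled from the first regression test above, using the same `zstdCompressSync` helper from `node:zlib` and the same 16384 + 3627 byte parts; the filename and overall structure are illustrative and not part of the patch.

```ts
// repro-20053.ts: illustrative standalone repro (not part of the patch above).
// Serves two concatenated zstd frames, then fetches them back through Bun's
// decompressor. With the fix, all 20011 bytes arrive; without it, the body
// is truncated to the first frame (16384 bytes).
import { zstdCompressSync } from "node:zlib";

// Two independently compressed zstd frames, as a server that compresses each
// chunk separately would produce.
const frame1 = zstdCompressSync(Buffer.from("A".repeat(16384)));
const frame2 = zstdCompressSync(Buffer.from("B".repeat(3627)));

const server = Bun.serve({
  port: 0,
  fetch() {
    return new Response(Buffer.concat([frame1, frame2]), {
      headers: {
        "content-encoding": "zstd",
        "transfer-encoding": "chunked",
      },
    });
  },
});

const text = await fetch(`http://localhost:${server.port}/`).then(r => r.text());
console.log(text.length); // expect 20011 (16384 + 3627) with the fix
server.stop();
```

Run with `bun repro-20053.ts`; against a debug build, use the repo's debug runner as in the verification section above.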