Fix zstd decompression truncation for multi-frame responses (#22680)

## Summary

Fixes #20053

When a server sends zstd-compressed data with chunked transfer encoding,
each chunk may be compressed as a separate zstd frame. Previously, Bun's
zstd decompressor stopped after the first frame, so responses were
truncated at the first frame boundary (16KB in the original report).
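
To see why, note that each independently compressed chunk is a complete, self-delimiting zstd frame. A minimal sketch of the situation, with sizes taken from the regression test below (this assumes `zstdDecompressSync` is available alongside the `zstdCompressSync` the tests already use):

```ts
import { zstdCompressSync, zstdDecompressSync } from "node:zlib";

// Two chunks compressed independently become two complete zstd frames.
const frame1 = zstdCompressSync(Buffer.from("A".repeat(16384)));
const frame2 = zstdCompressSync(Buffer.from("B".repeat(3627)));

// The client receives their concatenation as one response body.
const body = Buffer.concat([frame1, frame2]);

// A decoder that stops at the end of the first frame yields exactly 16KB:
console.log(zstdDecompressSync(frame1).length); // 16384 — the truncation seen in #20053
```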

## The Fix

The fix modifies the zstd decompressor (`src/deps/zstd.zig`) to continue
decompressing when a frame completes while input data remains. When
`ZSTD_decompressStream` returns 0 (frame complete), we now check whether
more input is available and, if so, reinitialize the decompressor to
handle the next frame.
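
For intuition, here is the same restart-at-frame-boundary idea sketched at the JavaScript level. The real fix relies on `ZSTD_decompressStream` returning 0 as the authoritative frame-boundary signal; this sketch instead splits the body naively on the zstd frame magic bytes (`28 B5 2F FD`), which is fine for illustration but not a robust parser (the magic can legally occur inside compressed payloads, and skippable frames use different magics):

```ts
import { zstdCompressSync, zstdDecompressSync } from "node:zlib";

const MAGIC = Buffer.from([0x28, 0xb5, 0x2f, 0xfd]); // zstd frame magic, little-endian

// Naively split a body into frames at each magic-number occurrence
// (illustration only — libzstd's rc == 0 is the real boundary signal).
function splitFrames(body: Buffer): Buffer[] {
  const starts: number[] = [];
  for (let i = 0; i <= body.length - 4; i++) {
    if (body.subarray(i, i + 4).equals(MAGIC)) starts.push(i);
  }
  return starts.map((s, j) => body.subarray(s, starts[j + 1] ?? body.length));
}

const body = Buffer.concat([
  zstdCompressSync(Buffer.from("A".repeat(16384))),
  zstdCompressSync(Buffer.from("B".repeat(3627))),
]);

// "Reinitialize and continue" at each frame boundary:
const text = Buffer.concat(splitFrames(body).map(f => zstdDecompressSync(f))).toString();
console.log(text.length); // 20011, not 16384
```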

## Testing

Added regression tests in `test/regression/issue/20053.test.ts` that:
1. Test multi-frame zstd decompression where two frames need to be
concatenated
2. Simulate the exact Hono + compression middleware scenario from the
original issue

Both tests fail without the fix (truncating at 16KB) and pass with the
fix.

## Verification

```bash
# Without fix (regular bun):
$ bun test test/regression/issue/20053.test.ts
 0 pass
 2 fail

# With fix (debug build):
$ bun bd test test/regression/issue/20053.test.ts  
 2 pass
 0 fail
```

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude Bot <claude-bot@bun.sh>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
robobun committed 2025-09-15 14:24:03 -07:00 (via GitHub)
commit 0351bd5f28, parent 0ec153ee1c
2 changed files with 161 additions and 5 deletions

### `src/deps/zstd.zig`

```diff
@@ -136,13 +136,27 @@ pub const ZstdReaderArrayList = struct {
         if (this.state == .End or this.state == .Error) return;

         while (this.state == .Uninitialized or this.state == .Inflating) {
+            const next_in = this.input[this.total_in..];
+            // If we have no input to process
+            if (next_in.len == 0) {
+                if (is_done) {
+                    // If we're in the middle of inflating and stream is done, it's truncated
+                    if (this.state == .Inflating) {
+                        this.state = .Error;
+                        return error.ZstdDecompressionError;
+                    }
+                    // No more input and stream is done, we can end
+                    this.end();
+                }
+                return;
+            }
+
             var unused = this.list.unusedCapacitySlice();
             if (unused.len < 4096) {
                 try this.list.ensureUnusedCapacity(this.list_allocator, 4096);
                 unused = this.list.unusedCapacitySlice();
             }

-            const next_in = this.input[this.total_in..];
             var in_buf: c.ZSTD_inBuffer = .{
                 .src = if (next_in.len > 0) next_in.ptr else null,
                 .size = next_in.len,
@@ -167,12 +181,40 @@
             this.total_out += bytes_written;

             if (rc == 0) {
-                this.end();
-                return;
+                // Frame is complete
+                this.state = .Uninitialized; // Reset state since frame is complete
+
+                // Check if there's more input (multiple frames)
+                if (this.total_in >= this.input.len) {
+                    // We've consumed all available input
+                    if (is_done) {
+                        // No more data coming, we can end the stream
+                        this.end();
+                        return;
+                    }
+                    // Frame is complete and no more input available right now.
+                    // Just return normally - the caller can provide more data later if they have it.
+                    return;
+                }
+
+                // More input available, reset for the next frame
+                // ZSTD_initDStream() safely resets the stream state without needing cleanup
+                // It's designed to be called multiple times on the same DStream object
+                _ = c.ZSTD_initDStream(this.zstd);
+                continue;
             }

+            // If rc > 0, decompressor needs more data
+            if (rc > 0) {
+                this.state = .Inflating;
+            }
+
             if (bytes_read == next_in.len) {
-                this.state = .Inflating;
+                // We've consumed all available input
+                if (bytes_written > 0) {
+                    // We wrote some output, continue to see if we need more output space
+                    continue;
+                }
                 if (is_done) {
                     // Stream is truncated - we're at EOF but need more data
                     this.state = .Error;
```
### `test/regression/issue/20053.test.ts` (new file)

```ts
import { expect, test } from "bun:test";
import { zstdCompressSync } from "node:zlib";

test("issue #20053 - multi-frame zstd responses should be fully decompressed", async () => {
  // Create multiple zstd frames that when concatenated form a single large response.
  // This simulates what happens with chunked encoding where each chunk might be
  // compressed as a separate frame.
  const part1 = "A".repeat(16384); // Exactly 16KB
  const part2 = "B".repeat(3627); // Remaining data to total ~20KB

  const compressed1 = zstdCompressSync(Buffer.from(part1));
  const compressed2 = zstdCompressSync(Buffer.from(part2));

  using server = Bun.serve({
    port: 0,
    async fetch(req) {
      // Concatenate two zstd frames (simulating chunked response with multiple frames)
      const combined = Buffer.concat([compressed1, compressed2]);
      return new Response(combined, {
        headers: {
          "content-type": "text/plain",
          "content-encoding": "zstd",
          "transfer-encoding": "chunked",
        },
      });
    },
  });

  // Make a request to the server
  const response = await fetch(`http://localhost:${server.port}/`);
  const text = await response.text();

  // Both frames should be decompressed and concatenated
  expect(text.length).toBe(part1.length + part2.length);
  expect(text.substring(0, 16384)).toBe("A".repeat(16384));
  expect(text.substring(16384)).toBe("B".repeat(3627));
});

test("issue #20053 - zstd with chunked encoding splits JSON into multiple frames", async () => {
  // This test simulates the exact scenario from the original issue
  // where Hono with compression middleware sends multiple zstd frames.
  const largeData = { data: "A".repeat(20000) };
  const jsonString = JSON.stringify(largeData);

  using server = Bun.serve({
    port: 0,
    async fetch(req) {
      // Simulate chunked encoding by compressing in parts.
      // This is what happens when the server uses chunked transfer encoding
      // with compression - each chunk might be compressed separately.
      const part1 = jsonString.slice(0, 16384);
      const part2 = jsonString.slice(16384);
      const compressed1 = zstdCompressSync(Buffer.from(part1));
      const compressed2 = zstdCompressSync(Buffer.from(part2));

      // Server sends multiple zstd frames as would happen with chunked encoding
      const combined = Buffer.concat([compressed1, compressed2]);
      return new Response(combined, {
        headers: {
          "content-type": "application/json",
          "content-encoding": "zstd",
          "transfer-encoding": "chunked",
        },
      });
    },
  });

  const response = await fetch(`http://localhost:${server.port}/`);
  const text = await response.text();

  // The decompressed response should be the concatenation of all frames
  expect(text.length).toBe(jsonString.length);
  expect(text).toBe(jsonString);

  // Verify it can be parsed as JSON
  const parsed = JSON.parse(text);
  expect(parsed.data.length).toBe(20000);
  expect(parsed.data).toBe("A".repeat(20000));
});

test("issue #20053 - streaming zstd decompression handles frame boundaries correctly", async () => {
  // Test that the decompressor correctly handles the case where a frame completes
  // but more data might arrive later (streaming scenario)
  const part1 = "First frame content";
  const part2 = "Second frame content";
  const compressed1 = zstdCompressSync(Buffer.from(part1));
  const compressed2 = zstdCompressSync(Buffer.from(part2));

  using server = Bun.serve({
    port: 0,
    async fetch(req) {
      // Simulate streaming by sending frames separately
      const combined = Buffer.concat([compressed1, compressed2]);
      return new Response(combined, {
        headers: {
          "content-type": "text/plain",
          "content-encoding": "zstd",
          "transfer-encoding": "chunked",
        },
      });
    },
  });

  const response = await fetch(`http://localhost:${server.port}/`);
  const text = await response.text();

  // Both frames should be decompressed
  expect(text).toBe(part1 + part2);
});
```
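
For completeness, the same multi-frame body can also be consumed incrementally through `response.body`, which exercises the frame-boundary handling under streaming delivery more directly. A minimal sketch against the server from the last test (not part of the committed tests; `server`, `part1`, and `part2` refer to that test's locals):

```ts
const res = await fetch(`http://localhost:${server.port}/`);
const reader = res.body!.getReader();
const chunks: Uint8Array[] = [];
for (;;) {
  const { value, done } = await reader.read();
  if (done) break;
  if (value) chunks.push(value);
}
// With the fix, the concatenated chunks contain both decompressed frames.
expect(Buffer.concat(chunks).toString()).toBe(part1 + part2);
```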