Files
bun.sh/src/deps/zstd.zig
robobun 0351bd5f28 Fix zstd decompression truncation for multi-frame responses (#22680)
## Summary

Fixes #20053

When a server sends zstd-compressed data with chunked transfer encoding,
each chunk may be compressed as a separate zstd frame. Previously, Bun's
zstd decompressor would stop after the first frame, causing responses to
be truncated at 16KB.

## The Fix

The fix modifies the zstd decompressor (`src/deps/zstd.zig`) to continue
decompression when a frame completes but input data remains. When
`ZSTD_decompressStream` returns 0 (frame complete), we now check if
there's more input data and reinitialize the decompressor to handle the
next frame.

## Testing

Added regression tests in `test/regression/issue/20053.test.ts` that:
1. Test multi-frame zstd decompression where two frames need to be
concatenated
2. Simulate the exact Hono + compression middleware scenario from the
original issue

Both tests fail without the fix (truncating at 16KB) and pass with the
fix.

## Verification

```bash
# Without fix (regular bun):
$ bun test test/regression/issue/20053.test.ts
 0 pass
 2 fail

# With fix (debug build):
$ bun bd test test/regression/issue/20053.test.ts  
 2 pass
 0 fail
```

🤖 Generated with [Claude Code](https://claude.ai/code)

---------

Co-authored-by: Claude Bot <claude-bot@bun.sh>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-09-15 14:24:03 -07:00

234 lines
9.2 KiB
Zig

// -----------------------------------
/// Compresses `src` as a single zstd frame into the caller-allocated `dest`.
/// Providing `dest.len >= ZSTD_compressBound(src.len)` guarantees zstd has
/// enough room to succeed. When `level` is null, zstd's default compression
/// level is used.
/// Returns the number of compressed bytes written into `dest`, or the static
/// zstd error-name string on failure.
// Wraps: ZSTDLIB_API size_t ZSTD_compress(void* dst, size_t dstCapacity,
//                                         const void* src, size_t srcSize,
//                                         int compressionLevel);
pub fn compress(dest: []u8, src: []const u8, level: ?i32) Result {
    const chosen_level = level orelse c.ZSTD_defaultCLevel();
    const code = c.ZSTD_compress(dest.ptr, dest.len, src.ptr, src.len, chosen_level);
    if (c.ZSTD_isError(code) != 0) {
        return .{ .err = bun.sliceTo(c.ZSTD_getErrorName(code), 0) };
    }
    return .{ .success = code };
}
/// Returns the maximum compressed size zstd may produce for `srcSize` input
/// bytes (single-frame worst case). Use this to size the `dest` buffer passed
/// to `compress` so the call cannot fail for lack of space.
pub fn compressBound(srcSize: usize) usize {
return c.ZSTD_compressBound(srcSize);
}
/// Decompresses `src` into the caller-allocated `dest`.
/// `src` must be the _exact_ bytes of some number of complete compressed
/// and/or skippable frames, and `dest.len` must be an upper bound on the
/// regenerated size. If no such upper bound is known, use the streaming
/// decompressor (ZstdReaderArrayList) instead.
/// Returns the number of bytes written into `dest`, or the static zstd
/// error-name string on failure.
// Wraps: ZSTDLIB_API size_t ZSTD_decompress(void* dst, size_t dstCapacity,
//                                           const void* src, size_t compressedSize);
pub fn decompress(dest: []u8, src: []const u8) Result {
    const code = c.ZSTD_decompress(dest.ptr, dest.len, src.ptr, src.len);
    if (c.ZSTD_isError(code) != 0) {
        return .{ .err = bun.sliceTo(c.ZSTD_getErrorName(code), 0) };
    }
    return .{ .success = code };
}
/// Reads the total decompressed size recorded in the frame headers of `src`
/// (a series of zstd and/or skippable frames) via ZSTD_findDecompressedSize.
/// NOTE(review): the underlying call can also yield ZSTD_CONTENTSIZE_UNKNOWN
/// or ZSTD_CONTENTSIZE_ERROR (see the notes on ZSTD_findDecompressedSize),
/// which this wrapper returns as a raw usize — callers must treat the result
/// as untrusted when the input is untrusted.
pub fn getDecompressedSize(src: []const u8) usize {
return ZSTD_findDecompressedSize(src.ptr, src.len);
}
//ZSTD_findDecompressedSize() :
//`src` should point to the start of a series of ZSTD encoded and/or skippable frames
//`srcSize` must be the _exact_ size of this series
// (i.e. there should be a frame boundary at `src + srcSize`)
//@return : - decompressed size of all data in all successive frames
// - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
// - if an error occurred: ZSTD_CONTENTSIZE_ERROR
//
// note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
// When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
// In which case, it's necessary to use streaming mode to decompress data.
// note 2 : decompressed size is always present when compression is done with ZSTD_compress()
// note 3 : decompressed size can be very large (64-bits value),
// potentially larger than what local system can handle as a single memory segment.
// In which case, it's necessary to use streaming mode to decompress data.
// note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
// Always ensure result fits within application's authorized limits.
// Each application can set its own limits.
// note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
// read each contained frame header. This is fast as most of the data is skipped,
// however it does mean that all frame data must be present and valid. */
/// Direct binding to zstd's ZSTD_findDecompressedSize (see upstream notes above).
pub extern fn ZSTD_findDecompressedSize(src: ?*const anyopaque, srcSize: usize) c_ulonglong;
/// Outcome of a one-shot zstd call: `success` carries the byte count produced,
/// `err` carries the static error-name string from ZSTD_getErrorName (not
/// owned by the caller; do not free).
pub const Result = union(enum) {
success: usize,
err: [:0]const u8,
};
/// Streaming zstd decompressor that appends decoded bytes to a caller-owned
/// ArrayListUnmanaged(u8). Handles inputs composed of multiple concatenated
/// zstd frames — e.g. chunked-transfer HTTP responses where each chunk is
/// compressed as its own frame (see issue #20053) — by resetting the DStream
/// and continuing whenever a frame completes with input remaining.
pub const ZstdReaderArrayList = struct {
// Decompression state machine:
//   Uninitialized -> no frame in progress (fresh, or previous frame finished)
//   Inflating     -> mid-frame, decoder is waiting for more input/output space
//   End           -> stream finished; DStream has been freed
//   Error         -> a zstd error occurred; reader is unusable
const State = enum {
Uninitialized,
Inflating,
End,
Error,
};
// Entire compressed input provided so far; `total_in` indexes how much of it
// has already been consumed by the decoder.
input: []const u8,
// Working copy of the output list; flushed back to `list_ptr` on every readAll.
list: std.ArrayListUnmanaged(u8),
// Allocator used to grow the output list (may differ from `allocator`).
list_allocator: std.mem.Allocator,
// Caller's list; kept in sync so the caller sees decompressed output.
list_ptr: *std.ArrayListUnmanaged(u8),
// Allocator that owns this reader struct itself.
allocator: std.mem.Allocator,
zstd: *c.ZSTD_DStream,
state: State = State.Uninitialized,
// Running totals across all readAll calls.
total_out: usize = 0,
total_in: usize = 0,
pub const new = bun.TrivialNew(ZstdReaderArrayList);
/// Convenience initializer: uses one allocator for both the reader struct
/// and the output list. See initWithListAllocator.
pub fn init(
input: []const u8,
list: *std.ArrayListUnmanaged(u8),
allocator: std.mem.Allocator,
) !*ZstdReaderArrayList {
return initWithListAllocator(input, list, allocator, allocator);
}
/// Creates a reader over `input` that appends decompressed bytes to `list`.
/// `list_allocator` grows the output list; `allocator` owns the reader.
/// Returns error.ZstdFailedToCreateInstance if the DStream cannot be created.
/// Caller must eventually call deinit() (or end() then destroy) to release
/// the DStream and the reader.
pub fn initWithListAllocator(
input: []const u8,
list: *std.ArrayListUnmanaged(u8),
list_allocator: std.mem.Allocator,
allocator: std.mem.Allocator,
) !*ZstdReaderArrayList {
var reader = try allocator.create(ZstdReaderArrayList);
reader.* = .{
.input = input,
.list = list.*,
.list_allocator = list_allocator,
.list_ptr = list,
.allocator = allocator,
.zstd = undefined,
};
// Deallocation of the reader must happen if DStream creation fails,
// since the caller never receives the pointer.
reader.zstd = c.ZSTD_createDStream() orelse {
allocator.destroy(reader);
return error.ZstdFailedToCreateInstance;
};
_ = c.ZSTD_initDStream(reader.zstd);
return reader;
}
/// Frees the DStream and marks the stream finished. Idempotent: the state
/// check guards against double-freeing the DStream.
pub fn end(this: *ZstdReaderArrayList) void {
if (this.state != .End) {
_ = c.ZSTD_freeDStream(this.zstd);
this.state = .End;
}
}
/// Releases the DStream (via end) and the reader itself. The output list is
/// NOT freed — it belongs to the caller.
pub fn deinit(this: *ZstdReaderArrayList) void {
// Copy the allocator out first: end() mutates `this`, and we must not
// touch `this` after destroy().
var alloc = this.allocator;
this.end();
alloc.destroy(this);
}
/// Decompresses as much of `input` as possible, appending output to the list.
/// `is_done` means no further input will ever arrive (EOF).
/// Returns normally when all currently-available input is consumed; errors:
///   error.ZstdDecompressionError — zstd failed, or the stream is truncated
///                                  (EOF mid-frame).
///   error.ShortRead — decoder needs more input and `is_done` is false;
///                     caller should retry after extending `input`.
pub fn readAll(this: *ZstdReaderArrayList, is_done: bool) !void {
// Always publish our working list back to the caller's list, even on error.
defer this.list_ptr.* = this.list;
if (this.state == .End or this.state == .Error) return;
while (this.state == .Uninitialized or this.state == .Inflating) {
const next_in = this.input[this.total_in..];
// If we have no input to process
if (next_in.len == 0) {
if (is_done) {
// If we're in the middle of inflating and stream is done, it's truncated
if (this.state == .Inflating) {
this.state = .Error;
return error.ZstdDecompressionError;
}
// No more input and stream is done, we can end
this.end();
}
return;
}
// Ensure at least 4KB of output headroom so the decoder can always flush.
var unused = this.list.unusedCapacitySlice();
if (unused.len < 4096) {
try this.list.ensureUnusedCapacity(this.list_allocator, 4096);
unused = this.list.unusedCapacitySlice();
}
// Null pointers for empty buffers: zstd requires valid-or-null pointers.
var in_buf: c.ZSTD_inBuffer = .{
.src = if (next_in.len > 0) next_in.ptr else null,
.size = next_in.len,
.pos = 0,
};
var out_buf: c.ZSTD_outBuffer = .{
.dst = if (unused.len > 0) unused.ptr else null,
.size = unused.len,
.pos = 0,
};
// rc == 0: frame fully decoded and flushed; rc > 0: a hint of how many
// more input bytes zstd expects; error codes detected via ZSTD_isError.
const rc = c.ZSTD_decompressStream(this.zstd, &out_buf, &in_buf);
if (c.ZSTD_isError(rc) != 0) {
this.state = .Error;
return error.ZstdDecompressionError;
}
const bytes_written = out_buf.pos;
const bytes_read = in_buf.pos;
// Output was written directly into the list's unused capacity; just
// extend the length to claim it.
this.list.items.len += bytes_written;
this.total_in += bytes_read;
this.total_out += bytes_written;
if (rc == 0) {
// Frame is complete
this.state = .Uninitialized; // Reset state since frame is complete
// Check if there's more input (multiple frames)
if (this.total_in >= this.input.len) {
// We've consumed all available input
if (is_done) {
// No more data coming, we can end the stream
this.end();
return;
}
// Frame is complete and no more input available right now.
// Just return normally - the caller can provide more data later if they have it.
return;
}
// More input available, reset for the next frame
// ZSTD_initDStream() safely resets the stream state without needing cleanup
// It's designed to be called multiple times on the same DStream object
_ = c.ZSTD_initDStream(this.zstd);
continue;
}
// If rc > 0, decompressor needs more data
if (rc > 0) {
this.state = .Inflating;
}
if (bytes_read == next_in.len) {
// We've consumed all available input
if (bytes_written > 0) {
// We wrote some output, continue to see if we need more output space
continue;
}
if (is_done) {
// Stream is truncated - we're at EOF but need more data
this.state = .Error;
return error.ZstdDecompressionError;
}
// Not at EOF - we can retry with more data
return error.ShortRead;
}
// bytes_read < next_in.len: the output buffer filled before the input was
// consumed; loop again to grow the output and keep draining.
}
}
};
const std = @import("std");
const bun = @import("bun");
const c = bun.c;