From d273f7fdde691b850b054ff93063df5e812359bf Mon Sep 17 00:00:00 2001 From: robobun Date: Mon, 6 Oct 2025 19:56:40 -0700 Subject: [PATCH] fix: zstd decompression with async-compressed data (#23314) (#23317) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What does this PR do? Fixes #23314 where `zlib.zstdCompress()` created data that caused an out-of-memory error when decompressed with `Bun.zstdDecompressSync()`. #### 1. `zlib.zstdCompress()` now sets `pledgedSrcSize` The async convenience method now automatically sets the `pledgedSrcSize` option to the input buffer size. This ensures the compressed frame includes the content size in the header, making sync and async compression produce identical output. **Node.js compatibility**: `pledgedSrcSize` is a documented Node.js option: - [`vendor/node/doc/api/zlib.md:754-758`](https://github.com/oven-sh/bun/blob/main/vendor/node/doc/api/zlib.md#L754-L758) - [`vendor/node/lib/zlib.js:893`](https://github.com/oven-sh/bun/blob/main/vendor/node/lib/zlib.js#L893) - [`vendor/node/src/node_zlib.cc:890-904`](https://github.com/oven-sh/bun/blob/main/vendor/node/src/node_zlib.cc#L890-L904) #### 2. Added `bun.zstd.decompressAlloc()` - centralized safe decompression Created a new function in `src/deps/zstd.zig` that handles decompression in one place with automatic safety features: - **Handles unknown content sizes**: Automatically switches to streaming decompression when the zstd frame doesn't include content size (e.g., from streams without `pledgedSrcSize`) - **16MB safety limit**: For security, if the reported decompressed size exceeds 16MB, streaming decompression is used instead of blindly trusting the header - **Fast path for small files**: Still uses efficient pre-allocation for files < 16MB with known sizes This centralized fix automatically protects: - `Bun.zstdDecompressSync()` / `Bun.zstdDecompress()` - `StandaloneModuleGraph` source map decompression - Any other code using `bun.zstd` decompression ### How did you verify your code works? **Before:** ```typescript const input = "hello world"; // Async compression const compressed = await new Promise((resolve, reject) => { zlib.zstdCompress(input, (err, result) => { if (err) reject(err); else resolve(result); }); }); // This would fail with "Out of memory" const decompressed = Bun.zstdDecompressSync(compressed); ``` **Error**: `RangeError: Out of memory` (tried to allocate UINT64_MAX bytes) **After:** ```typescript const input = "hello world"; // Async compression (now includes content size) const compressed = await new Promise((resolve, reject) => { zlib.zstdCompress(input, (err, result) => { if (err) reject(err); else resolve(result); }); }); // ✅ Works! Falls back to streaming decompression if needed const decompressed = Bun.zstdDecompressSync(compressed); console.log(decompressed.toString()); // "hello world" ``` **Tests:** - ✅ All existing tests pass - ✅ New regression tests for async/sync compression compatibility (`test/regression/issue/23314/zstd-async-compress.test.ts`) - ✅ Test for large (>16MB) decompression using streaming (`test/regression/issue/23314/zstd-large-decompression.test.ts`) - ✅ Test for various input sizes and types (`test/regression/issue/23314/zstd-large-input.test.ts`) **Security:** The 16MB safety limit protects against malicious zstd frames that claim huge decompressed sizes in the header, preventing potential OOM attacks. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Co-authored-by: Claude Bot Co-authored-by: Claude --- src/bun.js/api/BunObject.zig | 53 ++----------------- src/deps/zstd.zig | 41 ++++++++++++++ src/js/node/zlib.ts | 23 +++++++- .../issue/23314/zstd-async-compress.test.ts | 37 +++++++++++++ .../23314/zstd-large-decompression.test.ts | 20 +++++++ .../issue/23314/zstd-large-input.test.ts | 40 ++++++++++++++ 6 files changed, 163 insertions(+), 51 deletions(-) create mode 100644 test/regression/issue/23314/zstd-async-compress.test.ts create mode 100644 test/regression/issue/23314/zstd-large-decompression.test.ts create mode 100644 test/regression/issue/23314/zstd-large-input.test.ts diff --git a/src/bun.js/api/BunObject.zig b/src/bun.js/api/BunObject.zig index c7c6d1a7e8..04bfbda834 100644 --- a/src/bun.js/api/BunObject.zig +++ b/src/bun.js/api/BunObject.zig @@ -1837,31 +1837,10 @@ pub const JSZstd = struct { const input = buffer.slice(); const allocator = bun.default_allocator; - // Try to get the decompressed size - const decompressed_size = bun.zstd.getDecompressedSize(input); - - if (decompressed_size == std.math.maxInt(c_ulonglong) - 1 or decompressed_size == std.math.maxInt(c_ulonglong) - 2) { - // If size is unknown, we'll need to decompress in chunks - return globalThis.ERR(.ZSTD, "Decompressed size is unknown. Either the input is not a valid zstd compressed buffer or the decompressed size is too large. If you run into this error with a valid input, please file an issue at https://github.com/oven-sh/bun/issues", .{}).throw(); - } - - // Allocate output buffer based on decompressed size - var output = try allocator.alloc(u8, decompressed_size); - - // Perform decompression - const actual_size = switch (bun.zstd.decompress(output, input)) { - .success => |actual_size| actual_size, - .err => |err| { - allocator.free(output); - return globalThis.ERR(.ZSTD, "{s}", .{err}).throw(); - }, + const output = bun.zstd.decompressAlloc(allocator, input) catch |err| { + return globalThis.ERR(.ZSTD, "Decompression failed: {s}", .{@errorName(err)}).throw(); }; - bun.debugAssert(actual_size <= output.len); - - // mimalloc doesn't care about the self-reported size of the slice. - output.len = actual_size; - return jsc.JSValue.createBuffer(globalThis, output); } @@ -1918,34 +1897,10 @@ pub const JSZstd = struct { }; } else { // Decompression path - // Try to get the decompressed size - const decompressed_size = bun.zstd.getDecompressedSize(input); - - if (decompressed_size == std.math.maxInt(c_ulonglong) - 1 or decompressed_size == std.math.maxInt(c_ulonglong) - 2) { - job.error_message = "Decompressed size is unknown. Either the input is not a valid zstd compressed buffer or the decompressed size is too large"; - return; - } - - // Allocate output buffer based on decompressed size - job.output = allocator.alloc(u8, decompressed_size) catch { - job.error_message = "Out of memory"; + job.output = bun.zstd.decompressAlloc(allocator, input) catch { + job.error_message = "Decompression failed"; return; }; - - // Perform decompression - switch (bun.zstd.decompress(job.output, input)) { - .success => |actual_size| { - if (actual_size < job.output.len) { - job.output.len = actual_size; - } - }, - .err => |err| { - allocator.free(job.output); - job.output = &[_]u8{}; - job.error_message = err; - return; - }, - } } } diff --git a/src/deps/zstd.zig b/src/deps/zstd.zig index e800a83ae5..9042702e46 100644 --- a/src/deps/zstd.zig +++ b/src/deps/zstd.zig @@ -33,6 +33,47 @@ pub fn decompress(dest: []u8, src: []const u8) Result { return .{ .success = result }; } +/// Decompress data, automatically allocating the output buffer. +/// Returns owned slice that must be freed by the caller. +/// Handles both frames with known and unknown content sizes. +/// For safety, if the reported decompressed size exceeds 16MB, streaming decompression is used instead. +pub fn decompressAlloc(allocator: std.mem.Allocator, src: []const u8) ![]u8 { + const size = getDecompressedSize(src); + + const ZSTD_CONTENTSIZE_UNKNOWN = std.math.maxInt(c_ulonglong); // 0ULL - 1 + const ZSTD_CONTENTSIZE_ERROR = std.math.maxInt(c_ulonglong) - 1; // 0ULL - 2 + const MAX_PREALLOCATE_SIZE = 16 * 1024 * 1024; // 16MB safety limit + + if (size == ZSTD_CONTENTSIZE_ERROR) { + return error.InvalidZstdData; + } + + // Use streaming decompression if: + // 1. Content size is unknown, OR + // 2. Reported size exceeds safety limit (to prevent malicious inputs claiming huge sizes) + if (size == ZSTD_CONTENTSIZE_UNKNOWN or size > MAX_PREALLOCATE_SIZE) { + var list = std.ArrayListUnmanaged(u8){}; + const reader = try ZstdReaderArrayList.init(src, &list, allocator); + defer reader.deinit(); + + try reader.readAll(true); + return try list.toOwnedSlice(allocator); + } + + // Fast path: size is known and within reasonable limits + const output = try allocator.alloc(u8, size); + errdefer allocator.free(output); + + const result = decompress(output, src); + return switch (result) { + .success => |actual_size| output[0..actual_size], + .err => { + allocator.free(output); + return error.DecompressionFailed; + }, + }; +} + pub fn getDecompressedSize(src: []const u8) usize { return ZSTD_findDecompressedSize(src.ptr, src.len); } diff --git a/src/js/node/zlib.ts b/src/js/node/zlib.ts index 8caa689aaf..5949e454ce 100644 --- a/src/js/node/zlib.ts +++ b/src/js/node/zlib.ts @@ -642,7 +642,7 @@ function Unzip(opts): void { } $toClass(Unzip, "Unzip", Zlib); -function createConvenienceMethod(ctor, sync, methodName) { +function createConvenienceMethod(ctor, sync, methodName, isZstd) { if (sync) { const fn = function (buffer, opts) { return zlibBufferSync(new ctor(opts), buffer); @@ -655,6 +655,25 @@ function createConvenienceMethod(ctor, sync, methodName) { callback = opts; opts = {}; } + // For zstd compression, we need to set pledgedSrcSize to the buffer size + // so that the content size is included in the frame header + if (isZstd) { + // Calculate buffer size + let bufferSize; + if (typeof buffer === "string") { + bufferSize = Buffer.byteLength(buffer); + } else if (isArrayBufferView(buffer)) { + bufferSize = buffer.byteLength; + } else if (isAnyArrayBuffer(buffer)) { + bufferSize = buffer.byteLength; + } else { + bufferSize = 0; + } + // Set pledgedSrcSize if not already set + if (!opts.pledgedSrcSize && bufferSize > 0) { + opts = { ...opts, pledgedSrcSize: bufferSize }; + } + } return zlibBuffer(new ctor(opts), buffer, callback); }; ObjectDefineProperty(fn, "name", { value: methodName }); @@ -813,7 +832,7 @@ const zlib = { brotliCompressSync: createConvenienceMethod(BrotliCompress, true, "brotliCompressSync"), brotliDecompress: createConvenienceMethod(BrotliDecompress, false, "brotliDecompress"), brotliDecompressSync: createConvenienceMethod(BrotliDecompress, true, "brotliDecompressSync"), - zstdCompress: createConvenienceMethod(ZstdCompress, false, "zstdCompress"), + zstdCompress: createConvenienceMethod(ZstdCompress, false, "zstdCompress", true), zstdCompressSync: createConvenienceMethod(ZstdCompress, true, "zstdCompressSync"), zstdDecompress: createConvenienceMethod(ZstdDecompress, false, "zstdDecompress"), zstdDecompressSync: createConvenienceMethod(ZstdDecompress, true, "zstdDecompressSync"), diff --git a/test/regression/issue/23314/zstd-async-compress.test.ts b/test/regression/issue/23314/zstd-async-compress.test.ts new file mode 100644 index 0000000000..17b04544d5 --- /dev/null +++ b/test/regression/issue/23314/zstd-async-compress.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, it } from "bun:test"; +import zlib from "node:zlib"; + +// The zlib sync and async implementations create different outputs +// This may not be a bug in itself, but the async version creates data that causes an out of memory error when decompressed with Bun.zstdDecompressSync +describe("zstd compression compatibility", () => { + it("should decompress data compressed with zlib.zstdCompressSync", () => { + const input = "hello world"; + const compressed = zlib.zstdCompressSync(input); + const decompressed = Bun.zstdDecompressSync(compressed); + expect(decompressed.toString()).toBe(input); + }); + + it("should decompress data compressed with zlib.zstdCompress (async)", async () => { + const input = "hello world"; + const compressed = await new Promise((resolve, reject) => { + zlib.zstdCompress(input, (err, result) => { + if (err) reject(err); + else resolve(result); + }); + }); + const decompressed = Bun.zstdDecompressSync(compressed); + expect(decompressed.toString()).toBe(input); + }); + + it("should decompress data compressed with zlib.zstdCompress using Bun.zstdDecompress", async () => { + const input = "hello world"; + const compressed = await new Promise((resolve, reject) => { + zlib.zstdCompress(input, (err, result) => { + if (err) reject(err); + else resolve(result); + }); + }); + const decompressed = await Bun.zstdDecompress(compressed); + expect(decompressed.toString()).toBe(input); + }); +}); diff --git a/test/regression/issue/23314/zstd-large-decompression.test.ts b/test/regression/issue/23314/zstd-large-decompression.test.ts new file mode 100644 index 0000000000..e376cd1037 --- /dev/null +++ b/test/regression/issue/23314/zstd-large-decompression.test.ts @@ -0,0 +1,20 @@ +import { expect, test } from "bun:test"; +import zlib from "node:zlib"; + +test("should handle large data decompression safely", async () => { + // Create data that decompresses to > 16MB + const input = "x".repeat(20 * 1024 * 1024); // 20MB of repeated data + + // Compress with pledgedSrcSize so the frame header includes the size + const compressed = await new Promise((resolve, reject) => { + zlib.zstdCompress(input, { pledgedSrcSize: input.length }, (err, result) => { + if (err) reject(err); + else resolve(result); + }); + }); + + // This should use streaming decompression because reported size > 16MB + const decompressed = Bun.zstdDecompressSync(compressed); + expect(decompressed.length).toBe(input.length); + expect(decompressed.toString()).toBe(input); +}); diff --git a/test/regression/issue/23314/zstd-large-input.test.ts b/test/regression/issue/23314/zstd-large-input.test.ts new file mode 100644 index 0000000000..5342ed2a83 --- /dev/null +++ b/test/regression/issue/23314/zstd-large-input.test.ts @@ -0,0 +1,40 @@ +import { describe, expect, it } from "bun:test"; +import zlib from "node:zlib"; + +describe("zstd compression with larger inputs", () => { + it("should handle larger strings", async () => { + const input = "hello world ".repeat(1000); + const compressed = await new Promise((resolve, reject) => { + zlib.zstdCompress(input, (err, result) => { + if (err) reject(err); + else resolve(result); + }); + }); + const decompressed = Bun.zstdDecompressSync(compressed); + expect(decompressed.toString()).toBe(input); + }); + + it("should handle buffers", async () => { + const input = Buffer.from("test data ".repeat(500)); + const compressed = await new Promise((resolve, reject) => { + zlib.zstdCompress(input, (err, result) => { + if (err) reject(err); + else resolve(result); + }); + }); + const decompressed = Bun.zstdDecompressSync(compressed); + expect(decompressed.toString()).toBe(input.toString()); + }); + + it("should respect custom pledgedSrcSize if provided", async () => { + const input = "custom test"; + const compressed = await new Promise((resolve, reject) => { + zlib.zstdCompress(input, { pledgedSrcSize: input.length }, (err, result) => { + if (err) reject(err); + else resolve(result); + }); + }); + const decompressed = Bun.zstdDecompressSync(compressed); + expect(decompressed.toString()).toBe(input); + }); +});