From e213bc9cbefd27732761e75db5361f90145dcc80 Mon Sep 17 00:00:00 2001 From: Claude Bot Date: Sat, 23 Aug 2025 02:09:24 +0000 Subject: [PATCH] feat(node:zlib): Add dictionary support to zstdCompress and zstdDecompress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements dictionary support for ZSTD compression and decompression in node:zlib, matching Node.js API. Dictionaries can significantly improve compression ratios for data with predictable patterns. Changes: - Add compressUsingDict and decompressUsingDict functions to src/deps/zstd.zig - Update NativeZstd.zig to handle dictionary loading in compression contexts - Modify Zstd class in zlib.ts to accept dictionary option and pass to native code - Add test case from Node.js test suite to verify functionality The implementation uses ZSTD_CCtx_loadDictionary and ZSTD_DCtx_loadDictionary APIs for streaming compression/decompression with dictionary support. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- src/bun.js/node/zlib/NativeZstd.zig | 47 ++++++++++++++++++- src/deps/zstd.zig | 24 ++++++++++ src/js/node/zlib.ts | 16 ++++++- .../parallel/test-zlib-zstd-dictionary.js | 26 ++++++++++ 4 files changed, 110 insertions(+), 3 deletions(-) create mode 100644 test/js/node/test/parallel/test-zlib-zstd-dictionary.js diff --git a/src/bun.js/node/zlib/NativeZstd.zig b/src/bun.js/node/zlib/NativeZstd.zig index 5254ca51fc..830c38bd25 100644 --- a/src/bun.js/node/zlib/NativeZstd.zig +++ b/src/bun.js/node/zlib/NativeZstd.zig @@ -61,14 +61,15 @@ pub fn estimatedSize(this: *const @This()) usize { } pub fn init(this: *@This(), globalThis: *jsc.JSGlobalObject, callframe: *jsc.CallFrame) bun.JSError!jsc.JSValue { - const arguments = callframe.argumentsAsArray(4); + const arguments = callframe.argumentsAsArray(5); const this_value = callframe.this(); - if (callframe.argumentsCount() != 4) return globalThis.ERR(.MISSING_ARGS, "init(initParamsArray, 
pledgedSrcSize, writeState, processCallback)", .{}).throw(); + if (callframe.argumentsCount() < 4 or callframe.argumentsCount() > 5) return globalThis.ERR(.MISSING_ARGS, "init(initParamsArray, pledgedSrcSize, writeState, processCallback, dictionary?)", .{}).throw(); const initParamsArray_value = arguments[0]; const pledgedSrcSize_value = arguments[1]; const writeState_value = arguments[2]; const processCallback_value = arguments[3]; + const dictionary_value = if (callframe.argumentsCount() >= 5) arguments[4] else jsc.JSValue.js_undefined; const writeState = writeState_value.asArrayBuffer(globalThis) orelse return globalThis.throwInvalidArgumentTypeValue("writeState", "Uint32Array", writeState_value); if (writeState.typed_array_type != .Uint32Array) return globalThis.throwInvalidArgumentTypeValue("writeState", "Uint32Array", writeState_value); @@ -82,6 +83,16 @@ pub fn init(this: *@This(), globalThis: *jsc.JSGlobalObject, callframe: *jsc.Cal pledged_src_size = try validators.validateUint32(globalThis, pledgedSrcSize_value, "pledgedSrcSize", .{}, false); } + // Handle dictionary if provided + if (!dictionary_value.isUndefined()) { + const dictionary_buffer = dictionary_value.asArrayBuffer(globalThis) orelse return globalThis.throwInvalidArgumentTypeValue("dictionary", "Buffer or TypedArray", dictionary_value); + const dictionary_slice = dictionary_buffer.slice(); + const dict_err = this.stream.setDictionary(dictionary_slice); + if (dict_err.isError()) { + return globalThis.ERR(.ZLIB_INITIALIZATION_FAILED, "{s}", .{std.mem.sliceTo(dict_err.msg.?, 0)}).throw(); + } + } + var err = this.stream.init(pledged_src_size); if (err.isError()) { try impl.emitError(this, globalThis, this_value, err); @@ -126,6 +137,7 @@ const Context = struct { output: c.ZSTD_outBuffer = .{ .dst = null, .size = 0, .pos = 0 }, pledged_src_size: u64 = std.math.maxInt(u64), remaining: u64 = 0, + dictionary: ?[]const u8 = null, pub fn init(this: *Context, pledged_src_size: u64) Error { switch 
(this.mode) { @@ -136,18 +148,49 @@ const Context = struct { this.state = state.?; const result = c.ZSTD_CCtx_setPledgedSrcSize(state, pledged_src_size); if (c.ZSTD_isError(result) > 0) return .init("Could not set pledged src size", -1, "ERR_ZLIB_INITIALIZATION_FAILED"); + + // Load dictionary if provided + if (this.dictionary) |dict| { + const dict_result = c.ZSTD_CCtx_loadDictionary(@ptrCast(this.state), dict.ptr, dict.len); + if (c.ZSTD_isError(dict_result) > 0) return .init("Could not load dictionary", -1, "ERR_ZLIB_INITIALIZATION_FAILED"); + } return .ok; }, .ZSTD_DECOMPRESS => { const state = c.ZSTD_createDCtx(); if (state == null) return .init("Could not initialize zstd instance", -1, "ERR_ZLIB_INITIALIZATION_FAILED"); this.state = state.?; + + // Load dictionary if provided + if (this.dictionary) |dict| { + const dict_result = c.ZSTD_DCtx_loadDictionary(@ptrCast(this.state), dict.ptr, dict.len); + if (c.ZSTD_isError(dict_result) > 0) return .init("Could not load dictionary", -1, "ERR_ZLIB_INITIALIZATION_FAILED"); + } return .ok; }, else => @panic("unreachable"), } } + pub fn setDictionary(this: *Context, dictionary: []const u8) Error { + this.dictionary = dictionary; + // If state is already initialized, load dictionary immediately + if (this.state) |state| { + switch (this.mode) { + .ZSTD_COMPRESS => { + const result = c.ZSTD_CCtx_loadDictionary(@ptrCast(state), dictionary.ptr, dictionary.len); + if (c.ZSTD_isError(result) > 0) return .init("Could not load dictionary", -1, "ERR_ZLIB_INITIALIZATION_FAILED"); + }, + .ZSTD_DECOMPRESS => { + const result = c.ZSTD_DCtx_loadDictionary(@ptrCast(state), dictionary.ptr, dictionary.len); + if (c.ZSTD_isError(result) > 0) return .init("Could not load dictionary", -1, "ERR_ZLIB_INITIALIZATION_FAILED"); + }, + else => {}, + } + } + return .ok; + } + pub fn setParams(this: *Context, key: c_uint, value: u32) Error { switch (this.mode) { .ZSTD_COMPRESS => { diff --git a/src/deps/zstd.zig b/src/deps/zstd.zig index 
e2b8016ff6..d67505c239 100644 --- a/src/deps/zstd.zig +++ b/src/deps/zstd.zig @@ -33,6 +33,30 @@ pub fn decompress(dest: []u8, src: []const u8) Result { return .{ .success = result }; } +/// ZSTD_compress_usingDict() : +/// Compression using a predefined Dictionary. +/// Dictionary should be built by training on a dataset of representative samples. +/// Compression with a dictionary is faster when used with the same content repeatedly. +/// @return : compressed size written into `dst` (<= `dstCapacity`), +/// or an error code if it fails (which can be tested using ZSTD_isError()). +pub fn compressUsingDict(dest: []u8, src: []const u8, dict: []const u8, level: ?i32) Result { + const result = c.ZSTD_compress_usingDict(dest.ptr, dest.len, src.ptr, src.len, dict.ptr, dict.len, level orelse c.ZSTD_defaultCLevel()); + if (c.ZSTD_isError(result) != 0) return .{ .err = bun.sliceTo(c.ZSTD_getErrorName(result), 0) }; + return .{ .success = result }; +} + +/// ZSTD_decompress_usingDict() : +/// Decompression using a predefined Dictionary. +/// Dictionary should be built by training on a dataset of representative samples. +/// Dictionary must be the same as the one used during compression. +/// @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), +/// or an error code if it fails (which can be tested using ZSTD_isError()). 
+pub fn decompressUsingDict(dest: []u8, src: []const u8, dict: []const u8) Result { + const result = c.ZSTD_decompress_usingDict(dest.ptr, dest.len, src.ptr, src.len, dict.ptr, dict.len); + if (c.ZSTD_isError(result) != 0) return .{ .err = bun.sliceTo(c.ZSTD_getErrorName(result), 0) }; + return .{ .success = result }; +} + pub fn getDecompressedSize(src: []const u8) usize { return ZSTD_findDecompressedSize(src.ptr, src.len); } diff --git a/src/js/node/zlib.ts b/src/js/node/zlib.ts index 57902037a0..93f40d2cba 100644 --- a/src/js/node/zlib.ts +++ b/src/js/node/zlib.ts @@ -740,12 +740,26 @@ class Zstd extends ZlibBase { }); } + // Handle dictionary option + let dictionary; + if (opts?.dictionary !== undefined) { + if (!isArrayBufferView(opts.dictionary)) { + if (isAnyArrayBuffer(opts.dictionary)) { + dictionary = Buffer.from(opts.dictionary); + } else { + throw $ERR_INVALID_ARG_TYPE("options.dictionary", "Buffer, TypedArray, DataView, or ArrayBuffer", opts.dictionary); + } + } else { + dictionary = opts.dictionary; + } + } + const handle = new NativeZstd(mode); const pledgedSrcSize = opts?.pledgedSrcSize ?? undefined; const writeState = new Uint32Array(2); - handle.init(initParamsArray, pledgedSrcSize, writeState, processCallback); + handle.init(initParamsArray, pledgedSrcSize, writeState, processCallback, dictionary); super(opts, mode, handle, zstdDefaultOpts); this._writeState = writeState; } diff --git a/test/js/node/test/parallel/test-zlib-zstd-dictionary.js b/test/js/node/test/parallel/test-zlib-zstd-dictionary.js new file mode 100644 index 0000000000..28dde28cb0 --- /dev/null +++ b/test/js/node/test/parallel/test-zlib-zstd-dictionary.js @@ -0,0 +1,26 @@ +'use strict'; + +const common = require('../common'); +const assert = require('assert'); +const zlib = require('zlib'); + +const dictionary = Buffer.from( + `Lorem ipsum dolor sit amet, consectetur adipiscing elit. + Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
+ Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.` +); + +const input = Buffer.from( + `Lorem ipsum dolor sit amet, consectetur adipiscing elit. + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. + Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.` +); + +zlib.zstdCompress(input, { dictionary }, common.mustSucceed((compressed) => { + assert(compressed.length < input.length); + zlib.zstdDecompress(compressed, { dictionary }, common.mustSucceed((decompressed) => { + assert.strictEqual(decompressed.toString(), input.toString()); + })); +}));