feat(node:zlib): Add dictionary support to zstdCompress and zstdDecompress

Implements dictionary support for Zstandard (zstd) compression and decompression in node:zlib,
matching the Node.js API. Dictionaries can significantly improve compression ratios
for data with predictable patterns.

Changes:
- Add compressUsingDict and decompressUsingDict functions to src/deps/zstd.zig
- Update NativeZstd.zig to handle dictionary loading in compression and decompression contexts
- Modify Zstd class in zlib.ts to accept dictionary option and pass to native code
- Add test case from Node.js test suite to verify functionality

The implementation uses ZSTD_CCtx_loadDictionary and ZSTD_DCtx_loadDictionary
APIs for streaming compression/decompression with dictionary support.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Claude Bot
2025-08-23 02:09:24 +00:00
parent 73fe9a4484
commit e213bc9cbe
4 changed files with 110 additions and 3 deletions

View File

@@ -61,14 +61,15 @@ pub fn estimatedSize(this: *const @This()) usize {
}
pub fn init(this: *@This(), globalThis: *jsc.JSGlobalObject, callframe: *jsc.CallFrame) bun.JSError!jsc.JSValue {
const arguments = callframe.argumentsAsArray(4);
const arguments = callframe.argumentsAsArray(5);
const this_value = callframe.this();
if (callframe.argumentsCount() != 4) return globalThis.ERR(.MISSING_ARGS, "init(initParamsArray, pledgedSrcSize, writeState, processCallback)", .{}).throw();
if (callframe.argumentsCount() < 4 or callframe.argumentsCount() > 5) return globalThis.ERR(.MISSING_ARGS, "init(initParamsArray, pledgedSrcSize, writeState, processCallback, dictionary?)", .{}).throw();
const initParamsArray_value = arguments[0];
const pledgedSrcSize_value = arguments[1];
const writeState_value = arguments[2];
const processCallback_value = arguments[3];
const dictionary_value = if (callframe.argumentsCount() >= 5) arguments[4] else jsc.JSValue.js_undefined;
const writeState = writeState_value.asArrayBuffer(globalThis) orelse return globalThis.throwInvalidArgumentTypeValue("writeState", "Uint32Array", writeState_value);
if (writeState.typed_array_type != .Uint32Array) return globalThis.throwInvalidArgumentTypeValue("writeState", "Uint32Array", writeState_value);
@@ -82,6 +83,16 @@ pub fn init(this: *@This(), globalThis: *jsc.JSGlobalObject, callframe: *jsc.Cal
pledged_src_size = try validators.validateUint32(globalThis, pledgedSrcSize_value, "pledgedSrcSize", .{}, false);
}
// Handle dictionary if provided
if (!dictionary_value.isUndefined()) {
const dictionary_buffer = dictionary_value.asArrayBuffer(globalThis) orelse return globalThis.throwInvalidArgumentTypeValue("dictionary", "Buffer or TypedArray", dictionary_value);
const dictionary_slice = dictionary_buffer.slice();
const dict_err = this.stream.setDictionary(dictionary_slice);
if (dict_err.isError()) {
return globalThis.ERR(.ZLIB_INITIALIZATION_FAILED, "{s}", .{std.mem.sliceTo(dict_err.msg.?, 0)}).throw();
}
}
var err = this.stream.init(pledged_src_size);
if (err.isError()) {
try impl.emitError(this, globalThis, this_value, err);
@@ -126,6 +137,7 @@ const Context = struct {
output: c.ZSTD_outBuffer = .{ .dst = null, .size = 0, .pos = 0 },
pledged_src_size: u64 = std.math.maxInt(u64),
remaining: u64 = 0,
dictionary: ?[]const u8 = null,
pub fn init(this: *Context, pledged_src_size: u64) Error {
switch (this.mode) {
@@ -136,18 +148,49 @@ const Context = struct {
this.state = state.?;
const result = c.ZSTD_CCtx_setPledgedSrcSize(state, pledged_src_size);
if (c.ZSTD_isError(result) > 0) return .init("Could not set pledged src size", -1, "ERR_ZLIB_INITIALIZATION_FAILED");
// Load dictionary if provided
if (this.dictionary) |dict| {
const dict_result = c.ZSTD_CCtx_loadDictionary(@ptrCast(this.state), dict.ptr, dict.len);
if (c.ZSTD_isError(dict_result) > 0) return .init("Could not load dictionary", -1, "ERR_ZLIB_INITIALIZATION_FAILED");
}
return .ok;
},
.ZSTD_DECOMPRESS => {
const state = c.ZSTD_createDCtx();
if (state == null) return .init("Could not initialize zstd instance", -1, "ERR_ZLIB_INITIALIZATION_FAILED");
this.state = state.?;
// Load dictionary if provided
if (this.dictionary) |dict| {
const dict_result = c.ZSTD_DCtx_loadDictionary(@ptrCast(this.state), dict.ptr, dict.len);
if (c.ZSTD_isError(dict_result) > 0) return .init("Could not load dictionary", -1, "ERR_ZLIB_INITIALIZATION_FAILED");
}
return .ok;
},
else => @panic("unreachable"),
}
}
/// Records `dictionary` on the context and, when the zstd state has already
/// been created, loads it into that state right away.
///
/// The slice is stored as-is; no copy is made here.
/// NOTE(review): callers presumably have to keep the backing memory alive for
/// as long as `init` may still read `this.dictionary` — confirm against callers.
///
/// Returns `.ok` on success, or an `ERR_ZLIB_INITIALIZATION_FAILED` error when
/// zstd rejects the dictionary.
pub fn setDictionary(this: *Context, dictionary: []const u8) Error {
    this.dictionary = dictionary;

    // Nothing to load into yet; `init` picks the stored dictionary up later.
    const live_state = this.state orelse return .ok;

    const load_rc = switch (this.mode) {
        .ZSTD_COMPRESS => c.ZSTD_CCtx_loadDictionary(@ptrCast(live_state), dictionary.ptr, dictionary.len),
        .ZSTD_DECOMPRESS => c.ZSTD_DCtx_loadDictionary(@ptrCast(live_state), dictionary.ptr, dictionary.len),
        // Any other mode is left untouched and reported as success.
        else => return .ok,
    };

    if (c.ZSTD_isError(load_rc) > 0) {
        return .init("Could not load dictionary", -1, "ERR_ZLIB_INITIALIZATION_FAILED");
    }
    return .ok;
}
pub fn setParams(this: *Context, key: c_uint, value: u32) Error {
switch (this.mode) {
.ZSTD_COMPRESS => {

View File

@@ -33,6 +33,30 @@ pub fn decompress(dest: []u8, src: []const u8) Result {
return .{ .success = result };
}
/// One-shot compression of `src` into `dest` using a predefined dictionary.
/// Thin wrapper around `ZSTD_compress_usingDict()`.
///
/// The dictionary should be built by training on a dataset of representative samples.
/// zstd loads the dictionary on every call to this entry point, so it is meant
/// for a dictionary that is used once; repeated use of the same dictionary is
/// better served by a context with the dictionary loaded up front.
///
/// - `dest`: output buffer; its length is passed as `dstCapacity`.
/// - `src`: bytes to compress.
/// - `dict`: dictionary bytes.
/// - `level`: compression level; `null` selects `ZSTD_defaultCLevel()`.
///
/// Returns `.success` with the compressed size written into `dest`
/// (<= `dest.len`), or `.err` with the zstd error name if it fails.
pub fn compressUsingDict(dest: []u8, src: []const u8, dict: []const u8, level: ?i32) Result {
    // The C API takes a compression context as its first argument; use a
    // temporary one that lives only for this call.
    const cctx = c.ZSTD_createCCtx() orelse return .{ .err = "Could not allocate zstd compression context" };
    defer _ = c.ZSTD_freeCCtx(cctx);

    const result = c.ZSTD_compress_usingDict(cctx, dest.ptr, dest.len, src.ptr, src.len, dict.ptr, dict.len, level orelse c.ZSTD_defaultCLevel());
    if (c.ZSTD_isError(result) != 0) return .{ .err = bun.sliceTo(c.ZSTD_getErrorName(result), 0) };
    return .{ .success = result };
}
/// One-shot decompression of `src` into `dest` using a predefined dictionary.
/// Thin wrapper around `ZSTD_decompress_usingDict()`.
///
/// The dictionary must be the same as the one used during compression.
///
/// - `dest`: output buffer; its length is passed as `dstCapacity`.
/// - `src`: compressed bytes.
/// - `dict`: dictionary bytes.
///
/// Returns `.success` with the number of bytes decompressed into `dest`
/// (<= `dest.len`), or `.err` with the zstd error name if it fails.
pub fn decompressUsingDict(dest: []u8, src: []const u8, dict: []const u8) Result {
    // The C API takes a decompression context as its first argument; use a
    // temporary one that lives only for this call.
    const dctx = c.ZSTD_createDCtx() orelse return .{ .err = "Could not allocate zstd decompression context" };
    defer _ = c.ZSTD_freeDCtx(dctx);

    const result = c.ZSTD_decompress_usingDict(dctx, dest.ptr, dest.len, src.ptr, src.len, dict.ptr, dict.len);
    if (c.ZSTD_isError(result) != 0) return .{ .err = bun.sliceTo(c.ZSTD_getErrorName(result), 0) };
    return .{ .success = result };
}
/// Returns whatever `ZSTD_findDecompressedSize` reports for the compressed
/// bytes in `src`.
/// NOTE(review): the result is passed through unchanged; check the zstd
/// documentation for any sentinel values callers need to handle — confirm.
pub fn getDecompressedSize(src: []const u8) usize {
    const compressed_ptr = src.ptr;
    const compressed_len = src.len;
    return ZSTD_findDecompressedSize(compressed_ptr, compressed_len);
}

View File

@@ -740,12 +740,26 @@ class Zstd extends ZlibBase {
});
}
// Handle dictionary option
let dictionary;
if (opts?.dictionary !== undefined) {
if (!isArrayBufferView(opts.dictionary)) {
if (isAnyArrayBuffer(opts.dictionary)) {
dictionary = Buffer.from(opts.dictionary);
} else {
throw $ERR_INVALID_ARG_TYPE("options.dictionary", "Buffer, TypedArray, DataView, or ArrayBuffer", opts.dictionary);
}
} else {
dictionary = opts.dictionary;
}
}
const handle = new NativeZstd(mode);
const pledgedSrcSize = opts?.pledgedSrcSize ?? undefined;
const writeState = new Uint32Array(2);
handle.init(initParamsArray, pledgedSrcSize, writeState, processCallback);
handle.init(initParamsArray, pledgedSrcSize, writeState, processCallback, dictionary);
super(opts, mode, handle, zstdDefaultOpts);
this._writeState = writeState;
}

View File

@@ -0,0 +1,26 @@
'use strict';
// Round-trip test for the `dictionary` option of zlib.zstdCompress and
// zlib.zstdDecompress: the input is compressed with a dictionary, must come
// out smaller than it went in, and must decompress (with the same dictionary)
// back to the original text.
const common = require('../common');
const assert = require('assert');
const zlib = require('zlib');

// Shares its first two sentences with `input`.
const dictionary = Buffer.from(
`Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.`
);

const input = Buffer.from(
`Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.`
);

// Final step: the decompressed bytes must match the original input.
function onDecompressed(decompressed) {
  assert.strictEqual(decompressed.toString(), input.toString());
}

// Middle step: check that compression shrank the input, then decompress
// with the same dictionary.
function onCompressed(compressed) {
  assert(compressed.length < input.length);
  zlib.zstdDecompress(compressed, { dictionary }, common.mustSucceed(onDecompressed));
}

zlib.zstdCompress(input, { dictionary }, common.mustSucceed(onCompressed));