mirror of
https://github.com/oven-sh/bun
synced 2026-02-09 10:28:47 +00:00
feat(node:zlib): Add dictionary support to zstdCompress and zstdDecompress
Implements dictionary support for ZSTD compression and decompression in node:zlib, matching the Node.js API. Dictionaries can significantly improve compression ratios for data with predictable patterns.

Changes:
- Add compressUsingDict and decompressUsingDict functions to src/deps/zstd.zig
- Update NativeZstd.zig to handle dictionary loading in compression contexts
- Modify Zstd class in zlib.ts to accept dictionary option and pass to native code
- Add test case from Node.js test suite to verify functionality

The implementation uses the ZSTD_CCtx_loadDictionary and ZSTD_DCtx_loadDictionary APIs for streaming compression/decompression with dictionary support.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -61,14 +61,15 @@ pub fn estimatedSize(this: *const @This()) usize {
|
||||
}
|
||||
|
||||
pub fn init(this: *@This(), globalThis: *jsc.JSGlobalObject, callframe: *jsc.CallFrame) bun.JSError!jsc.JSValue {
|
||||
const arguments = callframe.argumentsAsArray(4);
|
||||
const arguments = callframe.argumentsAsArray(5);
|
||||
const this_value = callframe.this();
|
||||
if (callframe.argumentsCount() != 4) return globalThis.ERR(.MISSING_ARGS, "init(initParamsArray, pledgedSrcSize, writeState, processCallback)", .{}).throw();
|
||||
if (callframe.argumentsCount() < 4 or callframe.argumentsCount() > 5) return globalThis.ERR(.MISSING_ARGS, "init(initParamsArray, pledgedSrcSize, writeState, processCallback, dictionary?)", .{}).throw();
|
||||
|
||||
const initParamsArray_value = arguments[0];
|
||||
const pledgedSrcSize_value = arguments[1];
|
||||
const writeState_value = arguments[2];
|
||||
const processCallback_value = arguments[3];
|
||||
const dictionary_value = if (callframe.argumentsCount() >= 5) arguments[4] else jsc.JSValue.js_undefined;
|
||||
|
||||
const writeState = writeState_value.asArrayBuffer(globalThis) orelse return globalThis.throwInvalidArgumentTypeValue("writeState", "Uint32Array", writeState_value);
|
||||
if (writeState.typed_array_type != .Uint32Array) return globalThis.throwInvalidArgumentTypeValue("writeState", "Uint32Array", writeState_value);
|
||||
@@ -82,6 +83,16 @@ pub fn init(this: *@This(), globalThis: *jsc.JSGlobalObject, callframe: *jsc.Cal
|
||||
pledged_src_size = try validators.validateUint32(globalThis, pledgedSrcSize_value, "pledgedSrcSize", .{}, false);
|
||||
}
|
||||
|
||||
// Handle dictionary if provided
|
||||
if (!dictionary_value.isUndefined()) {
|
||||
const dictionary_buffer = dictionary_value.asArrayBuffer(globalThis) orelse return globalThis.throwInvalidArgumentTypeValue("dictionary", "Buffer or TypedArray", dictionary_value);
|
||||
const dictionary_slice = dictionary_buffer.slice();
|
||||
const dict_err = this.stream.setDictionary(dictionary_slice);
|
||||
if (dict_err.isError()) {
|
||||
return globalThis.ERR(.ZLIB_INITIALIZATION_FAILED, "{s}", .{std.mem.sliceTo(dict_err.msg.?, 0)}).throw();
|
||||
}
|
||||
}
|
||||
|
||||
var err = this.stream.init(pledged_src_size);
|
||||
if (err.isError()) {
|
||||
try impl.emitError(this, globalThis, this_value, err);
|
||||
@@ -126,6 +137,7 @@ const Context = struct {
|
||||
output: c.ZSTD_outBuffer = .{ .dst = null, .size = 0, .pos = 0 },
|
||||
pledged_src_size: u64 = std.math.maxInt(u64),
|
||||
remaining: u64 = 0,
|
||||
dictionary: ?[]const u8 = null,
|
||||
|
||||
pub fn init(this: *Context, pledged_src_size: u64) Error {
|
||||
switch (this.mode) {
|
||||
@@ -136,18 +148,49 @@ const Context = struct {
|
||||
this.state = state.?;
|
||||
const result = c.ZSTD_CCtx_setPledgedSrcSize(state, pledged_src_size);
|
||||
if (c.ZSTD_isError(result) > 0) return .init("Could not set pledged src size", -1, "ERR_ZLIB_INITIALIZATION_FAILED");
|
||||
|
||||
// Load dictionary if provided
|
||||
if (this.dictionary) |dict| {
|
||||
const dict_result = c.ZSTD_CCtx_loadDictionary(@ptrCast(this.state), dict.ptr, dict.len);
|
||||
if (c.ZSTD_isError(dict_result) > 0) return .init("Could not load dictionary", -1, "ERR_ZLIB_INITIALIZATION_FAILED");
|
||||
}
|
||||
return .ok;
|
||||
},
|
||||
.ZSTD_DECOMPRESS => {
|
||||
const state = c.ZSTD_createDCtx();
|
||||
if (state == null) return .init("Could not initialize zstd instance", -1, "ERR_ZLIB_INITIALIZATION_FAILED");
|
||||
this.state = state.?;
|
||||
|
||||
// Load dictionary if provided
|
||||
if (this.dictionary) |dict| {
|
||||
const dict_result = c.ZSTD_DCtx_loadDictionary(@ptrCast(this.state), dict.ptr, dict.len);
|
||||
if (c.ZSTD_isError(dict_result) > 0) return .init("Could not load dictionary", -1, "ERR_ZLIB_INITIALIZATION_FAILED");
|
||||
}
|
||||
return .ok;
|
||||
},
|
||||
else => @panic("unreachable"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Remember `dictionary` on this context and, if a zstd state has already
/// been created, load it into that state immediately.
///
/// Returns `.ok` on success (including when no state exists yet, or when the
/// mode is neither compress nor decompress), or an
/// `ERR_ZLIB_INITIALIZATION_FAILED` error when zstd rejects the dictionary.
///
/// NOTE(review): the slice is stored as-is; no copy is made here. Whoever
/// owns the backing memory presumably has to keep it valid for as long as
/// `this.dictionary` can still be read -- confirm against callers.
pub fn setDictionary(this: *Context, dictionary: []const u8) Error {
    this.dictionary = dictionary;

    // Nothing to load into yet; the stored slice is all we can record.
    const ctx = this.state orelse return .ok;

    const load_result = switch (this.mode) {
        .ZSTD_COMPRESS => c.ZSTD_CCtx_loadDictionary(@ptrCast(ctx), dictionary.ptr, dictionary.len),
        .ZSTD_DECOMPRESS => c.ZSTD_DCtx_loadDictionary(@ptrCast(ctx), dictionary.ptr, dictionary.len),
        // Any other mode has no dictionary to load.
        else => return .ok,
    };

    if (c.ZSTD_isError(load_result) > 0) return .init("Could not load dictionary", -1, "ERR_ZLIB_INITIALIZATION_FAILED");
    return .ok;
}
|
||||
|
||||
pub fn setParams(this: *Context, key: c_uint, value: u32) Error {
|
||||
switch (this.mode) {
|
||||
.ZSTD_COMPRESS => {
|
||||
|
||||
@@ -33,6 +33,30 @@ pub fn decompress(dest: []u8, src: []const u8) Result {
|
||||
return .{ .success = result };
|
||||
}
|
||||
|
||||
/// One-shot compression of `src` into `dest` using a predefined dictionary.
///
/// Wraps `ZSTD_compress_usingDict()`. That C function takes a compression
/// context as its first argument, so a temporary `ZSTD_CCtx` is created for
/// the duration of the call and freed before returning.
///
/// - `dest`:  output buffer; its length is passed as `dstCapacity`.
/// - `src`:   bytes to compress.
/// - `dict`:  dictionary bytes. The same dictionary must be supplied when
///            decompressing.
/// - `level`: compression level; `null` selects `ZSTD_defaultCLevel()`.
///
/// Returns `.success` with the compressed size written into `dest`
/// (<= `dest.len`), or `.err` with a message if the context could not be
/// created or zstd reported an error (as tested by `ZSTD_isError()`).
pub fn compressUsingDict(dest: []u8, src: []const u8, dict: []const u8, level: ?i32) Result {
    const cctx = c.ZSTD_createCCtx() orelse return .{ .err = "Could not create zstd compression context" };
    // ZSTD_freeCCtx returns a size_t status that carries no useful information here.
    defer _ = c.ZSTD_freeCCtx(cctx);

    const result = c.ZSTD_compress_usingDict(cctx, dest.ptr, dest.len, src.ptr, src.len, dict.ptr, dict.len, level orelse c.ZSTD_defaultCLevel());
    if (c.ZSTD_isError(result) != 0) return .{ .err = bun.sliceTo(c.ZSTD_getErrorName(result), 0) };
    return .{ .success = result };
}
|
||||
|
||||
/// One-shot decompression of `src` into `dest` using a predefined dictionary.
///
/// Wraps `ZSTD_decompress_usingDict()`. That C function takes a decompression
/// context as its first argument, so a temporary `ZSTD_DCtx` is created for
/// the duration of the call and freed before returning.
///
/// - `dest`: output buffer; its length is passed as `dstCapacity`.
/// - `src`:  compressed bytes.
/// - `dict`: dictionary bytes; must be the same dictionary that was used
///           during compression.
///
/// Returns `.success` with the number of bytes decompressed into `dest`
/// (<= `dest.len`), or `.err` with a message if the context could not be
/// created or zstd reported an error (as tested by `ZSTD_isError()`).
pub fn decompressUsingDict(dest: []u8, src: []const u8, dict: []const u8) Result {
    const dctx = c.ZSTD_createDCtx() orelse return .{ .err = "Could not create zstd decompression context" };
    // ZSTD_freeDCtx returns a size_t status that carries no useful information here.
    defer _ = c.ZSTD_freeDCtx(dctx);

    const result = c.ZSTD_decompress_usingDict(dctx, dest.ptr, dest.len, src.ptr, src.len, dict.ptr, dict.len);
    if (c.ZSTD_isError(result) != 0) return .{ .err = bun.sliceTo(c.ZSTD_getErrorName(result), 0) };
    return .{ .success = result };
}
|
||||
|
||||
/// Ask zstd for the decompressed size of the frame data in `src`.
///
/// Forwards directly to `ZSTD_findDecompressedSize()`; the value is returned
/// unchanged and is not inspected for zstd's sentinel results here.
pub fn getDecompressedSize(src: []const u8) usize {
    const input_ptr = src.ptr;
    const input_len = src.len;
    return ZSTD_findDecompressedSize(input_ptr, input_len);
}
|
||||
|
||||
@@ -740,12 +740,26 @@ class Zstd extends ZlibBase {
|
||||
});
|
||||
}
|
||||
|
||||
// Handle dictionary option
|
||||
let dictionary;
|
||||
if (opts?.dictionary !== undefined) {
|
||||
if (!isArrayBufferView(opts.dictionary)) {
|
||||
if (isAnyArrayBuffer(opts.dictionary)) {
|
||||
dictionary = Buffer.from(opts.dictionary);
|
||||
} else {
|
||||
throw $ERR_INVALID_ARG_TYPE("options.dictionary", "Buffer, TypedArray, DataView, or ArrayBuffer", opts.dictionary);
|
||||
}
|
||||
} else {
|
||||
dictionary = opts.dictionary;
|
||||
}
|
||||
}
|
||||
|
||||
const handle = new NativeZstd(mode);
|
||||
|
||||
const pledgedSrcSize = opts?.pledgedSrcSize ?? undefined;
|
||||
|
||||
const writeState = new Uint32Array(2);
|
||||
handle.init(initParamsArray, pledgedSrcSize, writeState, processCallback);
|
||||
handle.init(initParamsArray, pledgedSrcSize, writeState, processCallback, dictionary);
|
||||
super(opts, mode, handle, zstdDefaultOpts);
|
||||
this._writeState = writeState;
|
||||
}
|
||||
|
||||
26
test/js/node/test/parallel/test-zlib-zstd-dictionary.js
Normal file
26
test/js/node/test/parallel/test-zlib-zstd-dictionary.js
Normal file
@@ -0,0 +1,26 @@
|
||||
'use strict';

// Round-trip check for the `dictionary` option of zlib.zstdCompress() and
// zlib.zstdDecompress(): compress with a dictionary, make sure the result is
// smaller than the input, then decompress with the same dictionary and
// compare against the original bytes.

const common = require('../common');
const assert = require('assert');
const zlib = require('zlib');

const dictionary = Buffer.from(
  `Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.`
);

const input = Buffer.from(
  `Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.`
);

// Final step: the decompressed text must match the original input exactly.
function onDecompressed(decompressed) {
  assert.strictEqual(decompressed.toString(), input.toString());
}

// Middle step: the compressed output must be smaller than the input, then it
// is fed back through zstdDecompress with the same dictionary.
function onCompressed(compressed) {
  assert(compressed.length < input.length);
  zlib.zstdDecompress(compressed, { dictionary }, common.mustSucceed(onDecompressed));
}

zlib.zstdCompress(input, { dictionary }, common.mustSucceed(onCompressed));
|
||||
Reference in New Issue
Block a user