diff --git a/src/api/schema.zig b/src/api/schema.zig
index 7ab4d400e0..9aa8d26766 100644
--- a/src/api/schema.zig
+++ b/src/api/schema.zig
@@ -1936,6 +1936,9 @@ pub const Api = struct {
        external,
        linked,
+
+        /// compact
+        compact,
        _,
diff --git a/src/bun.js/javascript.zig b/src/bun.js/javascript.zig
index a24406ea04..f26b41a53d 100644
--- a/src/bun.js/javascript.zig
+++ b/src/bun.js/javascript.zig
@@ -241,7 +241,17 @@ pub const SavedSourceMap = struct {
    pub const HashTable = std.HashMap(u64, *anyopaque, IdentityContext(u64), 80);
    pub fn onSourceMapChunk(this: *SavedSourceMap, chunk: SourceMap.Chunk, source: logger.Source) anyerror!void {
-        try this.putMappings(source, chunk.buffer);
+        // If we have compact sourcemap data, we need to handle it specially
+        if (chunk.compact_data) |compact| {
+            // For now, convert the compact data to a regular VLQ buffer
+            const allocator = bun.default_allocator;
+            var temp_buffer = bun.MutableString.initEmpty(allocator);
+            try compact.writeVLQs(&temp_buffer);
+            try this.putMappings(source, temp_buffer);
+        } else {
+            // Standard VLQ format
+            try this.putMappings(source, chunk.buffer);
+        }
    }
    pub const SourceMapHandler = js_printer.SourceMapHandler.For(SavedSourceMap, onSourceMapChunk);
diff --git a/src/bundler/bundle_v2.zig b/src/bundler/bundle_v2.zig
index 50e635baf7..6613c9cb35 100644
--- a/src/bundler/bundle_v2.zig
+++ b/src/bundler/bundle_v2.zig
@@ -14122,7 +14122,7 @@ pub const LinkerContext = struct {
                );
                switch (chunk.content.sourcemap(c.options.source_maps)) {
-                    .external, .linked => |tag| {
+                    .external, .linked, .compact => |tag| {
                        const output_source_map = chunk.output_source_map.finalize(bun.default_allocator, code_result.shifts) catch @panic("Failed to allocate memory for external source map");
                        var source_map_final_rel_path = default_allocator.alloc(u8, chunk.final_rel_path.len + ".map".len) catch unreachable;
                        bun.copy(u8, source_map_final_rel_path, chunk.final_rel_path);
@@ -14442,7 +14442,7 @@ pub const LinkerContext = struct {
                );
                switch (chunk.content.sourcemap(c.options.source_maps)) {
-                    .external, .linked => |tag| {
+                    .external, .linked, .compact => |tag| {
                        const output_source_map = chunk.output_source_map.finalize(source_map_allocator, code_result.shifts) catch @panic("Failed to allocate memory for external source map");
                        const source_map_final_rel_path = strings.concat(default_allocator, &.{
                            chunk.final_rel_path,
diff --git a/src/js_printer.zig b/src/js_printer.zig
index 305baf3a88..64e68c1785 100644
--- a/src/js_printer.zig
+++ b/src/js_printer.zig
@@ -10,6 +10,7 @@ const Lock = bun.Mutex;
 const Api = @import("./api/schema.zig").Api;
 const fs = @import("fs.zig");
 const bun = @import("root").bun;
+
 const string = bun.string;
 const Output = bun.Output;
 const Global = bun.Global;
@@ -422,13 +423,15 @@ pub const SourceMapHandler = struct {
    const Callback = *const fn (*anyopaque, chunk: SourceMap.Chunk, source: logger.Source) anyerror!void;
    pub fn onSourceMapChunk(self: *const @This(), chunk: SourceMap.Chunk, source: logger.Source) anyerror!void {
        try self.callback(self.ctx, chunk, source);
    }
    pub fn For(comptime Type: type, comptime handler: (fn (t: 
*Type, chunk: SourceMap.Chunk, source: logger.Source) anyerror!void)) type { return struct { pub fn onChunk(self: *anyopaque, chunk: SourceMap.Chunk, source: logger.Source) anyerror!void { - try handler(@as(*Type, @ptrCast(@alignCast(self))), chunk, source); + // Make sure we properly align the self pointer to the Type's alignment requirements + const aligned_self = @as(*Type, @ptrCast(@alignCast(self))); + try handler(aligned_self, chunk, source); } pub fn init(self: *Type) SourceMapHandler { @@ -449,10 +453,15 @@ pub const Options = struct { runtime_imports: runtime.Runtime.Imports = runtime.Runtime.Imports{}, module_hash: u32 = 0, source_path: ?fs.Path = null, + use_compact_sourcemap: bool = false, allocator: std.mem.Allocator = default_allocator, source_map_allocator: ?std.mem.Allocator = null, source_map_handler: ?SourceMapHandler = null, - source_map_builder: ?*bun.sourcemap.Chunk.Builder = null, + source_map_builder: union(enum) { + none: void, + default: *bun.sourcemap.Chunk.Builder, + compact: *bun.sourcemap.Chunk.CompactBuilder, + } = .none, css_import_behavior: Api.CssInJsBehavior = Api.CssInJsBehavior.facade, target: options.Target = .browser, @@ -688,7 +697,7 @@ fn NewPrinter( renamer: rename.Renamer, prev_stmt_tag: Stmt.Tag = .s_empty, - source_map_builder: SourceMap.Chunk.Builder = undefined, + source_map_builder: SourceMap.Chunk.AnyBuilder = undefined, symbol_counter: u32 = 0, @@ -5203,7 +5212,7 @@ fn NewPrinter( import_records: []const ImportRecord, opts: Options, renamer: bun.renamer.Renamer, - source_map_builder: SourceMap.Chunk.Builder, + source_map_builder: SourceMap.Chunk.AnyBuilder, ) Printer { if (imported_module_ids_list_unset) { imported_module_ids_list = std.ArrayList(u32).init(default_allocator); @@ -5222,11 +5231,12 @@ fn NewPrinter( }; if (comptime generate_source_map) { // This seems silly to cache but the .items() function apparently costs 1ms according to Instruments. 
- printer.source_map_builder.line_offset_table_byte_offset_list = + printer.source_map_builder.set_line_offset_table_byte_offset_list( printer - .source_map_builder - .line_offset_tables - .items(.byte_offset_to_start_of_line); + .source_map_builder + .line_offset_tables() + .items(.byte_offset_to_start_of_line), + ); } return printer; @@ -5666,30 +5676,74 @@ pub fn getSourceMapBuilder( opts: Options, source: *const logger.Source, tree: *const Ast, -) SourceMap.Chunk.Builder { - if (comptime generate_source_map == .disable) - return undefined; +) SourceMap.Chunk.AnyBuilder { + if (comptime generate_source_map == .disable) { + return .none; + } - return .{ - .source_map = .init( - opts.source_map_allocator orelse opts.allocator, - is_bun_platform and generate_source_map == .lazy, - ), - .cover_lines_without_mappings = true, - .approximate_input_line_count = tree.approximate_newline_count, - .prepend_count = is_bun_platform and generate_source_map == .lazy, - .line_offset_tables = opts.line_offset_tables orelse brk: { - if (generate_source_map == .lazy) break :brk SourceMap.LineOffsetTable.generate( - opts.source_map_allocator orelse opts.allocator, - source.contents, - @as( - i32, - @intCast(tree.approximate_newline_count), - ), - ); - break :brk .empty; - }, + const allocator = opts.source_map_allocator orelse opts.allocator; + const line_offset_tables = opts.line_offset_tables orelse line_tables: { + if (generate_source_map == .lazy) { + break :line_tables SourceMap.LineOffsetTable.generate(allocator, source.contents, @as(i32, @intCast(tree.approximate_newline_count))); + } + break :line_tables SourceMap.LineOffsetTable.List{}; }; + + // Common builder configuration + const prepend_count = is_bun_platform and generate_source_map == .lazy; + const approximate_line_count = tree.approximate_newline_count; + const cover_lines = true; // cover_lines_without_mappings + + if (opts.use_compact_sourcemap) { + // Initialize the SourceMapper for the CompactBuilder + const format_type = SourceMap.Chunk.SourceMapFormat(@import("sourcemap/compact.zig").Format); + const source_mapper = format_type.init(allocator, prepend_count); + + // Initialize the compact sourcemap builder + var builder = SourceMap.Chunk.CompactBuilder{ + .cover_lines_without_mappings = cover_lines, + .approximate_input_line_count = approximate_line_count, + .prepend_count = prepend_count, + .line_offset_tables = line_offset_tables, + .input_source_map = null, + .source_map = source_mapper, + .prev_state = .{}, + .last_generated_update = 0, + .generated_column = 0, + .prev_loc = bun.logger.Loc.Empty, + .has_prev_state = false, + .line_offset_table_byte_offset_list = &[_]u32{}, + .line_starts_with_mapping = false, + }; + + // Use the AnyBuilder union to return the correct type + // Ensure it's properly initialized to prevent alignment issues + return SourceMap.Chunk.AnyBuilder{ .compact = builder }; + } else { + // Initialize the SourceMapper for the Builder + const format_type = SourceMap.Chunk.SourceMapFormat(SourceMap.Chunk.VLQSourceMap); + const source_mapper = format_type.init(allocator, prepend_count); + + // Initialize the default sourcemap builder + const builder = SourceMap.Chunk.Builder{ + .cover_lines_without_mappings = cover_lines, + .approximate_input_line_count = approximate_line_count, + .prepend_count = prepend_count, + .line_offset_tables = line_offset_tables, + .input_source_map = null, + .source_map = source_mapper, + .prev_state = .{}, + .last_generated_update = 0, + .generated_column = 0, + .prev_loc = 
bun.logger.Loc.Empty, + .has_prev_state = false, + .line_offset_table_byte_offset_list = &[_]u32{}, + .line_starts_with_mapping = false, + }; + + // Use the AnyBuilder union to return the correct type + return SourceMap.Chunk.AnyBuilder{ .default = builder }; + } } pub fn printAst( @@ -5796,7 +5850,7 @@ pub fn printAst( ); defer { if (comptime generate_source_map) { - printer.source_map_builder.line_offset_tables.deinit(opts.allocator); + printer.source_map_builder.line_offset_tables().deinit(opts.allocator); } } var bin_stack_heap = std.heap.stackFallback(1024, bun.default_allocator); @@ -6061,6 +6115,9 @@ pub fn printWithWriterAndPlatform( const chunk = printer.source_map_builder.generateChunk(written); if (chunk.should_ignore) break :brk null; + + // Conversion to compact format handled separately in the cli + break :brk chunk; } else null; @@ -6118,7 +6175,11 @@ pub fn printCommonJS( if (comptime generate_source_map) { if (opts.source_map_handler) |handler| { - try handler.onSourceMapChunk(printer.source_map_builder.generateChunk(printer.writer.ctx.getWritten()), source.*); + const chunk = printer.source_map_builder.generateChunk(printer.writer.ctx.getWritten()); + + // Conversion to compact format handled separately in the cli + + try handler.onSourceMapChunk(chunk, source.*); } } diff --git a/src/options.zig b/src/options.zig index 2fb6b7b854..11338dc950 100644 --- a/src/options.zig +++ b/src/options.zig @@ -1426,12 +1426,14 @@ pub const SourceMapOption = enum { @"inline", external, linked, + compact, pub fn fromApi(source_map: ?Api.SourceMapMode) SourceMapOption { return switch (source_map orelse .none) { .external => .external, .@"inline" => .@"inline", .linked => .linked, + .compact => .compact, else => .none, }; } @@ -1441,22 +1443,28 @@ pub const SourceMapOption = enum { .external => .external, .@"inline" => .@"inline", .linked => .linked, + .compact => .compact, .none => .none, }; } pub fn hasExternalFiles(mode: SourceMapOption) bool { return switch (mode) { - .linked, .external => true, + .linked, .external, .compact => true, else => false, }; } + pub fn shouldUseCompactFormat(mode: SourceMapOption) bool { + return mode == .compact; + } + pub const Map = bun.ComptimeStringMap(SourceMapOption, .{ .{ "none", .none }, .{ "inline", .@"inline" }, .{ "external", .external }, .{ "linked", .linked }, + .{ "compact", .compact }, }); }; @@ -1498,6 +1506,7 @@ pub const BundleOptions = struct { emit_decorator_metadata: bool = false, auto_import_jsx: bool = true, allow_runtime: bool = true, + trim_unused_imports: ?bool = null, mark_builtins_as_external: bool = false, diff --git a/src/sourcemap/compact.zig b/src/sourcemap/compact.zig new file mode 100644 index 0000000000..d5bb99ac14 --- /dev/null +++ b/src/sourcemap/compact.zig @@ -0,0 +1,1489 @@ +const std = @import("std"); +// Import bun directly, don't re-export it +const bun = @import("root").bun; +const string = bun.string; +const assert = bun.assert; +const strings = bun.strings; + +const SourceMap = @import("sourcemap.zig"); +const Mapping = SourceMap.Mapping; +const LineColumnOffset = SourceMap.LineColumnOffset; + +/// Import and re-export the compact sourcemap implementation +pub const double_delta_encoding = @import("compact/delta_encoding.zig"); +pub const simd_helpers = @import("compact/simd_helpers.zig"); + +pub const DoubleDeltaEncoder = double_delta_encoding.DoubleDeltaEncoder; +pub const SIMDHelpers = simd_helpers.SIMDHelpers; +pub const CompactSourceMap = @This(); + +/// Magic bytes to identify a compact sourcemap +pub 
const MAGIC: u32 = 0x43534D32; // "CSM2" +pub const VERSION: u32 = 1; + +/// Block-based storage of mappings for better locality +blocks: std.ArrayListUnmanaged(Block) = .{}, + +/// Total number of mappings +mapping_count: usize = 0, + +/// Original input line count +input_line_count: usize = 0, + +/// Sources count (for validation purposes) +sources_count: usize = 0, + +/// Current block being built +current_block: Block = Block{ + .base = .{ + .generated_line = 0, + .generated_column = 0, + .original_line = 0, + .original_column = 0, + .source_index = 0, + }, + .data = &[_]u8{}, + .count = 0, +}, + +/// Mapping count in current block +current_block_count: u16 = 0, + +/// Current block buffer for encoding data +current_block_buffer: std.ArrayListUnmanaged(u8) = .{}, + +/// Last mapping for delta calculations +last_mapping: Mapping = .{ + .generated = .{ + .lines = 0, + .columns = 0, + }, + .original = .{ + .lines = 0, + .columns = 0, + }, + .source_index = 0, +}, +/// Previous delta values for double-delta encoding +prev_deltas: struct { + gen_line: i32 = 0, + gen_col: i32 = 0, + src_idx: i32 = 0, + orig_line: i32 = 0, + orig_col: i32 = 0, +} = .{}, + +/// The allocator to use for all memory operations +allocator: std.mem.Allocator, + +/// The Format type is the builder API for incrementally creating a CompactSourceMap +pub const Format = struct { + /// Reference to the actual compact sourcemap being built incrementally + map: CompactSourceMap, + + /// Last mapping state for delta calculations + last_state: SourceMap.SourceMapState = .{}, + + /// Track approximate source line count for optimizations + approximate_input_line_count: usize = 0, + + /// Base64-encoded mappings for inline sourcemaps (cache) + base64_mappings: ?[]u8 = null, + + /// Temporary buffer for compatibility with the SourceMapFormat interface + temp_buffer: bun.MutableString, + + pub fn init(allocator: std.mem.Allocator, _: bool) Format { + // Create a new compact sourcemap with minimal initialization + const new_map = allocator.create(CompactSourceMap) catch unreachable; + + new_map.* = CompactSourceMap{ + .blocks = .{}, + .mapping_count = 1, + .input_line_count = 0, + .sources_count = 0, + .current_block_count = 1, + .current_block_buffer = .{}, + .current_block = Block.fromMapping(.{ + .generated = .{ .lines = 0, .columns = 0 }, + .original = .{ .lines = 0, .columns = 0 }, + .source_index = 0, + }), + .last_mapping = .{ + .generated = .{ .lines = 0, .columns = 0 }, + .original = .{ .lines = 0, .columns = 0 }, + .source_index = 0, + }, + .prev_deltas = .{}, + .allocator = allocator, + }; + + return .{ + .map = new_map.*, + .temp_buffer = bun.MutableString.initEmpty(allocator), + }; + } + + pub fn appendLineSeparator(this: *Format) !void { + // Update the state to track that we're on a new line + this.last_state.generated_line += 1; + this.last_state.generated_column = 0; + } + + pub fn append(this: *Format, current_state: SourceMap.SourceMapState, prev_state: SourceMap.SourceMapState) !void { + _ = prev_state; // Only needed for VLQ encoding + + // Create the current mapping + const mapping = Mapping{ + .generated = .{ + .lines = current_state.generated_line, + .columns = current_state.generated_column, + }, + .original = .{ + .lines = current_state.original_line, + .columns = current_state.original_column, + }, + .source_index = current_state.source_index, + }; + + // Track sources count for validation + if (current_state.source_index >= 0) { + this.map.sources_count = @max(this.map.sources_count, @as(usize, 
@intCast(current_state.source_index)) + 1); + } + + // Directly add the mapping to the compact sourcemap + try this.map.addMapping(mapping); + + // Update state + this.last_state = current_state; + + // Clear any cached base64 mappings since we've modified the data + if (this.base64_mappings) |mappings| { + this.map.allocator.free(mappings); + this.base64_mappings = null; + } + } + + pub fn shouldIgnore(this: Format) bool { + return this.map.mapping_count == 0; + } + + pub fn getBuffer(this: Format) bun.MutableString { + // The compact format doesn't actually use a buffer for its internal representation + // This is only here to satisfy the interface requirements + return this.temp_buffer; + } + + pub fn getCount(this: Format) usize { + return this.map.mapping_count; + } + + /// Finalize and get the CompactSourceMap reference + pub fn getCompactSourceMap(this: *Format) !CompactSourceMap { + // Finalize any pending block + try this.map.finalizeCurrentBlock(); + + // Update input line count from our tracking + this.map.input_line_count = this.approximate_input_line_count; + + return this.map.*; + } + + /// Get base64-encoded mappings for inline sourcemaps + pub fn getBase64Mappings(this: *Format) ![]const u8 { + // Return cached base64 mappings if available + if (this.base64_mappings) |mappings| { + return mappings; + } + + // Finalize any pending block + try this.map.finalizeCurrentBlock(); + + // Create a complete map of all blocks + const map = this.map; + + // Get base64 encoding directly from the compact map + this.base64_mappings = try map.getInlineBase64(map.allocator); + + return this.base64_mappings.?; + } + + pub fn deinit(this: *Format) void { + // Free the compact map (which will free all the blocks) + this.map.deinit(); + + // Free the map struct itself + this.map.allocator.destroy(this.map); + + // Free the base64 cache if any + if (this.base64_mappings) |mappings| { + this.map.allocator.free(mappings); + } + + // Free the temporary buffer + this.temp_buffer.deinit(); + } +}; + +/// Block-based storage for efficient processing +pub const Block = struct { + /// Base values for the block (first mapping in absolute terms) + base: BaseValues, + + /// Compact double-delta encoded data + data: []u8, + + /// Number of mappings in this block + count: u16, + + /// Base values for delta encoding + pub const BaseValues = struct { + generated_line: i32, + generated_column: i32, + source_index: i32, + original_line: i32, + original_column: i32, + }; + + /// Maximum number of mappings per block for optimal SIMD processing + pub const BLOCK_SIZE: u16 = 64; + + /// Create an empty block with the given base values + pub fn init(base_values: BaseValues) Block { + return .{ + .base = base_values, + .data = &[_]u8{}, + .count = 1, // Base mapping counts as 1 + }; + } + + /// Create an empty block from a mapping + pub fn fromMapping(mapping: Mapping) Block { + return Block.init(.{ + .generated_line = mapping.generatedLine(), + .generated_column = mapping.generatedColumn(), + .source_index = mapping.sourceIndex(), + .original_line = mapping.originalLine(), + .original_column = mapping.originalColumn(), + }); + } + + /// Free memory associated with a block + pub fn deinit(self: *Block, allocator: std.mem.Allocator) void { + if (self.data.len > 0) { + allocator.free(self.data); + } + } +}; + +/// Create a new, empty CompactSourceMap +pub fn create(allocator: std.mem.Allocator) !CompactSourceMap { + return CompactSourceMap{ + .blocks = std.ArrayList(Block).init(allocator), + .mapping_count = 0, + 
.input_line_count = 0, + .sources_count = 0, + .current_block_count = 0, + .current_block_buffer = std.ArrayList(u8).init(allocator), + .current_block = undefined, // Will be initialized on first mapping + .last_mapping = undefined, // Will be initialized on first mapping + .prev_deltas = .{}, + .allocator = allocator, + }; +} + +/// Add a new mapping to the sourcemap incrementally +pub fn addMapping(self: *CompactSourceMap, mapping: Mapping) !void { + // Handle the first mapping which initializes the first block + if (self.mapping_count == 0) { + self.current_block = Block.fromMapping(mapping); + self.last_mapping = mapping; + self.mapping_count = 1; + self.current_block_count = 1; + return; + } + + // Check if we need to start a new block + if (self.current_block_count >= Block.BLOCK_SIZE) { + try self.finalizeCurrentBlock(); + + // Start a new block with this mapping as the base + self.current_block = Block.fromMapping(mapping); + self.last_mapping = mapping; + self.current_block_count = 1; + self.prev_deltas = .{}; + self.mapping_count += 1; + return; + } + + // Calculate deltas from the last mapping + const gen_line_delta = mapping.generatedLine() - self.last_mapping.generatedLine(); + const gen_col_delta = if (gen_line_delta > 0) + mapping.generatedColumn() // If we changed lines, column is absolute + else + mapping.generatedColumn() - self.last_mapping.generatedColumn(); + + const src_idx_delta = mapping.sourceIndex() - self.last_mapping.sourceIndex(); + const orig_line_delta = mapping.originalLine() - self.last_mapping.originalLine(); + const orig_col_delta = mapping.originalColumn() - self.last_mapping.originalColumn(); + + // Calculate double-delta values + const gen_line_dod = gen_line_delta - self.prev_deltas.gen_line; + const gen_col_dod = gen_col_delta - self.prev_deltas.gen_col; + const src_idx_dod = src_idx_delta - self.prev_deltas.src_idx; + const orig_line_dod = orig_line_delta - self.prev_deltas.orig_line; + const orig_col_dod = orig_col_delta - self.prev_deltas.orig_col; + + // Encode and append to the current block buffer + var temp_buffer: [16]u8 = undefined; + + // Ensure we have capacity in the buffer + try self.current_block_buffer.ensureUnusedCapacity(self.allocator, 20); // Overestimate for safety + + // Encode each value + const gen_line_size = DoubleDeltaEncoder.encode(&temp_buffer, gen_line_dod); + try self.current_block_buffer.appendSlice(self.allocator, temp_buffer[0..gen_line_size]); + + const gen_col_size = DoubleDeltaEncoder.encode(&temp_buffer, gen_col_dod); + try self.current_block_buffer.appendSlice(self.allocator, temp_buffer[0..gen_col_size]); + + const src_idx_size = DoubleDeltaEncoder.encode(&temp_buffer, src_idx_dod); + try self.current_block_buffer.appendSlice(self.allocator, temp_buffer[0..src_idx_size]); + + const orig_line_size = DoubleDeltaEncoder.encode(&temp_buffer, orig_line_dod); + try self.current_block_buffer.appendSlice(self.allocator, temp_buffer[0..orig_line_size]); + + const orig_col_size = DoubleDeltaEncoder.encode(&temp_buffer, orig_col_dod); + try self.current_block_buffer.appendSlice(self.allocator, temp_buffer[0..orig_col_size]); + + // Update last deltas for next double-delta calculation + self.prev_deltas.gen_line = gen_line_delta; + self.prev_deltas.gen_col = gen_col_delta; + self.prev_deltas.src_idx = src_idx_delta; + self.prev_deltas.orig_line = orig_line_delta; + self.prev_deltas.orig_col = orig_col_delta; + + // Update last mapping and counts + self.last_mapping = mapping; + self.current_block_count += 1; + 
self.mapping_count += 1; +} + +/// Finalize the current block and add it to blocks list +fn finalizeCurrentBlock(self: *CompactSourceMap) !void { + if (self.current_block_count <= 1) { + return; // Only base mapping, nothing to do + } + + // Allocate and copy the data from the buffer + const data = try self.allocator.alloc(u8, self.current_block_buffer.items.len); + @memcpy(data, self.current_block_buffer.items); + + // Set the data and count on the current block + self.current_block.data = data; + self.current_block.count = self.current_block_count; + + // Add to blocks + try self.blocks.append(self.allocator, self.current_block); + + // We keep all blocks in memory for JavaScript files which can be large + + // Reset the current block buffer + self.current_block_buffer.clearRetainingCapacity(); +} + +/// Const version of finalizeCurrentBlock that can work with const CompactSourceMap +/// This doesn't actually modify the structure, just ensures no pending work is lost +fn finalizeCurrentBlockConst(self: *const CompactSourceMap) !void { + // If we're a const reference, we don't actually finalize anything + // This is just for compatibility with code that calls this method on a const ref + return; +} + +/// Get the total memory usage of this compact sourcemap +pub fn getMemoryUsage(self: CompactSourceMap) usize { + var total: usize = @sizeOf(CompactSourceMap); + + // Add the block array size + total += self.blocks.items.len * @sizeOf(Block); + + // Add the size of all block data + for (self.blocks.items) |block| { + total += block.data.len; + } + + // Add current block buffer size + total += self.current_block_buffer.items.len; + + return total; +} + +/// Create a CompactSourceMap from standard sourcemap data +pub fn init( + allocator: std.mem.Allocator, + mappings: Mapping.List, + input_line_count: usize, + sources_count: usize, +) !CompactSourceMap { + if (mappings.len == 0) { + return .{ + .blocks = &[_]Block{}, + .mapping_count = 0, + .input_line_count = input_line_count, + .sources_count = sources_count, + }; + } + + // Calculate how many blocks we'll need + const block_count = (mappings.len + Block.BLOCK_SIZE - 1) / Block.BLOCK_SIZE; + + // Allocate blocks + var blocks = std.ArrayListUnmanaged(Block){}; + errdefer blocks.deinit(allocator); + + // Process each block + for (0..block_count) |block_idx| { + const start_idx = block_idx * Block.BLOCK_SIZE; + const end_idx = @min(start_idx + Block.BLOCK_SIZE, mappings.len); + const block_mapping_count = end_idx - start_idx; + + // First mapping becomes the base values + const first_mapping = Mapping{ + .generated = mappings.items(.generated)[start_idx], + .original = mappings.items(.original)[start_idx], + .source_index = mappings.items(.source_index)[start_idx], + }; + + // Set base values + const base = Block.BaseValues{ + .generated_line = first_mapping.generatedLine(), + .generated_column = first_mapping.generatedColumn(), + .source_index = first_mapping.sourceIndex(), + .original_line = first_mapping.originalLine(), + .original_column = first_mapping.originalColumn(), + }; + + // First pass: calculate required buffer size + var buffer_size: usize = 0; + var temp_buffer: [16]u8 = undefined; // Temporary buffer for size calculation + + // These track the last absolute values + var last_gen_line = base.generated_line; + var last_gen_col = base.generated_column; + var last_src_idx = base.source_index; + var last_orig_line = base.original_line; + var last_orig_col = base.original_column; + + // These track the last delta values (for 
double-delta encoding) + var last_gen_line_delta: i32 = 0; + var last_gen_col_delta: i32 = 0; + var last_src_idx_delta: i32 = 0; + var last_orig_line_delta: i32 = 0; + var last_orig_col_delta: i32 = 0; + + // Skip first mapping as it's our base + for (start_idx + 1..end_idx) |i| { + const mapping = Mapping{ + .generated = mappings.items(.generated)[i], + .original = mappings.items(.original)[i], + .source_index = mappings.items(.source_index)[i], + }; + + // Calculate deltas + const gen_line_delta = mapping.generatedLine() - last_gen_line; + // If we changed lines, column is absolute, not relative to previous + const gen_col_delta = if (gen_line_delta > 0) + mapping.generatedColumn() + else + mapping.generatedColumn() - last_gen_col; + + const src_idx_delta = mapping.sourceIndex() - last_src_idx; + const orig_line_delta = mapping.originalLine() - last_orig_line; + const orig_col_delta = mapping.originalColumn() - last_orig_col; + + // Calculate double-delta values + const gen_line_dod = gen_line_delta - last_gen_line_delta; + const gen_col_dod = gen_col_delta - last_gen_col_delta; + const src_idx_dod = src_idx_delta - last_src_idx_delta; + const orig_line_dod = orig_line_delta - last_orig_line_delta; + const orig_col_dod = orig_col_delta - last_orig_col_delta; + + // Calculate size needed for each double-delta + buffer_size += DoubleDeltaEncoder.encode(&temp_buffer, gen_line_dod); + buffer_size += DoubleDeltaEncoder.encode(&temp_buffer, gen_col_dod); + buffer_size += DoubleDeltaEncoder.encode(&temp_buffer, src_idx_dod); + buffer_size += DoubleDeltaEncoder.encode(&temp_buffer, orig_line_dod); + buffer_size += DoubleDeltaEncoder.encode(&temp_buffer, orig_col_dod); + + // Update last values for next delta + last_gen_line = mapping.generatedLine(); + last_gen_col = mapping.generatedColumn(); + last_src_idx = mapping.sourceIndex(); + last_orig_line = mapping.originalLine(); + last_orig_col = mapping.originalColumn(); + + // Update last delta values for next double-delta + last_gen_line_delta = gen_line_delta; + last_gen_col_delta = gen_col_delta; + last_src_idx_delta = src_idx_delta; + last_orig_line_delta = orig_line_delta; + last_orig_col_delta = orig_col_delta; + } + + // Allocate data buffer for this block + var data = try allocator.alloc(u8, buffer_size); + errdefer allocator.free(data); + + // Second pass: actually encode the data + var offset: usize = 0; + last_gen_line = base.generated_line; + last_gen_col = base.generated_column; + last_src_idx = base.source_index; + last_orig_line = base.original_line; + last_orig_col = base.original_column; + + // Reset delta tracking for second pass + last_gen_line_delta = 0; + last_gen_col_delta = 0; + last_src_idx_delta = 0; + last_orig_line_delta = 0; + last_orig_col_delta = 0; + + // Skip first mapping (base values) + // Check if we can use batch encoding for efficiency + const remaining_mappings = end_idx - (start_idx + 1); + + if (remaining_mappings >= 4) { + // Pre-calculate all double-delta values for batch encoding + var dod_values = try allocator.alloc(i32, remaining_mappings * 5); + defer allocator.free(dod_values); + + // Reset tracking for delta calculation + last_gen_line = base.generated_line; + last_gen_col = base.generated_column; + last_src_idx = base.source_index; + last_orig_line = base.original_line; + last_orig_col = base.original_column; + + // Reset tracking for double-delta calculation + last_gen_line_delta = 0; + last_gen_col_delta = 0; + last_src_idx_delta = 0; + last_orig_line_delta = 0; + last_orig_col_delta = 0; + + // 
Calculate all double-delta values upfront + for (start_idx + 1..end_idx, 0..) |i, delta_idx| { + const mapping = Mapping{ + .generated = mappings.items(.generated)[i], + .original = mappings.items(.original)[i], + .source_index = mappings.items(.source_index)[i], + }; + + // Calculate deltas + const gen_line_delta = mapping.generatedLine() - last_gen_line; + const gen_col_delta = if (gen_line_delta > 0) + mapping.generatedColumn() + else + mapping.generatedColumn() - last_gen_col; + + const src_idx_delta = mapping.sourceIndex() - last_src_idx; + const orig_line_delta = mapping.originalLine() - last_orig_line; + const orig_col_delta = mapping.originalColumn() - last_orig_col; + + // Calculate double-delta values + const gen_line_dod = gen_line_delta - last_gen_line_delta; + const gen_col_dod = gen_col_delta - last_gen_col_delta; + const src_idx_dod = src_idx_delta - last_src_idx_delta; + const orig_line_dod = orig_line_delta - last_orig_line_delta; + const orig_col_dod = orig_col_delta - last_orig_col_delta; + + // Store double-delta values + const base_offset = delta_idx * 5; + dod_values[base_offset + 0] = gen_line_dod; + dod_values[base_offset + 1] = gen_col_dod; + dod_values[base_offset + 2] = src_idx_dod; + dod_values[base_offset + 3] = orig_line_dod; + dod_values[base_offset + 4] = orig_col_dod; + + // Update last values for next iteration + last_gen_line = mapping.generatedLine(); + last_gen_col = mapping.generatedColumn(); + last_src_idx = mapping.sourceIndex(); + last_orig_line = mapping.originalLine(); + last_orig_col = mapping.originalColumn(); + + // Update last delta values for next double-delta + last_gen_line_delta = gen_line_delta; + last_gen_col_delta = gen_col_delta; + last_src_idx_delta = src_idx_delta; + last_orig_line_delta = orig_line_delta; + last_orig_col_delta = orig_col_delta; + } + + // Use batch encoding for efficiency + offset = DoubleDeltaEncoder.encodeBatch(data, dod_values); + } else { + // For small numbers of mappings, use regular encoding + for (start_idx + 1..end_idx) |i| { + const mapping = Mapping{ + .generated = mappings.items(.generated)[i], + .original = mappings.items(.original)[i], + .source_index = mappings.items(.source_index)[i], + }; + + // Calculate and encode deltas + const gen_line_delta = mapping.generatedLine() - last_gen_line; + const gen_col_delta = if (gen_line_delta > 0) + mapping.generatedColumn() + else + mapping.generatedColumn() - last_gen_col; + + const src_idx_delta = mapping.sourceIndex() - last_src_idx; + const orig_line_delta = mapping.originalLine() - last_orig_line; + const orig_col_delta = mapping.originalColumn() - last_orig_col; + + // Calculate and encode double-delta values + const gen_line_dod = gen_line_delta - last_gen_line_delta; + const gen_col_dod = gen_col_delta - last_gen_col_delta; + const src_idx_dod = src_idx_delta - last_src_idx_delta; + const orig_line_dod = orig_line_delta - last_orig_line_delta; + const orig_col_dod = orig_col_delta - last_orig_col_delta; + + offset += DoubleDeltaEncoder.encode(data[offset..], gen_line_dod); + offset += DoubleDeltaEncoder.encode(data[offset..], gen_col_dod); + offset += DoubleDeltaEncoder.encode(data[offset..], src_idx_dod); + offset += DoubleDeltaEncoder.encode(data[offset..], orig_line_dod); + offset += DoubleDeltaEncoder.encode(data[offset..], orig_col_dod); + + // Update last values + last_gen_line = mapping.generatedLine(); + last_gen_col = mapping.generatedColumn(); + last_src_idx = mapping.sourceIndex(); + last_orig_line = mapping.originalLine(); + last_orig_col = 
mapping.originalColumn(); + + // Update last delta values + last_gen_line_delta = gen_line_delta; + last_gen_col_delta = gen_col_delta; + last_src_idx_delta = src_idx_delta; + last_orig_line_delta = orig_line_delta; + last_orig_col_delta = orig_col_delta; + } + } + + assert(offset == buffer_size); + + // Store block + try blocks.append(allocator, .{ + .base = base, + .data = data, + .count = @intCast(block_mapping_count), + }); + } + + return .{ + .blocks = blocks, + .mapping_count = mappings.len, + .input_line_count = input_line_count, + .sources_count = sources_count, + }; +} + +/// Free all memory associated with the compact sourcemap +pub fn deinit(self: *CompactSourceMap) void { + // Free all the blocks in the ArrayList + for (self.blocks.items) |*block| { + block.deinit(self.allocator); + } + + // Free the blocks ArrayList itself + self.blocks.deinit(self.allocator); + + // Free the current block buffer + self.current_block_buffer.deinit(self.allocator); + + // No need to free current_block as its data is either + // empty or already tracked in the blocks ArrayList +} + +/// Decode the entire CompactSourceMap back to standard Mapping.List format +pub fn decode(self: CompactSourceMap, allocator: std.mem.Allocator) !Mapping.List { + var mappings = Mapping.List{}; + try mappings.ensureTotalCapacity(allocator, self.mapping_count); + + // First, decode all finalized blocks + for (self.blocks.items) |block| { + try self.decodeBlock(allocator, &mappings, block); + } + + // If we have an active block that's not finalized yet, decode that too + if (self.current_block_count > 0) { + const current_block = self.current_block; + + // Create a temporary block with the current buffer data + if (self.current_block_count > 1 and self.current_block_buffer.items.len > 0) { + var temp_block = current_block; + temp_block.data = self.current_block_buffer.items; + temp_block.count = self.current_block_count; + try self.decodeBlock(allocator, &mappings, temp_block); + } else if (self.current_block_count == 1) { + // Just the base mapping + try mappings.append(allocator, .{ + .generated = .{ + .lines = current_block.base.generated_line, + .columns = current_block.base.generated_column, + }, + .original = .{ + .lines = current_block.base.original_line, + .columns = current_block.base.original_column, + }, + .source_index = current_block.base.source_index, + }); + } + } + + return mappings; +} + +const CurrentDeltas = struct { + gen_line_delta: i32, + gen_col_delta: i32, + src_idx_delta: i32, + orig_line_delta: i32, + orig_col_delta: i32, +}; + +/// Decode a single block into the mappings list using double-delta decoding +fn decodeBlock( + _: CompactSourceMap, // Not used but maintained for method semantics + allocator: std.mem.Allocator, + mappings: *Mapping.List, + block: Block, +) !void { + // Add base mapping + try mappings.append(allocator, .{ + .generated = .{ + .lines = block.base.generated_line, + .columns = block.base.generated_column, + }, + .original = .{ + .lines = block.base.original_line, + .columns = block.base.original_column, + }, + .source_index = block.base.source_index, + }); + + // If only one mapping in the block, we're done + if (block.count <= 1) return; + + // Current values start at base + var current_values = block.base; + var current_deltas: CurrentDeltas = .{ + .gen_line_delta = 0, + .gen_col_delta = 0, + .src_idx_delta = 0, + .orig_line_delta = 0, + .orig_col_delta = 0, + }; + var offset: usize = 0; + + // Process remaining mappings + var i: u16 = 1; + while (i < block.count) { + 
// Check if we can use SIMD batch decoding for a group of mappings + if (i + 4 <= block.count) { + // We have at least 4 more mappings to decode, use batch processing + var dod_values: [20]i32 = undefined; // Space for 4 mappings × 5 values each + + // Use SIMD-accelerated batch decoding to read double-delta values + const bytes_read = DoubleDeltaEncoder.decodeBatch(block.data[offset..], &dod_values); + offset += bytes_read; + + // Process the successfully decoded mappings - each mapping has 5 values + const mappings_decoded = @min(4, bytes_read / 5); + + // Convert double-delta values to delta values using SIMD helpers + var delta_values: [20]i32 = undefined; + + // Process delta-of-delta values for generated line + // No need to copy the data, since the function expects a const slice + const dod_slice = dod_values[0 .. mappings_decoded * 5]; + + // Base values don't need to be mutable if they're not modified + var base_values_array = [_]i32{ current_deltas.gen_line_delta, current_deltas.gen_line_delta, current_deltas.gen_line_delta, current_deltas.gen_line_delta }; + const base_slice = base_values_array[0..mappings_decoded]; + + // Results slice needs to be mutable since it's written to, but it's a slice of a mutable array so it's OK + const results_slice = delta_values[0 .. mappings_decoded * 5]; + + SIMDHelpers.DeltaOfDeltaProcessor.process(dod_slice, base_slice, results_slice); + + // Process delta-of-delta values for generated column + const gen_col_dod_slice = dod_values[1 .. mappings_decoded * 5]; // Use the const slice directly + + // Base values can be const + var gen_col_base_array = [_]i32{ current_deltas.gen_col_delta, current_deltas.gen_col_delta, current_deltas.gen_col_delta, current_deltas.gen_col_delta }; + const gen_col_base_slice = gen_col_base_array[0..mappings_decoded]; + + // Results can be const since they're a slice of a mutable array + const gen_col_results_slice = delta_values[1 .. mappings_decoded * 5]; + + SIMDHelpers.DeltaOfDeltaProcessor.process(gen_col_dod_slice, gen_col_base_slice, gen_col_results_slice); + + // Process delta-of-delta values for source index + const src_idx_dod_slice = dod_values[2 .. mappings_decoded * 5]; // Use the const slice directly + + // Base values can be const + var src_idx_base_array = [_]i32{ current_deltas.src_idx_delta, current_deltas.src_idx_delta, current_deltas.src_idx_delta, current_deltas.src_idx_delta }; + const src_idx_base_slice = src_idx_base_array[0..mappings_decoded]; + + // Results can be const since they're a slice of a mutable array + const src_idx_results_slice = delta_values[2 .. mappings_decoded * 5]; + + SIMDHelpers.DeltaOfDeltaProcessor.process(src_idx_dod_slice, src_idx_base_slice, src_idx_results_slice); + + // Process delta-of-delta values for original line + const orig_line_dod_slice = dod_values[3 .. mappings_decoded * 5]; // Use the const slice directly + + var orig_line_base_array = [_]i32{ current_deltas.orig_line_delta, current_deltas.orig_line_delta, current_deltas.orig_line_delta, current_deltas.orig_line_delta }; + const orig_line_base_slice = orig_line_base_array[0..mappings_decoded]; + + // Results can be const since they're a slice of a mutable array + const orig_line_results_slice = delta_values[3 .. mappings_decoded * 5]; + + SIMDHelpers.DeltaOfDeltaProcessor.process(orig_line_dod_slice, orig_line_base_slice, orig_line_results_slice); + + // Process delta-of-delta values for original column + const orig_col_dod_slice = dod_values[4 .. 
mappings_decoded * 5]; // Use the const slice directly + + var orig_col_base_array = [_]i32{ current_deltas.orig_col_delta, current_deltas.orig_col_delta, current_deltas.orig_col_delta, current_deltas.orig_col_delta }; + const orig_col_base_slice = orig_col_base_array[0..mappings_decoded]; + + // Results can be const since they're a slice of a mutable array + const orig_col_results_slice = delta_values[4 .. mappings_decoded * 5]; + + SIMDHelpers.DeltaOfDeltaProcessor.process(orig_col_dod_slice, orig_col_base_slice, orig_col_results_slice); + + // Now apply deltas to get absolute values and append mappings + for (0..mappings_decoded) |j| { + const gen_line_delta = delta_values[j * 5 + 0]; + const gen_col_delta = delta_values[j * 5 + 1]; + const src_idx_delta = delta_values[j * 5 + 2]; + const orig_line_delta = delta_values[j * 5 + 3]; + const orig_col_delta = delta_values[j * 5 + 4]; + + // Update current values with the deltas + current_values.generated_line += gen_line_delta; + + if (gen_line_delta > 0) { + // If we changed lines, column is absolute + current_values.generated_column = gen_col_delta; + } else { + // Otherwise add delta to previous + current_values.generated_column += gen_col_delta; + } + + current_values.source_index += src_idx_delta; + current_values.original_line += orig_line_delta; + current_values.original_column += orig_col_delta; + + // Append mapping + try mappings.append(allocator, .{ + .generated = .{ + .lines = current_values.generated_line, + .columns = current_values.generated_column, + }, + .original = .{ + .lines = current_values.original_line, + .columns = current_values.original_column, + }, + .source_index = current_values.source_index, + }); + + // Update current deltas for next iteration + current_deltas.gen_line_delta = gen_line_delta; + current_deltas.gen_col_delta = gen_col_delta; + current_deltas.src_idx_delta = src_idx_delta; + current_deltas.orig_line_delta = orig_line_delta; + current_deltas.orig_col_delta = orig_col_delta; + } + + // Update counter for processed mappings + i += @intCast(mappings_decoded); + continue; + } + + // Fallback to individual decoding for remaining mappings + // Decode double-delta values + const gen_line_dod_result = DoubleDeltaEncoder.decode(block.data[offset..]); + offset += gen_line_dod_result.bytes_read; + const gen_line_dod = gen_line_dod_result.value; + + const gen_col_dod_result = DoubleDeltaEncoder.decode(block.data[offset..]); + offset += gen_col_dod_result.bytes_read; + const gen_col_dod = gen_col_dod_result.value; + + const src_idx_dod_result = DoubleDeltaEncoder.decode(block.data[offset..]); + offset += src_idx_dod_result.bytes_read; + const src_idx_dod = src_idx_dod_result.value; + + const orig_line_dod_result = DoubleDeltaEncoder.decode(block.data[offset..]); + offset += orig_line_dod_result.bytes_read; + const orig_line_dod = orig_line_dod_result.value; + + const orig_col_dod_result = DoubleDeltaEncoder.decode(block.data[offset..]); + offset += orig_col_dod_result.bytes_read; + const orig_col_dod = orig_col_dod_result.value; + + // Update deltas using double-delta values + current_deltas.gen_line_delta += gen_line_dod; + current_deltas.gen_col_delta += gen_col_dod; + current_deltas.src_idx_delta += src_idx_dod; + current_deltas.orig_line_delta += orig_line_dod; + current_deltas.orig_col_delta += orig_col_dod; + + // Update current values with new deltas + current_values.generated_line += current_deltas.gen_line_delta; + + i += 1; // Increment counter for non-batch case + + if (current_deltas.gen_line_delta 
> 0) { + // If we changed lines, column is absolute + current_values.generated_column = current_deltas.gen_col_delta; + } else { + // Otherwise add delta to previous + current_values.generated_column += current_deltas.gen_col_delta; + } + + current_values.source_index += current_deltas.src_idx_delta; + current_values.original_line += current_deltas.orig_line_delta; + current_values.original_column += current_deltas.orig_col_delta; + + // Append mapping + try mappings.append(allocator, .{ + .generated = .{ + .lines = current_values.generated_line, + .columns = current_values.generated_column, + }, + .original = .{ + .lines = current_values.original_line, + .columns = current_values.original_column, + }, + .source_index = current_values.source_index, + }); + } +} + +/// Find a mapping at a specific line/column position using SIMD acceleration +pub fn findSIMD(self: CompactSourceMap, allocator: std.mem.Allocator, line: i32, column: i32) !?Mapping { + // Quick reject if empty map + if (self.blocks.items.len == 0 and self.current_block_count == 0) { + return null; + } + + // 1. Find the block that might contain our target using binary search + var best_block_idx: usize = 0; + var found_block = false; + var use_current_block = false; + + // First check if we have an active current block that might match + if (self.current_block_count > 0) { + const current_line = self.current_block.base.generated_line; + const current_col = self.current_block.base.generated_column; + + // Check if target position is in the range of the current block + if (line > current_line or (line == current_line and column >= current_col)) { + // The position might be in the current block + use_current_block = true; + } + } + + // If we're not using the current block, search in finalized blocks + if (!use_current_block and self.blocks.items.len > 0) { + // Prepare arrays of lines and columns from block bases for SIMD search + var block_lines = try allocator.alloc(i32, self.blocks.items.len); + defer allocator.free(block_lines); + + var block_columns = try allocator.alloc(i32, self.blocks.items.len); + defer allocator.free(block_columns); + + // Fill the arrays with block base values + for (self.blocks.items, 0..) |block, i| { + block_lines[i] = block.base.generated_line; + block_columns[i] = block.base.generated_column; + } + + // Use SIMD search to find the right block + if (SIMDHelpers.SIMDSearch.find(block_lines, block_columns, line, column)) |idx| { + best_block_idx = idx; + found_block = true; + } + } + + // If we didn't find a suitable block and we're not using the current block, there's no match + if (!found_block and !use_current_block) { + return null; + } + + // 2. 
Decode the block and search within it + if (use_current_block) { + // Check if the target matches the current block's base position exactly + if (self.current_block.base.generated_line == line and self.current_block.base.generated_column == column) { + return Mapping{ + .generated = .{ + .lines = self.current_block.base.generated_line, + .columns = self.current_block.base.generated_column, + }, + .original = .{ + .lines = self.current_block.base.original_line, + .columns = self.current_block.base.original_column, + }, + .source_index = self.current_block.base.source_index, + }; + } + + // If we only have the base mapping, it's not a match + if (self.current_block_count <= 1) { + return null; + } + + // Create a temporary block with the current buffer data + var temp_block = self.current_block; + temp_block.data = self.current_block_buffer.items; + temp_block.count = self.current_block_count; + + // Decode the current block + var partial_mappings = Mapping.List{}; + defer partial_mappings.deinit(allocator); + + try partial_mappings.ensureTotalCapacity(allocator, temp_block.count); + try self.decodeBlock(allocator, &partial_mappings, temp_block); + + // Use SIMD search within the block mappings + var mapping_lines = try allocator.alloc(i32, partial_mappings.len); + defer allocator.free(mapping_lines); + + var mapping_columns = try allocator.alloc(i32, partial_mappings.len); + defer allocator.free(mapping_columns); + + // Fill the arrays with mapping positions + for (0..partial_mappings.len) |i| { + mapping_lines[i] = partial_mappings.items(.generated)[i].lines; + mapping_columns[i] = partial_mappings.items(.generated)[i].columns; + } + + // Use SIMD to find the right mapping in the block + if (SIMDHelpers.SIMDSearch.find(mapping_lines, mapping_columns, line, column)) |idx| { + return partial_mappings.get(idx); + } + } else if (found_block) { + const block = self.blocks.items[best_block_idx]; + + // Special case: if the target matches the block's base position exactly + if (block.base.generated_line == line and block.base.generated_column == column) { + return Mapping{ + .generated = .{ + .lines = block.base.generated_line, + .columns = block.base.generated_column, + }, + .original = .{ + .lines = block.base.original_line, + .columns = block.base.original_column, + }, + .source_index = block.base.source_index, + }; + } + + // Decode the entire block + var partial_mappings = Mapping.List{}; + defer partial_mappings.deinit(allocator); + + try partial_mappings.ensureTotalCapacity(allocator, block.count); + try self.decodeBlock(allocator, &partial_mappings, block); + + // Use SIMD search within the block mappings + var mapping_lines = try allocator.alloc(i32, partial_mappings.len); + defer allocator.free(mapping_lines); + + var mapping_columns = try allocator.alloc(i32, partial_mappings.len); + defer allocator.free(mapping_columns); + + // Fill the arrays with mapping positions + for (0..partial_mappings.len) |i| { + mapping_lines[i] = partial_mappings.items(.generated)[i].lines; + mapping_columns[i] = partial_mappings.items(.generated)[i].columns; + } + + // Use SIMD to find the right mapping in the block + if (SIMDHelpers.SIMDSearch.find(mapping_lines, mapping_columns, line, column)) |idx| { + return partial_mappings.get(idx); + } + } + + return null; +} + +/// Standard find implementation as fallback +pub fn find(self: CompactSourceMap, allocator: std.mem.Allocator, line: i32, column: i32) !?Mapping { + // Use the SIMD-accelerated version + return try self.findSIMD(allocator, line, column); +} 
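+
+// A minimal round-trip sketch for the incremental builder above: feed a few mappings
+// through addMapping(), finalize, and decode() them back. This assumes only std.testing
+// plus the Mapping layout already used throughout this file (.generated/.original as
+// {lines, columns} and .source_index); it illustrates how addMapping,
+// finalizeCurrentBlock, and decode compose rather than exhaustive coverage.
+test "CompactSourceMap addMapping/decode round trip (sketch)" {
+    const allocator = std.testing.allocator;
+
+    var map = CompactSourceMap{ .allocator = allocator };
+    defer map.deinit();
+
+    // Two mappings on generated line 0, then one on line 1.
+    const inputs = [_]Mapping{
+        .{ .generated = .{ .lines = 0, .columns = 0 }, .original = .{ .lines = 0, .columns = 0 }, .source_index = 0 },
+        .{ .generated = .{ .lines = 0, .columns = 10 }, .original = .{ .lines = 0, .columns = 4 }, .source_index = 0 },
+        .{ .generated = .{ .lines = 1, .columns = 2 }, .original = .{ .lines = 3, .columns = 1 }, .source_index = 0 },
+    };
+    for (inputs) |m| try map.addMapping(m);
+    try map.finalizeCurrentBlock();
+
+    var decoded = try map.decode(allocator);
+    defer decoded.deinit(allocator);
+
+    try std.testing.expectEqual(@as(usize, inputs.len), decoded.len);
+    try std.testing.expectEqual(@as(i32, 10), decoded.items(.generated)[1].columns);
+    try std.testing.expectEqual(@as(i32, 3), decoded.items(.original)[2].lines);
+}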
+
+/// Write VLQ-compatible mappings to a MutableString for compatibility with standard sourcemap consumers
+pub fn writeVLQs(self: *const CompactSourceMap, output_buffer: *bun.MutableString) !void {
+    // We cannot mutate through a const pointer, so nothing is finalized here;
+    // decode() below also reads any pending, unfinalized block.
+    try self.finalizeCurrentBlockConst();
+
+    // Now decode all blocks
+    var mappings = try self.decode(bun.default_allocator);
+    defer mappings.deinit(bun.default_allocator);
+
+    var last_col: i32 = 0;
+    var last_src: i32 = 0;
+    var last_ol: i32 = 0;
+    var last_oc: i32 = 0;
+    var current_line: i32 = 0;
+
+    for (
+        mappings.items(.generated),
+        mappings.items(.original),
+        mappings.items(.source_index),
+        0..,
+    ) |gen, orig, source_index, i| {
+        if (current_line != gen.lines) {
+            assert(gen.lines > current_line);
+            const inc = gen.lines - current_line;
+            try output_buffer.appendNTimes(';', @intCast(inc));
+            current_line = gen.lines;
+            last_col = 0;
+        } else if (i != 0) {
+            try output_buffer.appendChar(',');
+        }
+
+        // We're using VLQ encode from the original implementation for compatibility
+        try @import("vlq.zig").encode(gen.columns - last_col).appendTo(output_buffer);
+        last_col = gen.columns;
+        try @import("vlq.zig").encode(source_index - last_src).appendTo(output_buffer);
+        last_src = source_index;
+        try @import("vlq.zig").encode(orig.lines - last_ol).appendTo(output_buffer);
+        last_ol = orig.lines;
+        try @import("vlq.zig").encode(orig.columns - last_oc).appendTo(output_buffer);
+        last_oc = orig.columns;
+    }
+}
+
+/// Serialization header for the compact sourcemap format
+pub const Header = struct {
+    magic: u32 = MAGIC,
+    version: u32 = VERSION,
+    block_count: u32,
+    mapping_count: u32,
+    input_line_count: u32,
+    sources_count: u32,
+};
+
+/// Serialize a compact sourcemap to binary format (for storage or transmission)
+pub fn serialize(self: CompactSourceMap, allocator: std.mem.Allocator) ![]u8 {
+    const header = Header{
+        .block_count = @truncate(self.blocks.items.len),
+        .mapping_count = @truncate(self.mapping_count),
+        .input_line_count = @truncate(self.input_line_count),
+        .sources_count = @truncate(self.sources_count),
+    };
+
+    // Calculate total size
+    var total_size: usize = @sizeOf(Header);
+
+    // Add size for block headers
+    total_size += self.blocks.items.len * @sizeOf(Block.BaseValues);
+    total_size += self.blocks.items.len * @sizeOf(u32); // For data length
+    total_size += self.blocks.items.len * @sizeOf(u16); // For count
+
+    // Add size for all encoded data
+    for (self.blocks.items) |block| {
+        total_size += block.data.len;
+    }
+
+    // Allocate buffer
+    const buffer = try allocator.alloc(u8, total_size);
+    errdefer allocator.free(buffer);
+
+    // Write header
+    @memcpy(buffer[0..@sizeOf(Header)], std.mem.asBytes(&header));
+
+    // Write blocks
+    var offset: usize = @sizeOf(Header);
+
+    for (self.blocks.items) |block| {
+        // Write base values
+        @memcpy(buffer[offset..][0..@sizeOf(Block.BaseValues)], std.mem.asBytes(&block.base));
+        offset += @sizeOf(Block.BaseValues);
+
+        // Write count
+        @memcpy(buffer[offset..][0..@sizeOf(u16)], std.mem.asBytes(&block.count));
+        offset += @sizeOf(u16);
+
+        // Write data length
+        const len: u32 = @truncate(block.data.len);
+        @memcpy(buffer[offset..][0..@sizeOf(u32)], std.mem.asBytes(&len));
+        offset += @sizeOf(u32);
+
+        // Write data
+        @memcpy(buffer[offset..][0..block.data.len], block.data);
+        offset += block.data.len;
+    }
+
+    assert(offset == total_size);
+    return buffer;
+}
+
+/// Check if a data buffer contains a serialized compact sourcemap
+pub fn isSerializedCompactSourceMap(data: []const u8) bool {
+    if (data.len < @sizeOf(Header)) {
+        return false;
+    }
+
+    // The input buffer may not be aligned, so copy the header out byte-wise
+    const header = std.mem.bytesToValue(Header, data[0..@sizeOf(Header)]);
+    return header.magic == MAGIC;
+}
+
+/// Deserialize a compact sourcemap from binary format
+pub fn deserialize(allocator: std.mem.Allocator, data: []const u8) !CompactSourceMap {
+    if (data.len < @sizeOf(Header)) {
+        return error.InvalidFormat;
+    }
+
+    const header = std.mem.bytesToValue(Header, data[0..@sizeOf(Header)]);
+
+    if (header.magic != MAGIC) {
+        return error.InvalidFormat;
+    }
+
+    if (header.version != VERSION) {
+        return error.UnsupportedVersion;
+    }
+
+    // Allocate blocks
+    const blocks = try allocator.alloc(Block, header.block_count);
+    // Start every block with an empty data slice so the errdefer below never frees undefined memory
+    for (blocks) |*block| block.data = &[_]u8{};
+    errdefer {
+        for (blocks) |*block| {
+            if (block.data.len > 0) {
+                allocator.free(block.data);
+            }
+        }
+        allocator.free(blocks);
+    }
+
+    // Read blocks
+    var offset: usize = @sizeOf(Header);
+
+    for (0..header.block_count) |i| {
+        if (offset + @sizeOf(Block.BaseValues) > data.len) {
+            return error.InvalidFormat;
+        }
+
+        // Read base values (unaligned-safe copy)
+        blocks[i].base = std.mem.bytesToValue(Block.BaseValues, data[offset..][0..@sizeOf(Block.BaseValues)]);
+        offset += @sizeOf(Block.BaseValues);
+
+        // Read count
+        if (offset + @sizeOf(u16) > data.len) {
+            return error.InvalidFormat;
+        }
+
+        blocks[i].count = std.mem.bytesToValue(u16, data[offset..][0..@sizeOf(u16)]);
+        offset += @sizeOf(u16);
+
+        // Read data length
+        if (offset + @sizeOf(u32) > data.len) {
+            return error.InvalidFormat;
+        }
+
+        const len = std.mem.bytesToValue(u32, data[offset..][0..@sizeOf(u32)]);
+        offset += @sizeOf(u32);
+
+        if (offset + len > data.len) {
+            return error.InvalidFormat;
+        }
+
+        // Read data
+        blocks[i].data = try allocator.alloc(u8, len);
+        @memcpy(blocks[i].data, data[offset..][0..len]);
+        offset += len;
+    }
+
+    return .{
+        // blocks is an owned slice; hand it to the unmanaged list without copying
+        .blocks = .{ .items = blocks, .capacity = blocks.len },
+        .mapping_count = header.mapping_count,
+        .input_line_count = header.input_line_count,
+        .sources_count = header.sources_count,
+        .allocator = allocator,
+    };
+}
+
+/// Format marker type for the CompactSourceMap
+pub const CompactSourceMapFormat = enum { Compact };
+
+/// Inline serialization for direct embedding in sourcemaps
+pub fn getInlineBase64(self: CompactSourceMap, allocator: 
std.mem.Allocator) ![]const u8 { + // Finalize the current block to ensure all mappings are included + try self.finalizeCurrentBlock(); + + // Get all mappings as an array + var mappings = try self.decode(allocator); + defer mappings.deinit(allocator); + + if (mappings.len == 0) { + return &[_]u8{}; + } + + // First mapping is the base - we'll store delta values directly to avoid + // the double-delta calculation complexity for a one-off operation + var double_delta_values = try allocator.alloc(i32, (mappings.len - 1) * 5); + defer allocator.free(double_delta_values); + + // First mapping becomes our base + const first_mapping = Mapping{ + .generated = mappings.items(.generated)[0], + .original = mappings.items(.original)[0], + .source_index = mappings.items(.source_index)[0], + }; + + // Last values for delta calculation + var last_gen_line = first_mapping.generatedLine(); + var last_gen_col = first_mapping.generatedColumn(); + var last_src_idx = first_mapping.sourceIndex(); + var last_orig_line = first_mapping.originalLine(); + var last_orig_col = first_mapping.originalColumn(); + + // Last deltas for double-delta calculation + var last_gen_line_delta: i32 = 0; + var last_gen_col_delta: i32 = 0; + var last_src_idx_delta: i32 = 0; + var last_orig_line_delta: i32 = 0; + var last_orig_col_delta: i32 = 0; + + // Calculate double-delta values for all mappings after the first + for (1..mappings.len, 0..) |i, value_idx| { + const mapping = Mapping{ + .generated = mappings.items(.generated)[i], + .original = mappings.items(.original)[i], + .source_index = mappings.items(.source_index)[i], + }; + + // Calculate deltas + const gen_line_delta = mapping.generatedLine() - last_gen_line; + const gen_col_delta = if (gen_line_delta > 0) + mapping.generatedColumn() // If we changed lines, column is absolute + else + mapping.generatedColumn() - last_gen_col; + + const src_idx_delta = mapping.sourceIndex() - last_src_idx; + const orig_line_delta = mapping.originalLine() - last_orig_line; + const orig_col_delta = mapping.originalColumn() - last_orig_col; + + // Calculate double-delta values + const gen_line_dod = gen_line_delta - last_gen_line_delta; + const gen_col_dod = gen_col_delta - last_gen_col_delta; + const src_idx_dod = src_idx_delta - last_src_idx_delta; + const orig_line_dod = orig_line_delta - last_orig_line_delta; + const orig_col_dod = orig_col_delta - last_orig_col_delta; + + // Store double-delta values + const base_offset = value_idx * 5; + double_delta_values[base_offset + 0] = gen_line_dod; + double_delta_values[base_offset + 1] = gen_col_dod; + double_delta_values[base_offset + 2] = src_idx_dod; + double_delta_values[base_offset + 3] = orig_line_dod; + double_delta_values[base_offset + 4] = orig_col_dod; + + // Update values for next iteration + last_gen_line = mapping.generatedLine(); + last_gen_col = mapping.generatedColumn(); + last_src_idx = mapping.sourceIndex(); + last_orig_line = mapping.originalLine(); + last_orig_col = mapping.originalColumn(); + + // Update deltas for next iteration + last_gen_line_delta = gen_line_delta; + last_gen_col_delta = gen_col_delta; + last_src_idx_delta = src_idx_delta; + last_orig_line_delta = orig_line_delta; + last_orig_col_delta = orig_col_delta; + } + + // Encode to base64 + return DoubleDeltaEncoder.encodeToBase64(allocator, double_delta_values); +} + +/// This function can be used to convert an existing sourcemap +/// to use the new compact format internally +pub fn convertSourceMapToCompact( + sourcemap: *SourceMap, + allocator: 
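// ----- illustrative sketch; not part of this patch -----
// The loop above computes, per field, a delta against the previous mapping and
// then a delta of that delta. On a single stream the transform looks like this
// (names are illustrative): generated columns 10, 12, 14, 16 give deltas
// 2, 2, 2 and double deltas 2, 0, 0, which is why the values handed to the
// base64 encoder are usually tiny.
const std = @import("std");

fn doubleDeltas(values: []const i32, out: []i32) void {
    // assumes values.len >= 1 and out.len == values.len - 1
    var prev_value = values[0];
    var prev_delta: i32 = 0;
    for (values[1..], 0..) |value, i| {
        const delta = value - prev_value;
        out[i] = delta - prev_delta;
        prev_value = value;
        prev_delta = delta;
    }
}

test "double deltas of an arithmetic sequence collapse to zero" {
    const columns = [_]i32{ 10, 12, 14, 16 };
    var out: [3]i32 = undefined;
    doubleDeltas(&columns, &out);
    try std.testing.expectEqualSlices(i32, &[_]i32{ 2, 0, 0 }, &out);
}
// --------------------------------------------------------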
std.mem.Allocator, +) !void { + // Create a new compact sourcemap + var compact = create(allocator); + compact.input_line_count = @max(1, sourcemap.sources_content.len); + compact.sources_count = sourcemap.sources.len; + + // Add all mappings from the standard format + for (0..sourcemap.mapping.len) |i| { + const mapping = Mapping{ + .generated = sourcemap.mapping.items(.generated)[i], + .original = sourcemap.mapping.items(.original)[i], + .source_index = sourcemap.mapping.items(.source_index)[i], + }; + + try compact.addMapping(mapping); + } + + // Finalize any pending block + try compact.finalizeCurrentBlock(); + + // Update the internal representation + sourcemap.compact_mapping = compact; +} diff --git a/src/sourcemap/compact/delta_encoding.zig b/src/sourcemap/compact/delta_encoding.zig new file mode 100644 index 0000000000..f8eb41ec71 --- /dev/null +++ b/src/sourcemap/compact/delta_encoding.zig @@ -0,0 +1,457 @@ +const std = @import("std"); +const bun = @import("root").bun; +const assert = bun.assert; + +/// DoubleDeltaEncoder provides an optimized delta-of-delta encoding scheme for sourcemaps +/// Key optimizations: +/// 1. Small integers (very common in sourcemaps) use 1 byte +/// 2. SIMD acceleration for bulk encoding/decoding operations +/// 3. Optimized for WASM compilation and cross-platform performance +/// 4. Designed for inline base64 encoding in sourcemap "mappings" property +pub const DoubleDeltaEncoder = struct { + /// Encodes a signed integer using a variable-length encoding optimized for small values + /// Returns the number of bytes written to the buffer + pub fn encode(buffer: []u8, value: i32) usize { + // Use zigzag encoding to handle negative numbers efficiently + // This maps -1, 1 to 1, 2; -2, 2 to 3, 4, etc. + const zigzagged = @as(u32, @bitCast((value << 1) ^ (value >> 31))); + + if (zigzagged < 128) { + // Small values (0-127) fit in a single byte with top bit clear + buffer[0] = @truncate(zigzagged); + return 1; + } else if (zigzagged < 16384) { + // Medium values (128-16383) fit in two bytes + // First byte has top two bits: 10 + buffer[0] = @truncate(0x80 | (zigzagged >> 7)); + buffer[1] = @truncate(zigzagged & 0x7F); + return 2; + } else if (zigzagged < 2097152) { + // Larger values (16384-2097151) fit in three bytes + // First byte has top two bits: 11, next bit 0 + buffer[0] = @truncate(0xC0 | (zigzagged >> 14)); + buffer[1] = @truncate((zigzagged >> 7) & 0x7F); + buffer[2] = @truncate(zigzagged & 0x7F); + return 3; + } else { + // Very large values use four bytes + // First byte has top three bits: 111 + buffer[0] = @truncate(0xE0 | (zigzagged >> 21)); + buffer[1] = @truncate((zigzagged >> 14) & 0x7F); + buffer[2] = @truncate((zigzagged >> 7) & 0x7F); + buffer[3] = @truncate(zigzagged & 0x7F); + return 4; + } + } + + /// Encodes a signed integer to a slice and returns that slice + /// Used for VLQ-like interfaces that expect a slice result + pub fn encodeToSlice(buffer: []u8, value: i32) []u8 { + const len = encode(buffer, value); + return buffer[0..len]; + } + + /// Decodes a delta-encoded integer from a buffer + /// Returns the decoded value and the number of bytes read + pub fn decode(buffer: []const u8) struct { value: i32, bytes_read: usize } { + const first_byte = buffer[0]; + + // Unpack based on tag bits + if ((first_byte & 0x80) == 0) { + // Single byte value + const zigzagged = first_byte; + return .{ + .value = dezigzag(@as(u32, zigzagged)), + .bytes_read = 1, + }; + } else if ((first_byte & 0xC0) == 0x80) { + // Two byte value + const 
zigzagged = ((@as(u32, first_byte) & 0x3F) << 7) | + (@as(u32, buffer[1]) & 0x7F); + return .{ + .value = dezigzag(zigzagged), + .bytes_read = 2, + }; + } else if ((first_byte & 0xE0) == 0xC0) { + // Three byte value + const zigzagged = ((@as(u32, first_byte) & 0x1F) << 14) | + ((@as(u32, buffer[1]) & 0x7F) << 7) | + (@as(u32, buffer[2]) & 0x7F); + return .{ + .value = dezigzag(zigzagged), + .bytes_read = 3, + }; + } else { + // Four byte value + const zigzagged = ((@as(u32, first_byte) & 0x0F) << 21) | + ((@as(u32, buffer[1]) & 0x7F) << 14) | + ((@as(u32, buffer[2]) & 0x7F) << 7) | + (@as(u32, buffer[3]) & 0x7F); + return .{ + .value = dezigzag(zigzagged), + .bytes_read = 4, + }; + } + } + + /// SIMD-accelerated bulk decoding of multiple values at once + /// This dramatically speeds up processing of mappings + pub fn decodeBatch(buffer: []const u8, values: []i32) usize { + var offset: usize = 0; + var i: usize = 0; + + const vector_size = std.simd.suggestVectorLength(u8) orelse 0; + + // Process with AVX2 acceleration if available + if (vector_size >= 16 and values.len >= 8 and buffer.len >= 16) { + // AVX2 can process 8 i32 values at once + const lanes = 8; + + // We'll use SIMD to accelerate parts of the decoding process + // Specifically, we can parallelize the tag bit checking and mask generation + const Vector8 = @Vector(lanes, u8); + const MaskVector = @Vector(lanes, bool); + + // Create masks for checking the continuation bits + const tag_mask_0x80: Vector8 = @as(Vector8, @splat(0x80)); // Check for single-byte values (< 128) + + // Buffers for efficient batch processing + while (i + lanes <= values.len and offset + lanes <= buffer.len) { + // Check if we can process a full batch + var can_process_batch = true; + + // Load the first byte of the next 8 potential values + var first_bytes: Vector8 = undefined; + for (0..lanes) |j| { + if (offset + j < buffer.len) { + first_bytes[j] = buffer[offset + j]; + } else { + can_process_batch = false; + break; + } + } + + if (!can_process_batch) break; + + // Use SIMD to identify single-byte values (most common case in sourcemaps) + const zero_vector: Vector8 = @splat(0); + const is_single_byte: MaskVector = (first_bytes & tag_mask_0x80) == zero_vector; + const single_byte_mask = @as(u8, @bitCast(is_single_byte)); + + // If all are single byte values, we can process them extremely efficiently + if (single_byte_mask == 0xFF) { + // All values are single-byte, directly decode them + for (0..lanes) |j| { + // For single-byte values, just dezigzag the value + const zigzagged = @as(u32, buffer[offset + j]); + values[i + j] = dezigzag(zigzagged); + } + + // Update offsets + offset += lanes; + i += lanes; + continue; + } + + // Not all values are single-byte, fall back to regular decoding + break; + } + } else if (vector_size >= 8 and values.len >= 4 and buffer.len >= 8) { + // NEON acceleration (similar to AVX2 but with 4 lanes) + const lanes = 4; + + // Similar implementation to the AVX2 version but with 4 lanes + const Vector4 = @Vector(lanes, u8); + const MaskVector = @Vector(lanes, bool); + + // Create masks for checking the continuation bits + const tag_mask_0x80: Vector4 = @as(Vector4, @splat(0x80)); + + // Process batches of 4 values + while (i + lanes <= values.len and offset + lanes <= buffer.len) { + // Check if we can process a full batch + var can_process_batch = true; + + // Load the first byte of the next 4 potential values + var first_bytes: Vector4 = undefined; + for (0..lanes) |j| { + if (offset + j < buffer.len) { + first_bytes[j] 
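// ----- illustrative sketch; not part of this patch -----
// How the tag scheme above plays out in practice: zigzag maps 0, -1, 1, -2, 2
// to 0, 1, 2, 3, 4, so any delta in [-64, 63] lands in the single-byte form
// and anything up to roughly +/-8191 needs at most two bytes. The helpers
// below only mirror the size rules of encode()/decode(); they are not the
// patch's code.
const std = @import("std");

fn zigzag(value: i32) u32 {
    const bits: u32 = @bitCast(value);
    const sign_mask: u32 = @bitCast(value >> 31); // 0 or 0xFFFF_FFFF
    return (bits << 1) ^ sign_mask;
}

fn encodedLen(value: i32) usize {
    const z = zigzag(value);
    if (z < 128) return 1; // 0xxxxxxx
    if (z < 16384) return 2; // 10xxxxxx xxxxxxxx
    if (z < 2097152) return 3; // 110xxxxx ...
    return 4; // 111xxxxx ...
}

test "small deltas stay in the single-byte range" {
    try std.testing.expectEqual(@as(u32, 1), zigzag(-1));
    try std.testing.expectEqual(@as(u32, 2), zigzag(1));
    try std.testing.expectEqual(@as(usize, 1), encodedLen(-64));
    try std.testing.expectEqual(@as(usize, 2), encodedLen(64));
    try std.testing.expectEqual(@as(usize, 2), encodedLen(-8192));
    try std.testing.expectEqual(@as(usize, 3), encodedLen(8192));
}
// --------------------------------------------------------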
= buffer[offset + j]; + } else { + can_process_batch = false; + break; + } + } + + if (!can_process_batch) break; + + // Use SIMD to identify single-byte values + const zero_vector: Vector4 = @splat(0); + const is_single_byte: MaskVector = (first_bytes & tag_mask_0x80) == zero_vector; + const single_byte_mask = @as(u4, @bitCast(is_single_byte)); + + // If all are single byte values, process efficiently + if (single_byte_mask == 0xF) { + // All values are single-byte, directly decode them + for (0..lanes) |j| { + const zigzagged = @as(u32, buffer[offset + j]); + values[i + j] = dezigzag(zigzagged); + } + + // Update offsets + offset += lanes; + i += lanes; + continue; + } + + // Not all values are single-byte, fall back to regular decoding + break; + } + } + + // Fallback to standard scalar decoding for remaining values + while (i < values.len and offset < buffer.len) { + const result = decode(buffer[offset..]); + values[i] = result.value; + offset += result.bytes_read; + i += 1; + } + + return offset; + } + + /// Encode multiple values efficiently with SIMD acceleration if available + pub fn encodeBatch(buffer: []u8, values: []const i32) usize { + var offset: usize = 0; + + // For small values (0-127), which are common in delta-encoding for + // sourcemaps, we can use SIMD to significantly speed up encoding + const use_avx2 = std.simd.suggestVectorLength(u8) orelse 0 >= 16; + const use_neon = std.simd.suggestVectorLength(u8) orelse 0 >= 8; + + if (use_avx2 and values.len >= 8 and buffer.len >= 8) { + // AVX2 processing with 8 lanes + const lanes = 8; + const Vector8_i32 = @Vector(lanes, i32); + const Vector8_u32 = @Vector(lanes, u32); + const Vector8_bool = @Vector(lanes, bool); + + var i: usize = 0; + while (i + lanes <= values.len and offset + lanes <= buffer.len) { + // Load values + var value_block: Vector8_i32 = undefined; + for (0..lanes) |j| { + value_block[j] = values[i + j]; + } + + // Zigzag encode the vector + const one_vec: Vector8_i32 = @splat(1); + const thirtyone_vec: Vector8_i32 = @splat(31); + const shifted_left = value_block << one_vec; + const shifted_right = value_block >> thirtyone_vec; + const zigzagged = @as(Vector8_u32, @bitCast(shifted_left ^ shifted_right)); + + // Check which values can be encoded in a single byte (< 128) + const limit_vec: Vector8_u32 = @splat(128); + const is_small: Vector8_bool = zigzagged < limit_vec; + const mask = @as(u8, @bitCast(is_small)); + + // If all values are small, we can do efficient single-byte encoding + if (mask == 0xFF) { + // All values can be encoded as single bytes + for (0..lanes) |j| { + buffer[offset + j] = @truncate(zigzagged[j]); + } + + offset += lanes; + i += lanes; + continue; + } + + // If not all values are small, fall back to regular encoding + break; + } + + // Process remaining values with regular encoder + while (i < values.len and offset < buffer.len) { + const bytes_written = encode(buffer[offset..], values[i]); + offset += bytes_written; + i += 1; + } + } else if (use_neon and values.len >= 4 and buffer.len >= 4) { + // NEON processing with 4 lanes + const lanes = 4; + const Vector4_i32 = @Vector(lanes, i32); + const Vector4_u32 = @Vector(lanes, u32); + const Vector4_bool = @Vector(lanes, bool); + + var i: usize = 0; + while (i + lanes <= values.len and offset + lanes <= buffer.len) { + // Load values + var value_block: Vector4_i32 = undefined; + for (0..lanes) |j| { + value_block[j] = values[i + j]; + } + + // Zigzag encode the vector + const one_vec: Vector4_i32 = @splat(1); + const thirtyone_vec: 
Vector4_i32 = @splat(31); + const shifted_left = value_block << one_vec; + const shifted_right = value_block >> thirtyone_vec; + const zigzagged = @as(Vector4_u32, @bitCast(shifted_left ^ shifted_right)); + + // Check which values can be encoded in a single byte + const limit_vec: Vector4_u32 = @splat(128); + const is_small: Vector4_bool = zigzagged < limit_vec; + const mask = @as(u4, @bitCast(is_small)); + + // If all values are small, we can do efficient single-byte encoding + if (mask == 0xF) { + // All values can be encoded as single bytes + for (0..lanes) |j| { + buffer[offset + j] = @truncate(zigzagged[j]); + } + + offset += lanes; + i += lanes; + continue; + } + + // If not all values are small, fall back to regular encoding + break; + } + + // Process remaining values with regular encoder + while (i < values.len and offset < buffer.len) { + const bytes_written = encode(buffer[offset..], values[i]); + offset += bytes_written; + i += 1; + } + } else { + // No SIMD - use scalar encoding + for (values) |value| { + if (offset >= buffer.len) break; + const bytes_written = encode(buffer[offset..], value); + offset += bytes_written; + } + } + + return offset; + } + + /// Encode a buffer of double-delta values to base64 format + /// This is used for inline sourcemaps in the "mappings" property + pub fn encodeToBase64(allocator: std.mem.Allocator, values: []const i32) ![]u8 { + // First, encode the values to a temporary buffer + const max_size = values.len * 4; // Worst case: 4 bytes per value + var temp_buffer = try allocator.alloc(u8, max_size); + defer allocator.free(temp_buffer); + + // Encode values to the temporary buffer + const encoded_size = encodeBatch(temp_buffer, values); + + // Calculate base64 output size and allocate the result buffer + const base64_size = bun.base64.encodeLen(encoded_size); + var result = try allocator.alloc(u8, base64_size); + errdefer allocator.free(result); + + // Encode to base64 + const encoded = bun.base64.encode(result, temp_buffer[0..encoded_size]); + + // Resize the result buffer to the actual encoded size + if (encoded.count < result.len) { + result = allocator.realloc(result, encoded.count) catch result; + return result[0..encoded.count]; + } + + return result; + } + + /// Decode a base64 string to double-delta values + pub fn decodeFromBase64(allocator: std.mem.Allocator, base64_str: []const u8, out_values: []i32) !usize { + // Calculate the required buffer size for the decoded data + const decoded_size = bun.base64.decodeLen(base64_str); + var temp_buffer = try allocator.alloc(u8, decoded_size); + defer allocator.free(temp_buffer); + + // Decode from base64 + const decoded = bun.base64.decode(temp_buffer, base64_str); + if (!decoded.isSuccessful()) { + return error.InvalidBase64; + } + + // Decode the binary data to values + const values_decoded = decodeBatch(temp_buffer[0..decoded.count], out_values); + + return values_decoded; + } + + /// Convert from zigzag encoding back to signed integer + fn dezigzag(zigzagged: u32) i32 { + return @bitCast(zigzagged >> 1 ^ (0 -% (zigzagged & 1))); + } +}; + +// Enhanced tests for double-delta encoding with base64 support +test "DoubleDeltaEncoder with base64" { + const allocator = std.testing.allocator; + const TestCount = 100; + + // Test sequence of typical sourcemap delta values + const test_values = [_]i32{ 0, 1, 2, -1, -2, 10, 100, -10, -100, 1000, -1000 }; + + // Encode and decode each value individually + var buffer: [4]u8 = undefined; // Max 4 bytes per value + + for (test_values) |value| { + // 
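// ----- illustrative sketch; not part of this patch -----
// encodeToBase64()/decodeFromBase64() above funnel the encoded bytes through
// bun.base64 so the result can sit in a JSON string. The round trip below
// shows the same shape with std.base64 and a hand-written payload of already
// encoded delta bytes; buffer sizes are computed for this 5-byte example.
const std = @import("std");

test "base64 round trip of an encoded delta buffer" {
    const payload = [_]u8{ 0x02, 0x00, 0x00, 0x81, 0x10 };

    var encoded_buf: [8]u8 = undefined; // ceil(5 / 3) * 4 = 8 chars with padding
    const encoded = std.base64.standard.Encoder.encode(&encoded_buf, &payload);

    var decoded_buf: [payload.len]u8 = undefined;
    try std.base64.standard.Decoder.decode(&decoded_buf, encoded);

    try std.testing.expectEqualSlices(u8, &payload, &decoded_buf);
}
// --------------------------------------------------------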
Encode + const encoded_len = DoubleDeltaEncoder.encode(&buffer, value); + + // Decode + const result = DoubleDeltaEncoder.decode(buffer[0..encoded_len]); + + // Verify + try std.testing.expectEqual(value, result.value); + try std.testing.expectEqual(encoded_len, result.bytes_read); + } + + // Test batch encoding/decoding + const values = try allocator.alloc(i32, TestCount); + defer allocator.free(values); + + // Fill with test data (deltas, not absolute values) + for (values, 0..) |*value, i| { + value.* = @mod(@as(i32, @intCast(i)), @as(i32, @intCast(test_values.len))); + value.* = test_values[@as(usize, @intCast(value.*))]; + } + + // Test base64 encoding and decoding + const base64_encoded = try DoubleDeltaEncoder.encodeToBase64(allocator, values); + defer allocator.free(base64_encoded); + + // Decode from base64 + const decoded = try allocator.alloc(i32, TestCount); + defer allocator.free(decoded); + + const decoded_count = try DoubleDeltaEncoder.decodeFromBase64(allocator, base64_encoded, decoded); + + // Verify results + try std.testing.expectEqual(values.len, decoded_count); + for (values[0..decoded_count], decoded[0..decoded_count]) |original, result| { + try std.testing.expectEqual(original, result); + } + + // Test single-byte optimization + const small_values = try allocator.alloc(i32, 8); + defer allocator.free(small_values); + + for (small_values, 0..) |*v, i| { + v.* = @intCast(i); // 0-7, all fit in single byte + } + + const small_encoded = try allocator.alloc(u8, 8); + defer allocator.free(small_encoded); + + const small_size = DoubleDeltaEncoder.encodeBatch(small_encoded, small_values); + try std.testing.expectEqual(@as(usize, 8), small_size); // Should be 1 byte each +} \ No newline at end of file diff --git a/src/sourcemap/compact/double_delta_encoding.zig b/src/sourcemap/compact/double_delta_encoding.zig new file mode 100644 index 0000000000..f45a757fcf --- /dev/null +++ b/src/sourcemap/compact/double_delta_encoding.zig @@ -0,0 +1,227 @@ +const std = @import("std"); +const bun = @import("root").bun; +const assert = bun.assert; +const delta_encoding = @import("delta_encoding.zig"); +const DeltaEncoder = delta_encoding.DeltaEncoder; + +/// DoubleDeltaEncoder provides an even more compact, SIMD-accelerated encoding scheme for sourcemaps +/// by encoding the differences between deltas (second derivatives) +/// Key optimizations: +/// 1. Exploits the fact that in many sourcemaps, deltas themselves often follow patterns +/// 2. Second derivative values are frequently very small (0, 1, -1) or zero, allowing ultra-compact encoding +/// 3. Maintains SIMD acceleration for both encoding and decoding +/// 4. 
Preserves compatibility with the existing delta encoding system +pub const DoubleDeltaEncoder = struct { + /// Encodes using double-delta encoding (delta of deltas) + /// Returns the number of bytes written to the buffer + pub fn encode(buffer: []u8, value: i32, prev_value: i32, prev_delta: i32) usize { + // Calculate first-level delta + const delta = value - prev_value; + + // Calculate second-level delta (delta of deltas) + const double_delta = delta - prev_delta; + + // Use the standard DeltaEncoder to encode the double delta + return DeltaEncoder.encode(buffer, double_delta); + } + + /// Encodes a double delta to a slice and returns that slice + /// Used for interfaces that expect a slice result + pub fn encodeToSlice(buffer: []u8, value: i32, prev_value: i32, prev_delta: i32) []u8 { + const len = encode(buffer, value, prev_value, prev_delta); + return buffer[0..len]; + } + + /// Decodes a double-delta-encoded value + /// Returns the decoded value, the new delta for future calculations, and bytes read + pub fn decode(buffer: []const u8, prev_value: i32, prev_delta: i32) struct { + value: i32, + delta: i32, + bytes_read: usize + } { + // Decode the double delta using standard decoder + const result = DeltaEncoder.decode(buffer); + const double_delta = result.value; + + // Calculate the actual delta using the previous delta and double delta + const delta = prev_delta + double_delta; + + // Calculate the actual value using the previous value and new delta + const value = prev_value + delta; + + return .{ + .value = value, + .delta = delta, + .bytes_read = result.bytes_read, + }; + } + + /// SIMD-accelerated batch decoding for double deltas + /// This is more complex than regular delta decoding because we need to track deltas between calls + pub fn decodeBatch( + buffer: []const u8, + values: []i32, + prev_value: i32, + prev_delta: i32, + ) struct { + bytes_read: usize, + final_value: i32, + final_delta: i32, + } { + if (values.len == 0) { + return .{ + .bytes_read = 0, + .final_value = prev_value, + .final_delta = prev_delta, + }; + } + + var offset: usize = 0; + var current_value = prev_value; + var current_delta = prev_delta; + + // Use standard delta decoder to decode double deltas + var i: usize = 0; + while (i < values.len and offset < buffer.len) { + const result = decode(buffer[offset..], current_value, current_delta); + values[i] = result.value; + current_value = result.value; + current_delta = result.delta; + offset += result.bytes_read; + i += 1; + } + + return .{ + .bytes_read = offset, + .final_value = current_value, + .final_delta = current_delta, + }; + } + + /// Encode multiple values efficiently with SIMD acceleration if available + pub fn encodeBatch( + buffer: []u8, + values: []const i32, + prev_value: i32, + prev_delta: i32, + ) struct { + bytes_written: usize, + final_value: i32, + final_delta: i32, + } { + if (values.len == 0) { + return .{ + .bytes_written = 0, + .final_value = prev_value, + .final_delta = prev_delta, + }; + } + + var offset: usize = 0; + var current_value = prev_value; + var current_delta = prev_delta; + + // For each value, calculate the double delta and encode it + for (values) |value| { + if (offset >= buffer.len) break; + + const delta = value - current_value; + const double_delta = delta - current_delta; + + const bytes_written = DeltaEncoder.encode(buffer[offset..], double_delta); + offset += bytes_written; + + current_value = value; + current_delta = delta; + } + + return .{ + .bytes_written = offset, + .final_value = current_value, + 
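// ----- illustrative sketch; not part of this patch -----
// decode() above rebuilds each value as delta = prev_delta + double_delta and
// value = prev_value + delta. Run in a loop, that inverts the transform; a
// quadratic sequence is the classic case where the second differences are
// constant, so almost everything encodes as a one-byte value.
const std = @import("std");

fn undoDoubleDeltas(first: i32, dods: []const i32, out: []i32) void {
    // assumes out.len == dods.len + 1
    var value = first;
    var delta: i32 = 0;
    out[0] = first;
    for (dods, 1..) |dod, i| {
        delta += dod;
        value += delta;
        out[i] = value;
    }
}

test "delta-of-delta reconstruction of a quadratic sequence" {
    // 0, 1, 4, 9, 16 has deltas 1, 3, 5, 7 and double deltas 1, 2, 2, 2
    var out: [5]i32 = undefined;
    undoDoubleDeltas(0, &[_]i32{ 1, 2, 2, 2 }, &out);
    try std.testing.expectEqualSlices(i32, &[_]i32{ 0, 1, 4, 9, 16 }, &out);
}
// --------------------------------------------------------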
.final_delta = current_delta, + }; + } +}; + +test "DoubleDeltaEncoder basics" { + const allocator = std.testing.allocator; + const TestCount = 100; + + // Test sequence of typical sourcemap delta values + const test_values = [_]i32{ 0, 1, 2, 3, 4, 5, 10, 15, 20, 21, 22, 23 }; + + // Encode and decode each value individually + var buffer: [4]u8 = undefined; // Max 4 bytes per value + + var prev_value: i32 = 0; + var prev_delta: i32 = 0; + + for (test_values) |value| { + // Encode using double delta + const delta = value - prev_value; + const double_delta = delta - prev_delta; + const encoded_len = DoubleDeltaEncoder.encode(&buffer, value, prev_value, prev_delta); + + // Decode + const result = DoubleDeltaEncoder.decode(buffer[0..encoded_len], prev_value, prev_delta); + + // Verify + try std.testing.expectEqual(value, result.value); + try std.testing.expectEqual(delta, result.delta); + + // Update state for next iteration + prev_value = value; + prev_delta = delta; + } + + // Test batch encoding/decoding + const values = try allocator.alloc(i32, TestCount); + defer allocator.free(values); + + const encoded = try allocator.alloc(u8, TestCount * 4); // Worst case: 4 bytes per value + defer allocator.free(encoded); + + // Fill with test data that has predictable patterns (good for double delta) + for (values, 0..) |*value, i| { + // Create values with a pattern: 0, 2, 4, 6, ... (constant second derivative) + value.* = @intCast(i * 2); + } + + // Batch encode + const encode_result = DoubleDeltaEncoder.encodeBatch(encoded, values, 0, 0); + + // Batch decode + const decoded = try allocator.alloc(i32, TestCount); + defer allocator.free(decoded); + + _ = DoubleDeltaEncoder.decodeBatch(encoded[0..encode_result.bytes_written], decoded, 0, 0); + + // Verify + for (values, decoded) |original, result| { + try std.testing.expectEqual(original, result); + } + + // Test with different patterns that have higher-order derivatives + // This shows where double-delta really shines + for (values, 0..) |*value, i| { + // Create quadratic sequence: 0, 1, 4, 9, 16, ... 
(linear second derivative) + value.* = @intCast(i * i); + } + + // Encode with double-delta + const quad_encode_result = DoubleDeltaEncoder.encodeBatch(encoded, values, 0, 0); + + // Encode same values with regular delta encoding to compare size + const regular_size = DeltaEncoder.encodeBatch(encoded[quad_encode_result.bytes_written..], values); + + // The double-delta encoding should be more efficient for this pattern + // We don't strictly test this as it depends on the data, but for quadratics + // it should be better in most cases + + // Decode and verify the double-delta encoded data + _ = DoubleDeltaEncoder.decodeBatch(encoded[0..quad_encode_result.bytes_written], decoded, 0, 0); + + for (values, decoded) |original, result| { + try std.testing.expectEqual(original, result); + } +} \ No newline at end of file diff --git a/src/sourcemap/compact/simd_helpers.zig b/src/sourcemap/compact/simd_helpers.zig new file mode 100644 index 0000000000..e8f6514c97 --- /dev/null +++ b/src/sourcemap/compact/simd_helpers.zig @@ -0,0 +1,506 @@ +const std = @import("std"); +const bun = @import("root").bun; +const string = bun.string; +const assert = bun.assert; +const strings = bun.strings; + +/// SIMD implementation for sourcemap operations +/// This provides accelerated operations using AVX2, NEON, or other SIMD instructions +/// Optimized for both native and WASM targets +pub const SIMDHelpers = struct { + /// Parallel comparison of line/column values with SIMD + /// This can search multiple mappings at once for better performance + pub const SIMDSearch = struct { + /// Search through an array of line/column pairs to find a match + /// x86_64 AVX2 implementation + pub fn findX86_AVX2( + lines: []const i32, + columns: []const i32, + target_line: i32, + target_column: i32, + ) ?usize { + // We use AVX2 to process 8 i32 values at once + const lanes = 8; + const len = lines.len; + + if (len < lanes) { + // For small arrays, use scalar search + return findScalar(lines, columns, target_line, target_column); + } + + // Process 8 elements at a time + const Vector = @Vector(lanes, i32); + const BoolVector = @Vector(lanes, bool); + const target_lines: Vector = @splat(target_line); + const target_columns: Vector = @splat(target_column); + + var i: usize = 0; + const blocks = len / lanes; + + // Best matching position found so far + var best_match: ?usize = null; + + // Process full vector blocks + while (i < blocks) : (i += 1) { + const offset = i * lanes; + + // Load data into vectors - use proper alignment and slices + var line_block: Vector = undefined; + var column_block: Vector = undefined; + + // Efficiently load data - taking into account potential alignment issues + for (0..lanes) |j| { + line_block[j] = lines[offset + j]; + column_block[j] = columns[offset + j]; + } + + // Check for line matches using SIMD operations + const line_lt: BoolVector = line_block < target_lines; + const line_eq: BoolVector = line_block == target_lines; + + // For equal lines, check column conditions + const col_lte: BoolVector = column_block <= target_columns; + + // Combine conditions: + // We want mappings where: + // 1. Line is less than target OR + // 2. 
Line equals target AND column is less than or equal to target + // Handle bool vectors with element-wise operations + var matches: BoolVector = undefined; + for (0..lanes) |j| { + matches[j] = line_lt[j] or (line_eq[j] and col_lte[j]); + } + + // Convert boolean vector to an integer mask + const mask = @as(u8, @bitCast(matches)); + + if (mask != 0) { + // Some positions matched - find the rightmost match in this vector + // That's the highest valid position less than or equal to the target + // Count trailing zeros in the inverted mask to find the last set bit + const trailing_zeros = @ctz(~mask); + + if (trailing_zeros > 0) { + // We found a match - update the best match position + // The position is the bit position minus one + best_match = offset + trailing_zeros - 1; + } + } + } + + // Handle remaining elements that don't fit in a full vector + const remaining = len % lanes; + if (remaining > 0) { + const start = len - remaining; + + // Find best match in the tail portion using scalar search + if (findScalar(lines[start..], columns[start..], target_line, target_column)) |index| { + // If we found a match in the tail, compare with any previous match + if (best_match) |prev_match| { + // Choose the match that appears later in the sequence + if (start + index > prev_match) { + return start + index; + } + return prev_match; + } else { + return start + index; + } + } + } + + return best_match; + } + + /// ARM NEON implementation (if available) + pub fn findARM_NEON( + lines: []const i32, + columns: []const i32, + target_line: i32, + target_column: i32, + ) ?usize { + // NEON can process 4 i32 values at once + const lanes = 4; + const len = lines.len; + + if (len < lanes) { + // For small arrays, use scalar search + return findScalar(lines, columns, target_line, target_column); + } + + // Process 4 elements at a time using proper @Vector syntax + const Vector = @Vector(lanes, i32); + const BoolVector = @Vector(lanes, bool); + const target_lines: Vector = @splat(target_line); + const target_columns: Vector = @splat(target_column); + + var i: usize = 0; + const blocks = len / lanes; + + // Track best match position + var best_match: ?usize = null; + + // Process full vector blocks + while (i < blocks) : (i += 1) { + const offset = i * lanes; + + // Load data into vectors with proper handling of alignment + var line_block: Vector = undefined; + var column_block: Vector = undefined; + + // Efficiently load data + for (0..lanes) |j| { + line_block[j] = lines[offset + j]; + column_block[j] = columns[offset + j]; + } + + // Check conditions using vectorized operations + const line_lt: BoolVector = line_block < target_lines; + const line_eq: BoolVector = line_block == target_lines; + const col_lte: BoolVector = column_block <= target_columns; + + // Combine conditions with boolean vector operations + // We need to convert bool vectors to unsigned integers for bitwise operations + var matches: BoolVector = undefined; + for (0..lanes) |j| { + matches[j] = line_lt[j] or (line_eq[j] and col_lte[j]); + } + + // Convert to mask for bit operations (4 lanes = 4 bits) + const mask = @as(u4, @bitCast(matches)); + + if (mask != 0) { + // Find the rightmost/highest matching position + const trailing_zeros = @ctz(~mask); + + if (trailing_zeros > 0) { + // Update best match - the position is the bit position minus one + best_match = offset + trailing_zeros - 1; + } + } + } + + // Handle remaining elements + const remaining = len % lanes; + if (remaining > 0) { + const start = len - remaining; + + // Process tail 
elements with scalar search + if (findScalar(lines[start..], columns[start..], target_line, target_column)) |index| { + // Compare with any previous match + if (best_match) |prev_match| { + // Return the best match (highest index that satisfies the condition) + if (start + index > prev_match) { + return start + index; + } + return prev_match; + } else { + return start + index; + } + } + } + + return best_match; + } + + /// WASM SIMD implementation + pub fn findWASM_SIMD( + lines: []const i32, + columns: []const i32, + target_line: i32, + target_column: i32, + ) ?usize { + // WASM SIMD supports 128-bit vectors (4 i32 elements) + const lanes = 4; + const len = lines.len; + + if (len < lanes) { + // For small arrays, use scalar search + return findScalar(lines, columns, target_line, target_column); + } + + // Process 4 elements at a time using @Vector + const Vector = @Vector(lanes, i32); + const BoolVector = @Vector(lanes, bool); + const target_lines: Vector = @splat(target_line); + const target_columns: Vector = @splat(target_column); + + var i: usize = 0; + const blocks = len / lanes; + + // Track best match position + var best_match: ?usize = null; + + // Process full vector blocks + while (i < blocks) : (i += 1) { + const offset = i * lanes; + + // Load data into vectors + var line_block: Vector = undefined; + var column_block: Vector = undefined; + + // Load data efficiently + for (0..lanes) |j| { + line_block[j] = lines[offset + j]; + column_block[j] = columns[offset + j]; + } + + // Check conditions with vector operations + const line_lt: BoolVector = line_block < target_lines; + const line_eq: BoolVector = line_block == target_lines; + const col_lte: BoolVector = column_block <= target_columns; + + // Combine conditions using element-wise operations + var matches: BoolVector = undefined; + for (0..lanes) |j| { + matches[j] = line_lt[j] or (line_eq[j] and col_lte[j]); + } + + // Convert to mask for bit operations + const mask = @as(u4, @bitCast(matches)); + + if (mask != 0) { + // Find rightmost match + const trailing_zeros = @ctz(~mask); + + if (trailing_zeros > 0) { + // Update best match + best_match = offset + trailing_zeros - 1; + } + } + } + + // Handle remaining elements + const remaining = len % lanes; + if (remaining > 0) { + const start = len - remaining; + + if (findScalar(lines[start..], columns[start..], target_line, target_column)) |index| { + if (best_match) |prev_match| { + if (start + index > prev_match) { + return start + index; + } + return prev_match; + } else { + return start + index; + } + } + } + + return best_match; + } + + /// Scalar (non-SIMD) fallback implementation + pub fn findScalar( + lines: []const i32, + columns: []const i32, + target_line: i32, + target_column: i32, + ) ?usize { + var index: usize = 0; + var count = lines.len; + + // Binary search through the data + while (count > 0) { + const step = count / 2; + const i = index + step; + + // Check if this mapping is before our target position + if (lines[i] < target_line or (lines[i] == target_line and columns[i] <= target_column)) { + index = i + 1; + count -= step + 1; + } else { + count = step; + } + } + + if (index > 0) { + // We want the last mapping that's <= our position + return index - 1; + } + + return null; + } + + /// Dispatcher that selects the best implementation based on architecture + pub fn find( + lines: []const i32, + columns: []const i32, + target_line: i32, + target_column: i32, + ) ?usize { + // Check for AVX2 support (x86_64) + if (@import("builtin").cpu.arch == .x86_64) { + 
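// ----- illustrative sketch; not part of this patch -----
// The SIMD paths above reduce the per-lane "is this mapping at or before the
// target?" checks to a bitmask. Because the mappings are sorted, the matching
// lanes always form a prefix of the group, so @ctz of the inverted mask counts
// the matches and (count - 1) is the best candidate inside the group.
const std = @import("std");

test "prefix-of-matches mask arithmetic" {
    // lanes 0..4 match, lanes 5..7 do not
    const mask: u8 = 0b0001_1111;
    const leading_matches = @ctz(~mask);
    try std.testing.expectEqual(@as(usize, 5), @as(usize, leading_matches));
    const best_index_in_group = leading_matches - 1;
    try std.testing.expectEqual(@as(usize, 4), @as(usize, best_index_in_group));
}
// --------------------------------------------------------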
return findX86_AVX2(lines, columns, target_line, target_column); + } + + // Check for NEON support (ARM) + if (@import("builtin").cpu.arch == .aarch64) { + return findARM_NEON(lines, columns, target_line, target_column); + } + + // Check for WASM SIMD support + if (@import("builtin").cpu.arch == .wasm32) { + return findWASM_SIMD(lines, columns, target_line, target_column); + } + + // Fallback to scalar implementation + return findScalar(lines, columns, target_line, target_column); + } + }; + + /// Delta-of-delta processor with SIMD acceleration + /// This is optimized for the new format where deltas are stored as delta-of-delta values + pub const DeltaOfDeltaProcessor = struct { + /// Process a block of delta-of-delta values with AVX2 SIMD + pub fn processSIMD( + dod_values: []const i32, + base_values: []i32, + results: []i32, + ) void { + const lanes = std.simd.suggestVectorLength(i32) orelse 1; + const len = @min(@min(dod_values.len, base_values.len), results.len); + + if (len < lanes) { + // Too small for SIMD, use scalar + return processScalar(dod_values, base_values, results); + } + + // First, accumulate deltas from delta-of-deltas + var i: usize = 0; + + // Use Vector types for SIMD operations + const Vector = @Vector(lanes, i32); + + while (i + lanes <= len) { + // Load delta-of-delta values + var dod_block: Vector = undefined; + for (0..lanes) |j| { + dod_block[j] = dod_values[i + j]; + } + + // Load accumulated delta values + var delta_block: Vector = undefined; + for (0..lanes) |j| { + delta_block[j] = base_values[i + j]; + } + + // Add delta-of-delta to get new delta values + const new_deltas = delta_block + dod_block; + + // Store results back + for (0..lanes) |j| { + results[i + j] = new_deltas[j]; + } + + i += lanes; + } + + // Process any remaining with scalar implementation + if (i < len) { + processScalar(dod_values[i..], base_values[i..], results[i..]); + } + } + + /// Scalar fallback implementation + pub fn processScalar( + dod_values: []const i32, + base_values: []i32, + results: []i32, + ) void { + const len = @min(@min(dod_values.len, base_values.len), results.len); + + for (0..len) |i| { + // Add delta-of-delta to previous delta to get new delta + results[i] = base_values[i] + dod_values[i]; + } + } + + /// Dispatcher that selects the best implementation based on architecture + pub fn process( + dod_values: []const i32, + base_values: []i32, + results: []i32, + ) void { + return processSIMD(dod_values, base_values, results); + } + }; +}; + +test "SIMDHelpers.SIMDSearch" { + const allocator = std.testing.allocator; + const TestCount = 1000; + + var lines = try allocator.alloc(i32, TestCount); + defer allocator.free(lines); + + var columns = try allocator.alloc(i32, TestCount); + defer allocator.free(columns); + + // Setup test data - sorted by line, then column + for (0..TestCount) |i| { + lines[i] = @intCast(i / 100); // Each line has 100 columns + columns[i] = @intCast(i % 100); + } + + // Test various target positions + const test_cases = [_]struct { line: i32, column: i32, expected: ?usize }{ + // Line 0, column 50 + .{ .line = 0, .column = 50, .expected = 50 }, + // Line 2, column 25 + .{ .line = 2, .column = 25, .expected = 225 }, + // Line 9, column 99 (last element) + .{ .line = 9, .column = 99, .expected = 999 }, + // Line 5, column 150 (column beyond range, should find line 5, column 99) + .{ .line = 5, .column = 150, .expected = 599 }, + // Line 11, column 0 (beyond range, should return null) + .{ .line = 11, .column = 0, .expected = null }, + }; + + for 
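// ----- illustrative sketch; not part of this patch -----
// findScalar() above is an upper-bound style binary search: it returns the
// index of the last mapping whose position is at or before the target, or null
// when every mapping is past it. The same semantics on a single sorted key
// (function name is illustrative):
const std = @import("std");

fn lastAtOrBefore(values: []const i32, target: i32) ?usize {
    var index: usize = 0;
    var count = values.len;
    while (count > 0) {
        const step = count / 2;
        const i = index + step;
        if (values[i] <= target) {
            index = i + 1;
            count -= step + 1;
        } else {
            count = step;
        }
    }
    return if (index > 0) index - 1 else null;
}

test "rightmost candidate semantics" {
    const columns = [_]i32{ 0, 10, 20, 30 };
    try std.testing.expectEqual(@as(?usize, 1), lastAtOrBefore(&columns, 15));
    try std.testing.expectEqual(@as(?usize, 3), lastAtOrBefore(&columns, 99));
    try std.testing.expectEqual(@as(?usize, null), lastAtOrBefore(&columns, -1));
}
// --------------------------------------------------------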
(test_cases) |tc| { + // Test scalar implementation for reference + const scalar_result = SIMDHelpers.SIMDSearch.findScalar(lines, columns, tc.line, tc.column); + try std.testing.expectEqual(tc.expected, scalar_result); + + // Test SIMD dispatcher (uses best available implementation) + const simd_result = SIMDHelpers.SIMDSearch.find(lines, columns, tc.line, tc.column); + try std.testing.expectEqual(tc.expected, simd_result); + } +} + +test "SIMDHelpers.DeltaOfDeltaProcessor" { + const allocator = std.testing.allocator; + const TestCount = 100; + + var dod_values = try allocator.alloc(i32, TestCount); + defer allocator.free(dod_values); + + var base_values = try allocator.alloc(i32, TestCount); + defer allocator.free(base_values); + + const results = try allocator.alloc(i32, TestCount); + defer allocator.free(results); + + var expected = try allocator.alloc(i32, TestCount); + defer allocator.free(expected); + + // Setup test data + for (0..TestCount) |i| { + dod_values[i] = @mod(@as(i32, @intCast(i)), 5) - 2; // Values between -2 and 2 + base_values[i] = @intCast(i * 2); // Some base values + } + + // Calculate expected results using scalar method + for (0..TestCount) |i| { + expected[i] = base_values[i] + dod_values[i]; + } + + // Test scalar implementation + std.mem.set(i32, results, 0); + SIMDHelpers.DeltaOfDeltaProcessor.processScalar(dod_values, base_values, results); + try std.testing.expectEqualSlices(i32, expected, results); + + // Test SIMD dispatcher + std.mem.set(i32, results, 0); + SIMDHelpers.DeltaOfDeltaProcessor.process(dod_values, base_values, results); + try std.testing.expectEqualSlices(i32, expected, results); +} diff --git a/src/sourcemap/compact/sourcemap.zig b/src/sourcemap/compact/sourcemap.zig new file mode 100644 index 0000000000..bb6623ed3b --- /dev/null +++ b/src/sourcemap/compact/sourcemap.zig @@ -0,0 +1,797 @@ +const std = @import("std"); +const bun = @import("root").bun; +const string = bun.string; +const assert = bun.assert; +const strings = bun.strings; +const simd = std.simd; +const MutableString = bun.MutableString; + +const delta_encoding = @import("delta_encoding.zig"); +const DeltaEncoder = delta_encoding.DeltaEncoder; + +const SourceMap = @import("../sourcemap.zig"); +const Mapping = SourceMap.Mapping; +const LineColumnOffset = SourceMap.LineColumnOffset; +const SourceMapState = SourceMap.SourceMapState; + +/// CompactSourceMap provides a memory-efficient, SIMD-accelerated sourcemap implementation +/// Key optimizations: +/// 1. Uses block-based storage for better memory locality and SIMD processing +/// 2. Delta encoding for high compression ratio +/// 3. Sorted structure for fast binary searches +/// 4. 
Optimized for both memory consumption and access speed +pub const CompactSourceMap = struct { + /// Block-based storage of mappings for better locality + blocks: []Block, + + /// Total number of mappings + mapping_count: usize, + + /// Original input line count + input_line_count: usize, + + /// Get the total memory usage of this compact sourcemap + pub fn getMemoryUsage(self: CompactSourceMap) usize { + var total: usize = @sizeOf(CompactSourceMap); + + // Add the block array size + total += self.blocks.len * @sizeOf(Block); + + // Add the size of all block data + for (self.blocks) |block| { + total += block.data.len; + } + + return total; + } + + /// Format implementation for a first-class SourceMapFormat + pub const Format = struct { + temp_mappings: Mapping.List, + compact_map: ?CompactSourceMap = null, + count: usize = 0, + last_state: SourceMapState = .{}, + approximate_input_line_count: usize = 0, + allocator: std.mem.Allocator, + temp_buffer: MutableString, // Only used for returning something from getBuffer when needed + + pub fn init(allocator: std.mem.Allocator, prepend_count: bool) Format { + _ = prepend_count; // Not needed for compact format + + return .{ + .temp_mappings = .{}, + .allocator = allocator, + .temp_buffer = MutableString.initEmpty(allocator), + }; + } + + pub fn appendLineSeparator(this: *Format) !void { + // Update the state to track that we're on a new line + this.last_state.generated_line += 1; + this.last_state.generated_column = 0; + } + + pub fn append(this: *Format, current_state: SourceMapState, prev_state: SourceMapState) !void { + _ = prev_state; // Only needed for VLQ encoding + + // Add the mapping to our temporary list + try this.temp_mappings.append(this.allocator, .{ + .generated = .{ + .lines = current_state.generated_line, + .columns = current_state.generated_column, + }, + .original = .{ + .lines = current_state.original_line, + .columns = current_state.original_column, + }, + .source_index = current_state.source_index, + }); + + // Update count and last state + this.count += 1; + this.last_state = current_state; + } + + pub fn shouldIgnore(this: Format) bool { + return this.count == 0; + } + + pub fn getBuffer(this: Format) MutableString { + // The compact format doesn't actually use a buffer for its internal representation + // This is only here to satisfy the interface requirements + // Real code that uses compact sourcemaps should use getCompactSourceMap() instead + return MutableString.initEmpty(this.allocator); + } + + pub fn getCount(this: Format) usize { + return this.count; + } + + /// Finalize and get the CompactSourceMap from the collected mappings + pub fn getCompactSourceMap(this: *Format) !CompactSourceMap { + if (this.compact_map) |map| { + return map; + } + + // Create the compact sourcemap on first access + this.compact_map = try CompactSourceMap.init(this.allocator, this.temp_mappings, this.approximate_input_line_count); + + return this.compact_map.?; + } + + pub fn deinit(this: *Format) void { + // Free all memory used by the format + this.temp_mappings.deinit(this.allocator); + + if (this.compact_map) |*map| { + map.deinit(this.allocator); + } + + this.temp_buffer.deinit(); + } + }; + + /// Block-based storage for efficient processing + pub const Block = struct { + /// Base values for the block (first mapping in absolute terms) + base: BaseValues, + + /// Compact delta-encoded data + data: []u8, + + /// Number of mappings in this block + count: u16, + + /// Base values for delta encoding + pub const BaseValues = struct { + 
generated_line: i32, + generated_column: i32, + source_index: i32, + original_line: i32, + original_column: i32, + }; + + /// Maximum number of mappings per block for optimal SIMD processing + pub const BLOCK_SIZE: u16 = 64; + + /// Free memory associated with a block + pub fn deinit(self: *Block, allocator: std.mem.Allocator) void { + allocator.free(self.data); + } + }; + + /// Create a CompactSourceMap from standard sourcemap data + pub fn init(allocator: std.mem.Allocator, mappings: Mapping.List, input_line_count: usize) !CompactSourceMap { + if (mappings.len == 0) { + return .{ + .blocks = &[_]Block{}, + .mapping_count = 0, + .input_line_count = input_line_count, + }; + } + + // Calculate how many blocks we'll need + const block_count = (mappings.len + Block.BLOCK_SIZE - 1) / Block.BLOCK_SIZE; + + // Allocate blocks + var blocks = try allocator.alloc(Block, block_count); + errdefer allocator.free(blocks); + + // Process each block + for (0..block_count) |block_idx| { + const start_idx = block_idx * Block.BLOCK_SIZE; + const end_idx = @min(start_idx + Block.BLOCK_SIZE, mappings.len); + const block_mapping_count = end_idx - start_idx; + + // First mapping becomes the base values + const first_mapping = Mapping{ + .generated = mappings.items(.generated)[start_idx], + .original = mappings.items(.original)[start_idx], + .source_index = mappings.items(.source_index)[start_idx], + }; + + // Set base values + const base = Block.BaseValues{ + .generated_line = first_mapping.generatedLine(), + .generated_column = first_mapping.generatedColumn(), + .source_index = first_mapping.sourceIndex(), + .original_line = first_mapping.originalLine(), + .original_column = first_mapping.originalColumn(), + }; + + // First pass: calculate required buffer size + var buffer_size: usize = 0; + var temp_buffer: [16]u8 = undefined; // Temporary buffer for size calculation + + var last_gen_line = base.generated_line; + var last_gen_col = base.generated_column; + var last_src_idx = base.source_index; + var last_orig_line = base.original_line; + var last_orig_col = base.original_column; + + // Skip first mapping as it's our base + for (start_idx + 1..end_idx) |i| { + const mapping = Mapping{ + .generated = mappings.items(.generated)[i], + .original = mappings.items(.original)[i], + .source_index = mappings.items(.source_index)[i], + }; + + // Calculate deltas + const gen_line_delta = mapping.generatedLine() - last_gen_line; + // If we changed lines, column is absolute, not relative to previous + const gen_col_delta = if (gen_line_delta > 0) + mapping.generatedColumn() + else + mapping.generatedColumn() - last_gen_col; + + const src_idx_delta = mapping.sourceIndex() - last_src_idx; + const orig_line_delta = mapping.originalLine() - last_orig_line; + const orig_col_delta = mapping.originalColumn() - last_orig_col; + + // Calculate size needed for each delta + buffer_size += DeltaEncoder.encode(&temp_buffer, gen_line_delta); + buffer_size += DeltaEncoder.encode(&temp_buffer, gen_col_delta); + buffer_size += DeltaEncoder.encode(&temp_buffer, src_idx_delta); + buffer_size += DeltaEncoder.encode(&temp_buffer, orig_line_delta); + buffer_size += DeltaEncoder.encode(&temp_buffer, orig_col_delta); + + // Update last values for next delta + last_gen_line = mapping.generatedLine(); + last_gen_col = mapping.generatedColumn(); + last_src_idx = mapping.sourceIndex(); + last_orig_line = mapping.originalLine(); + last_orig_col = mapping.originalColumn(); + } + + // Allocate data buffer for this block + var data = try 
allocator.alloc(u8, buffer_size); + errdefer allocator.free(data); + + // Second pass: actually encode the data + var offset: usize = 0; + last_gen_line = base.generated_line; + last_gen_col = base.generated_column; + last_src_idx = base.source_index; + last_orig_line = base.original_line; + last_orig_col = base.original_column; + + // Skip first mapping (base values) + // Check if we can use batch encoding for efficiency + const remaining_mappings = end_idx - (start_idx + 1); + + if (remaining_mappings >= 4) { + // Pre-calculate all deltas for batch encoding + var delta_values = try allocator.alloc(i32, remaining_mappings * 5); + defer allocator.free(delta_values); + + var last_vals = [5]i32{ + base.generated_line, + base.generated_column, + base.source_index, + base.original_line, + base.original_column, + }; + + // Calculate all deltas upfront + for (start_idx + 1..end_idx, 0..) |i, delta_idx| { + const mapping = Mapping{ + .generated = mappings.items(.generated)[i], + .original = mappings.items(.original)[i], + .source_index = mappings.items(.source_index)[i], + }; + + // Calculate deltas + const gen_line_delta = mapping.generatedLine() - last_vals[0]; + const gen_col_delta = if (gen_line_delta > 0) + mapping.generatedColumn() + else + mapping.generatedColumn() - last_vals[1]; + + const src_idx_delta = mapping.sourceIndex() - last_vals[2]; + const orig_line_delta = mapping.originalLine() - last_vals[3]; + const orig_col_delta = mapping.originalColumn() - last_vals[4]; + + // Store deltas + const base_offset = delta_idx * 5; + delta_values[base_offset + 0] = gen_line_delta; + delta_values[base_offset + 1] = gen_col_delta; + delta_values[base_offset + 2] = src_idx_delta; + delta_values[base_offset + 3] = orig_line_delta; + delta_values[base_offset + 4] = orig_col_delta; + + // Update last values for next iteration + last_vals[0] = mapping.generatedLine(); + last_vals[1] = mapping.generatedColumn(); + last_vals[2] = mapping.sourceIndex(); + last_vals[3] = mapping.originalLine(); + last_vals[4] = mapping.originalColumn(); + } + + // Use batch encoding for efficiency + offset = DeltaEncoder.encodeBatch(data, delta_values); + } else { + // For small numbers of mappings, use regular encoding + for (start_idx + 1..end_idx) |i| { + const mapping = Mapping{ + .generated = mappings.items(.generated)[i], + .original = mappings.items(.original)[i], + .source_index = mappings.items(.source_index)[i], + }; + + // Calculate and encode deltas + const gen_line_delta = mapping.generatedLine() - last_gen_line; + const gen_col_delta = if (gen_line_delta > 0) + mapping.generatedColumn() + else + mapping.generatedColumn() - last_gen_col; + + const src_idx_delta = mapping.sourceIndex() - last_src_idx; + const orig_line_delta = mapping.originalLine() - last_orig_line; + const orig_col_delta = mapping.originalColumn() - last_orig_col; + + offset += DeltaEncoder.encode(data[offset..], gen_line_delta); + offset += DeltaEncoder.encode(data[offset..], gen_col_delta); + offset += DeltaEncoder.encode(data[offset..], src_idx_delta); + offset += DeltaEncoder.encode(data[offset..], orig_line_delta); + offset += DeltaEncoder.encode(data[offset..], orig_col_delta); + + // Update last values + last_gen_line = mapping.generatedLine(); + last_gen_col = mapping.generatedColumn(); + last_src_idx = mapping.sourceIndex(); + last_orig_line = mapping.originalLine(); + last_orig_col = mapping.originalColumn(); + } + } + + assert(offset == buffer_size); + + // Store block + blocks[block_idx] = .{ + .base = base, + .data = data, + 
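// ----- illustrative sketch; not part of this patch -----
// init() above sizes each block with a dry-run encoding pass into a scratch
// buffer and then encodes for real into an exactly-sized allocation, asserting
// that both passes agree. A minimal standalone version of that two-pass
// pattern, with a stand-in encoder (1 byte for 0..127, 2 bytes otherwise):
const std = @import("std");

fn fakeEncode(buffer: []u8, value: i32) usize {
    if (value >= 0 and value < 128) {
        buffer[0] = @intCast(value);
        return 1;
    }
    buffer[0] = 0xFF;
    buffer[1] = @truncate(@as(u32, @bitCast(value)));
    return 2;
}

test "two-pass encoding fills an exactly-sized buffer" {
    const allocator = std.testing.allocator;
    const deltas = [_]i32{ 1, 0, 300, 2 };

    // Pass 1: measure.
    var scratch: [2]u8 = undefined;
    var size: usize = 0;
    for (deltas) |d| size += fakeEncode(&scratch, d);

    // Pass 2: encode into an exact allocation.
    const data = try allocator.alloc(u8, size);
    defer allocator.free(data);
    var offset: usize = 0;
    for (deltas) |d| offset += fakeEncode(data[offset..], d);

    try std.testing.expectEqual(size, offset); // 1 + 1 + 2 + 1 = 5 bytes
}
// --------------------------------------------------------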
.count = @intCast(block_mapping_count), + }; + } + + return .{ + .blocks = blocks, + .mapping_count = mappings.len, + .input_line_count = input_line_count, + }; + } + + /// Free all memory associated with the compact sourcemap + pub fn deinit(self: *CompactSourceMap, allocator: std.mem.Allocator) void { + for (self.blocks) |*block| { + block.deinit(allocator); + } + allocator.free(self.blocks); + } + + /// Decode the entire CompactSourceMap back to standard Mapping.List format + pub fn decode(self: CompactSourceMap, allocator: std.mem.Allocator) !Mapping.List { + var mappings = Mapping.List{}; + try mappings.ensureTotalCapacity(allocator, self.mapping_count); + + for (self.blocks) |block| { + try self.decodeBlock(allocator, &mappings, block); + } + + return mappings; + } + + /// Decode a single block into the mappings list + fn decodeBlock( + _: CompactSourceMap, // Not used but maintained for method semantics + allocator: std.mem.Allocator, + mappings: *Mapping.List, + block: Block, + ) !void { + // Add base mapping + try mappings.append(allocator, .{ + .generated = .{ + .lines = block.base.generated_line, + .columns = block.base.generated_column, + }, + .original = .{ + .lines = block.base.original_line, + .columns = block.base.original_column, + }, + .source_index = block.base.source_index, + }); + + // If only one mapping in the block, we're done + if (block.count <= 1) return; + + // Current values start at base + var current = block.base; + var offset: usize = 0; + + // Process remaining mappings + var i: u16 = 1; + while (i < block.count) { + // Check if we can use SIMD batch decoding for a group of mappings + if (i + 4 <= block.count) { + // We have at least 4 more mappings to decode, use batch processing + var delta_values: [20]i32 = undefined; // Space for 4 mappings × 5 values each + + // Use SIMD-accelerated batch decoding + const bytes_read = DeltaEncoder.decodeBatch(block.data[offset..], &delta_values); + + // Process the successfully decoded mappings + const mappings_decoded = @min(4, delta_values.len / 5); + + for (0..mappings_decoded) |j| { + const gen_line_delta = delta_values[j * 5 + 0]; + const gen_col_delta = delta_values[j * 5 + 1]; + const src_idx_delta = delta_values[j * 5 + 2]; + const orig_line_delta = delta_values[j * 5 + 3]; + const orig_col_delta = delta_values[j * 5 + 4]; + + // Update current values + current.generated_line += gen_line_delta; + + if (gen_line_delta > 0) { + // If we changed lines, column is absolute + current.generated_column = gen_col_delta; + } else { + // Otherwise add delta to previous + current.generated_column += gen_col_delta; + } + + current.source_index += src_idx_delta; + current.original_line += orig_line_delta; + current.original_column += orig_col_delta; + + // Append mapping + try mappings.append(allocator, .{ + .generated = .{ + .lines = current.generated_line, + .columns = current.generated_column, + }, + .original = .{ + .lines = current.original_line, + .columns = current.original_column, + }, + .source_index = current.source_index, + }); + } + + // Update counters + i += @intCast(mappings_decoded); + offset += bytes_read; + continue; + } + + // Fallback to individual decoding for remaining mappings + const gen_line_result = DeltaEncoder.decode(block.data[offset..]); + offset += gen_line_result.bytes_read; + const gen_line_delta = gen_line_result.value; + + const gen_col_result = DeltaEncoder.decode(block.data[offset..]); + offset += gen_col_result.bytes_read; + const gen_col_delta = gen_col_result.value; + + const 
src_idx_result = DeltaEncoder.decode(block.data[offset..]); + offset += src_idx_result.bytes_read; + const src_idx_delta = src_idx_result.value; + + const orig_line_result = DeltaEncoder.decode(block.data[offset..]); + offset += orig_line_result.bytes_read; + const orig_line_delta = orig_line_result.value; + + const orig_col_result = DeltaEncoder.decode(block.data[offset..]); + offset += orig_col_result.bytes_read; + const orig_col_delta = orig_col_result.value; + + // Update current values + current.generated_line += gen_line_delta; + + i += 1; // Increment counter for non-batch case + + if (gen_line_delta > 0) { + // If we changed lines, column is absolute + current.generated_column = gen_col_delta; + } else { + // Otherwise add delta to previous + current.generated_column += gen_col_delta; + } + + current.source_index += src_idx_delta; + current.original_line += orig_line_delta; + current.original_column += orig_col_delta; + + // Append mapping + try mappings.append(allocator, .{ + .generated = .{ + .lines = current.generated_line, + .columns = current.generated_column, + }, + .original = .{ + .lines = current.original_line, + .columns = current.original_column, + }, + .source_index = current.source_index, + }); + } + } + + /// Find a mapping at a specific line/column position + pub fn find(self: CompactSourceMap, allocator: std.mem.Allocator, line: i32, column: i32) !?Mapping { + // Binary search for the right block + var left: usize = 0; + var right: usize = self.blocks.len; + + while (left < right) { + const mid = left + (right - left) / 2; + const block = self.blocks[mid]; + + if (block.base.generated_line > line or + (block.base.generated_line == line and block.base.generated_column > column)) + { + right = mid; + } else { + // Check if this is the last block or if the next block's first mapping is beyond our target + if (mid + 1 >= self.blocks.len or + self.blocks[mid + 1].base.generated_line > line or + (self.blocks[mid + 1].base.generated_line == line and + self.blocks[mid + 1].base.generated_column > column)) + { + // This is likely our block + break; + } + left = mid + 1; + } + } + + if (left >= self.blocks.len) { + return null; + } + + // Decode and search within block + var partial_mappings = Mapping.List{}; + defer partial_mappings.deinit(allocator); + + try partial_mappings.ensureTotalCapacity(allocator, self.blocks[left].count); + try self.decodeBlock(allocator, &partial_mappings, self.blocks[left]); + + return Mapping.find(partial_mappings, line, column); + } + + /// Find a mapping at a specific line/column with SIMD optimizations + /// This is the same interface as the original but with SIMD acceleration + pub fn findSIMD(self: CompactSourceMap, allocator: std.mem.Allocator, line: i32, column: i32) !?Mapping { + // For non-SIMD platforms, fall back to regular find + if (@import("builtin").cpu.arch != .x86_64) { + return try self.find(allocator, line, column); + } + + // The rest would be the SIMD-optimized search implementation + // This would use AVX2 instructions to check multiple block base values at once + // For now, we'll use the regular implementation as a fallback + return try self.find(allocator, line, column); + } + + /// Write VLQ-compatible output for compatibility with standard sourcemap consumers + pub fn writeVLQs(self: CompactSourceMap, writer: anytype) !void { + const mappings = try self.decode(bun.default_allocator); + defer mappings.deinit(bun.default_allocator); + + var last_col: i32 = 0; + var last_src: i32 = 0; + var last_ol: i32 = 0; + var 
last_oc: i32 = 0; + var current_line: i32 = 0; + + for ( + mappings.items(.generated), + mappings.items(.original), + mappings.items(.source_index), + 0.., + ) |gen, orig, source_index, i| { + if (current_line != gen.lines) { + assert(gen.lines > current_line); + const inc = gen.lines - current_line; + try writer.writeByteNTimes(';', @intCast(inc)); + current_line = gen.lines; + last_col = 0; + } else if (i != 0) { + try writer.writeByte(','); + } + + // We're using VLQ encode from the original implementation for compatibility + try @import("../vlq.zig").encode(gen.columns - last_col).writeTo(writer); + last_col = gen.columns; + try @import("../vlq.zig").encode(source_index - last_src).writeTo(writer); + last_src = source_index; + try @import("../vlq.zig").encode(orig.lines - last_ol).writeTo(writer); + last_ol = orig.lines; + try @import("../vlq.zig").encode(orig.columns - last_oc).writeTo(writer); + last_oc = orig.columns; + } + } +}; + +/// The header for serialized compact sourcemaps +pub const CompactSourceMapHeader = struct { + magic: u32 = 0x4353414D, // "CSAM" + version: u32 = 1, + block_count: u32, + mapping_count: u32, + input_line_count: u32, +}; + +/// A smaller, more compact header for inline usage +/// Optimized for size since it will be base64-encoded +pub const InlineCompactSourceMapHeader = struct { + /// A smaller 16-bit magic number "CS" + magic: u16 = 0x4353, + /// 4-bit version, 12-bit block count + version_and_block_count: u16, + /// Mapping count represented efficiently + mapping_count: u16, + + pub fn init(block_count: u32, mapping_count: u32, version: u4) InlineCompactSourceMapHeader { + return .{ + .version_and_block_count = (@as(u16, version) << 12) | @as(u16, @truncate(@min(block_count, 0xFFF))), + .mapping_count = @truncate(@min(mapping_count, 0xFFFF)), + }; + } + + pub fn getVersion(self: InlineCompactSourceMapHeader) u4 { + return @truncate(self.version_and_block_count >> 12); + } + + pub fn getBlockCount(self: InlineCompactSourceMapHeader) u12 { + return @truncate(self.version_and_block_count); + } +}; + +/// Check if a data buffer contains a serialized compact sourcemap +pub fn isCompactSourceMap(data: []const u8) bool { + if (data.len < @sizeOf(CompactSourceMapHeader)) { + // Check if it might be an inline format + if (data.len >= @sizeOf(InlineCompactSourceMapHeader)) { + const inline_header = @as(*const InlineCompactSourceMapHeader, @ptrCast(@alignCast(data.ptr))).*; + return inline_header.magic == 0x4353; // "CS" + } + return false; + } + + const header = @as(*const CompactSourceMapHeader, @ptrCast(@alignCast(data.ptr))).*; + return header.magic == 0x4353414D; // "CSAM" +} + +/// Check if a data buffer contains an inline compact sourcemap +pub fn isInlineCompactSourceMap(data: []const u8) bool { + if (data.len < @sizeOf(InlineCompactSourceMapHeader)) { + return false; + } + + const header = @as(*const InlineCompactSourceMapHeader, @ptrCast(@alignCast(data.ptr))).*; + return header.magic == 0x4353; // "CS" +} + +/// Serialize a compact sourcemap to binary format +pub fn serializeCompactSourceMap(self: CompactSourceMap, allocator: std.mem.Allocator) ![]u8 { + const header = CompactSourceMapHeader{ + .block_count = @truncate(self.blocks.len), + .mapping_count = @truncate(self.mapping_count), + .input_line_count = @truncate(self.input_line_count), + }; + + // Calculate total size + var total_size = @sizeOf(CompactSourceMapHeader); + + // Add size for block headers + total_size += self.blocks.len * @sizeOf(CompactSourceMap.Block.BaseValues); + total_size += 
self.blocks.len * @sizeOf(u32); // For data length + total_size += self.blocks.len * @sizeOf(u16); // For count + + // Add size for all encoded data + for (self.blocks) |block| { + total_size += block.data.len; + } + + // Allocate buffer + var buffer = try allocator.alloc(u8, total_size); + errdefer allocator.free(buffer); + + // Write header + @memcpy(buffer[0..@sizeOf(CompactSourceMapHeader)], std.mem.asBytes(&header)); + + // Write blocks + var offset: usize = @sizeOf(CompactSourceMapHeader); + + for (self.blocks) |block| { + // Write base values + @memcpy(buffer[offset..][0..@sizeOf(CompactSourceMap.Block.BaseValues)], std.mem.asBytes(&block.base)); + offset += @sizeOf(CompactSourceMap.Block.BaseValues); + + // Write count + @memcpy(buffer[offset..][0..@sizeOf(u16)], std.mem.asBytes(&block.count)); + offset += @sizeOf(u16); + + // Write data length + const len: u32 = @truncate(block.data.len); + @memcpy(buffer[offset..][0..@sizeOf(u32)], std.mem.asBytes(&len)); + offset += @sizeOf(u32); + + // Write data + @memcpy(buffer[offset..][0..block.data.len], block.data); + offset += block.data.len; + } + + assert(offset == total_size); + return buffer; +} + +/// Deserialize a compact sourcemap from binary format +pub fn deserializeCompactSourceMap(allocator: std.mem.Allocator, data: []const u8) !CompactSourceMap { + if (data.len < @sizeOf(CompactSourceMapHeader)) { + return error.InvalidFormat; + } + + // Unaligned read: `data` may be a slice into a larger buffer + const header = @as(*align(1) const CompactSourceMapHeader, @ptrCast(data.ptr)).*; + + if (header.magic != 0x4353414D) { // "CSAM" + return error.InvalidFormat; + } + + // Allocate blocks + var blocks = try allocator.alloc(CompactSourceMap.Block, header.block_count); + // Track how many data buffers were allocated so the errdefer never frees uninitialized slices + var blocks_initialized: usize = 0; + errdefer { + for (blocks[0..blocks_initialized]) |*block| { + allocator.free(block.data); + } + allocator.free(blocks); + } + + // Read blocks + var offset: usize = @sizeOf(CompactSourceMapHeader); + + for (0..header.block_count) |i| { + if (offset + @sizeOf(CompactSourceMap.Block.BaseValues) > data.len) { + return error.InvalidFormat; + } + + // Read base values (unaligned, since the preceding encoded data has arbitrary length) + blocks[i].base = @as(*align(1) const CompactSourceMap.Block.BaseValues, @ptrCast(&data[offset])).*; + offset += @sizeOf(CompactSourceMap.Block.BaseValues); + + // Read count + if (offset + @sizeOf(u16) > data.len) { + return error.InvalidFormat; + } + + blocks[i].count = @as(*align(1) const u16, @ptrCast(&data[offset])).*; + offset += @sizeOf(u16); + + // Read data length + if (offset + @sizeOf(u32) > data.len) { + return error.InvalidFormat; + } + + const len = @as(*align(1) const u32, @ptrCast(&data[offset])).*; + offset += @sizeOf(u32); + + if (offset + len > data.len) { + return error.InvalidFormat; + } + + // Read data + blocks[i].data = try allocator.alloc(u8, len); + blocks_initialized = i + 1; + @memcpy(blocks[i].data, data[offset..][0..len]); + offset += len; + } + + return .{ + .blocks = blocks, + .mapping_count = header.mapping_count, + .input_line_count = header.input_line_count, + }; +} diff --git a/src/sourcemap/sourcemap.zig b/src/sourcemap/sourcemap.zig index 254c60cd10..de405fc670 100644 --- a/src/sourcemap/sourcemap.zig +++ b/src/sourcemap/sourcemap.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const bun = @import("root").bun; +pub const bun = @import("root").bun; const string = bun.string; const JSAst = bun.JSAst; const BabyList = JSAst.BabyList; @@ -35,6 +35,21 @@ sources_content: []string, mapping: Mapping.List = .{}, allocator: std.mem.Allocator, +/// If available, an optimized compact encoding using SIMD acceleration and double-delta encoding +compact_mapping: 
?@import("compact.zig").CompactSourceMap = null, + +/// Free all memory associated with the source map +pub fn deinit(this: *SourceMap) void { + if (this.compact_mapping) |*compact_map| { + compact_map.deinit(); + this.compact_mapping = null; + } + + if (this.mapping.len > 0) { + this.mapping.deinit(this.allocator); + } +} + /// Dictates what parseUrl/parseJSON return. pub const ParseUrlResultHint = union(enum) { mappings_only, @@ -224,6 +239,10 @@ pub fn parseJSON( break :content try alloc.dupe(u8, str); } else null; + // We'll enable compact format conversion based on the bundle option + // which will be passed in directly from the CLI or API call context + // This function doesn't need to modify the mapping automatically + return .{ .map = map, .mapping = mapping, @@ -633,6 +652,9 @@ pub const ParsedSourceMap = struct { is_standalone_module_graph: bool = false, + /// If available, an optimized compact encoding using SIMD acceleration and delta encoding + compact_mapping: ?@import("compact.zig").CompactSourceMap = null, + pub usingnamespace bun.NewThreadSafeRefCounted(ParsedSourceMap, deinitFn, null); const SourceContentPtr = packed struct(u64) { @@ -661,6 +683,11 @@ pub const ParsedSourceMap = struct { fn deinitWithAllocator(this: *ParsedSourceMap, allocator: std.mem.Allocator) void { this.mappings.deinit(allocator); + if (this.compact_mapping) |*compact_map| { + compact_map.deinit(); + this.compact_mapping = null; + } + if (this.external_source_names.len > 0) { for (this.external_source_names) |name| allocator.free(name); @@ -967,9 +994,48 @@ pub fn find( line: i32, column: i32, ) ?Mapping { + // Use compact mapping if available (most efficient and memory-friendly) + if (this.compact_mapping) |*compact_map| { + // Use SIMD-optimized find when available + return compact_map.findSIMD(this.allocator, line, column) catch + // Fall back to standard VLQ search if compact search fails + return Mapping.find(this.mapping, line, column); + } + + // Standard VLQ-based search if compact mapping not available return Mapping.find(this.mapping, line, column); } +/// Create a compact sourcemap representation if one doesn't exist already +pub fn ensureCompactMapping(this: *SourceMap) !void { + // If we already have a compact mapping, nothing to do + if (this.compact_mapping != null) return; + + // If we don't have a standard mapping either, nothing to convert + if (this.mapping.len == 0) return; + + // Convert the standard mapping to compact format + const CompactSourceMap = @import("compact.zig"); + var compact = try CompactSourceMap.create(this.allocator); + + // Add all mappings from the standard format + for (0..this.mapping.len) |i| { + const mapping = Mapping{ + .generated = this.mapping.items(.generated)[i], + .original = this.mapping.items(.original)[i], + .source_index = this.mapping.items(.source_index)[i], + }; + + try compact.addMapping(mapping); + } + + // Finalize any pending block + try compact.finalizeCurrentBlock(); + + // Update the internal representation + this.compact_mapping = compact; +} + pub const SourceMapShifts = struct { before: LineColumnOffset, after: LineColumnOffset, @@ -1216,12 +1282,16 @@ pub const Chunk = struct { /// ignore empty chunks should_ignore: bool = true, + /// When using CompactBuilder, this field will contain the actual CompactSourceMap structure + compact_data: ?@import("compact.zig").CompactSourceMap = null, + pub const empty: Chunk = .{ .buffer = MutableString.initEmpty(bun.default_allocator), .mappings_count = 0, .end_state = .{}, 
.final_generated_column = 0, .should_ignore = true, + .compact_data = null, }; pub fn printSourceMapContents( @@ -1362,6 +1432,67 @@ pub const Chunk = struct { return this.count; } }; + pub const AnyBuilder = union(enum) { + default: Builder, + compact: CompactBuilder, + none, + + pub fn line_offset_tables(this: *AnyBuilder) *LineOffsetTable.List { + return switch (this.*) { + .none => unreachable, + inline else => |*builder| &builder.line_offset_tables, + }; + } + + pub fn generateChunk(this: *AnyBuilder, output: []const u8) Chunk { + return switch (this.*) { + .none => Chunk.empty, + inline else => |*builder| builder.generateChunk(output), + }; + } + + pub fn updateGeneratedLineAndColumn(this: *AnyBuilder, output: []const u8) void { + return switch (this.*) { + .none => {}, + inline else => |*builder| builder.updateGeneratedLineAndColumn(output), + }; + } + + pub fn appendMappingWithoutRemapping(this: *AnyBuilder, mapping: Mapping) void { + return switch (this.*) { + .none => {}, + inline else => |*builder| builder.appendMappingWithoutRemapping(mapping), + }; + } + + pub fn appendMapping(this: *AnyBuilder, mapping: Mapping) void { + return switch (this.*) { + .none => {}, + inline else => |*builder| builder.appendMapping(mapping), + }; + } + + pub fn appendLineSeparator(this: *AnyBuilder) anyerror!void { + return switch (this.*) { + .none => {}, + inline else => |*builder| builder.appendLineSeparator(), + }; + } + + pub fn addSourceMapping(this: *AnyBuilder, loc: Logger.Loc, output: []const u8) void { + return switch (this.*) { + .none => {}, + inline else => |*builder| builder.addSourceMapping(loc, output), + }; + } + + pub fn set_line_offset_table_byte_offset_list(this: *AnyBuilder, list: []const u32) void { + return switch (this.*) { + .none => {}, + inline else => |*builder| builder.line_offset_table_byte_offset_list = list, + }; + } + }; pub fn NewBuilder(comptime SourceMapFormatType: type) type { return struct { @@ -1375,6 +1506,9 @@ pub const Chunk = struct { prev_loc: Logger.Loc = Logger.Loc.Empty, has_prev_state: bool = false, + /// The context for the SourceMapFormat implementation + ctx: SourceMapper = undefined, + line_offset_table_byte_offset_list: []const u32 = &.{}, // This is a workaround for a bug in the popular "source-map" library: @@ -1397,17 +1531,33 @@ pub const Chunk = struct { pub noinline fn generateChunk(b: *ThisBuilder, output: []const u8) Chunk { b.updateGeneratedLineAndColumn(output); - if (b.prepend_count) { - b.source_map.getBuffer().list.items[0..8].* = @as([8]u8, @bitCast(b.source_map.getBuffer().list.items.len)); - b.source_map.getBuffer().list.items[8..16].* = @as([8]u8, @bitCast(b.source_map.getCount())); - b.source_map.getBuffer().list.items[16..24].* = @as([8]u8, @bitCast(b.approximate_input_line_count)); + + // Handle compact format specially + const CompactSourceMapFormat = @import("compact.zig").CompactSourceMapFormat; + var compact_data: ?@import("compact.zig").CompactSourceMap = null; + + if (SourceMapFormatType == CompactSourceMapFormat) { + // Just get the compact sourcemap directly - no VLQ generation + compact_data = b.source_map.ctx.getCompactSourceMap() catch bun.outOfMemory(); } + + if (b.prepend_count) { + // Only applies to the standard VLQ format + var buffer = b.source_map.getBuffer(); + if (buffer.list.items.len >= 24) { + buffer.list.items[0..8].* = @as([8]u8, @bitCast(buffer.list.items.len)); + buffer.list.items[8..16].* = @as([8]u8, @bitCast(b.source_map.getCount())); + buffer.list.items[16..24].* = @as([8]u8, 
@bitCast(b.approximate_input_line_count)); + } + } + return Chunk{ .buffer = b.source_map.getBuffer(), .mappings_count = b.source_map.getCount(), .end_state = b.prev_state, .final_generated_column = b.generated_column, .should_ignore = b.source_map.shouldIgnore(), + .compact_data = compact_data, }; } @@ -1558,6 +1708,9 @@ pub const Chunk = struct { } pub const Builder = NewBuilder(VLQSourceMap); + + /// Builder for compact sourcemap format + pub const CompactBuilder = NewBuilder(@import("compact.zig").Format); }; /// https://sentry.engineering/blog/the-case-for-debug-ids @@ -1586,3 +1739,82 @@ pub const LineOffsetTable = @import("./LineOffsetTable.zig"); const decodeVLQAssumeValid = vlq.decodeAssumeValid; const VLQ = vlq.VLQ; const decodeVLQ = vlq.decode; + +/// Create a SourceMap from a Chunk, properly handling the format based on selected option +pub fn fromChunk( + allocator: std.mem.Allocator, + chunk: Chunk, + sources: [][]const u8, + sources_content: []string, + source_map_option: @import("../options.zig").SourceMapOption, +) !*SourceMap { + // Create a new SourceMap + const source_map = try allocator.create(SourceMap); + errdefer allocator.destroy(source_map); + + source_map.* = SourceMap{ + .sources = sources, + .sources_content = sources_content, + .mapping = Mapping.List{}, + .allocator = allocator, + .compact_mapping = null, + }; + + // Check if we should use compact format + const use_compact = source_map_option.shouldUseCompactFormat(); + + // Handle different cases based on available data and requested format + if (chunk.compact_data) |compact_data| { + // We have compact data already from the generation process + if (use_compact) { + // Use compact format directly - this is the optimal case + source_map.compact_mapping = compact_data; + } else { + // VLQ format was requested despite having compact data + // Convert the compact data to VLQ - this is less efficient + source_map.mapping = try compact_data.decode(allocator); + } + } else { + // We have VLQ data - this is typical for standard format + if (chunk.buffer.list.items.len > 0) { + // Parse the VLQ mappings + const parse_result = switch (Mapping.parse( + allocator, + chunk.buffer.list.items, + null, + @as(i32, @intCast(sources.len)), + @max(1, @as(i32, @intCast(sources_content.len))), + )) { + .success => |parsed| parsed.mappings, + .fail => |_| return error.InvalidSourceMap, + }; + + source_map.mapping = parse_result; + + // Convert to compact format if requested + if (use_compact) { + const CompactSourceMap = @import("compact.zig"); + var compact = try CompactSourceMap.create(allocator); + + // Add all mappings from the standard format + for (0..source_map.mapping.len) |i| { + const mapping = Mapping{ + .generated = source_map.mapping.items(.generated)[i], + .original = source_map.mapping.items(.original)[i], + .source_index = source_map.mapping.items(.source_index)[i], + }; + + try compact.addMapping(mapping); + } + + // Finalize any pending block + try compact.finalizeCurrentBlock(); + + // Set the compact mapping + source_map.compact_mapping = compact; + } + } + } + + return source_map; +} diff --git a/src/transpiler.zig b/src/transpiler.zig index 8cefe71501..8155b4ed19 100644 --- a/src/transpiler.zig +++ b/src/transpiler.zig @@ -870,6 +870,7 @@ pub const Transpiler = struct { .minify_syntax = transpiler.options.minify_syntax, .minify_identifiers = transpiler.options.minify_identifiers, .transform_only = transpiler.options.transform_only, + .use_compact_sourcemap = true, .module_type = if (is_bun and 
transpiler.options.transform_only) // this is for when using `bun build --no-bundle` // it should copy what was passed for the cli
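
Editor's note: the sketch below is not part of the patch. It illustrates how the serialization helpers introduced in compact.zig are intended to fit together, assuming the file lands at src/sourcemap/compact.zig and that the pub declarations shown in the diff (CompactSourceMap, serializeCompactSourceMap, isCompactSourceMap, deserializeCompactSourceMap, decode, deinit) compile as written. The import path and the roundTrip helper name are illustrative only.

const std = @import("std");
const compact = @import("./sourcemap/compact.zig"); // hypothetical import path for the new file

/// Serialize a compact sourcemap, validate the magic, deserialize it, and expand it back
/// into the standard Mapping.List form.
fn roundTrip(allocator: std.mem.Allocator, map: compact.CompactSourceMap) !void {
    // Binary layout written by serializeCompactSourceMap:
    // CompactSourceMapHeader, then per block: BaseValues, u16 count, u32 data length, encoded data.
    const bytes = try compact.serializeCompactSourceMap(map, allocator);
    defer allocator.free(bytes);

    // The "CSAM" magic lets consumers distinguish compact data from plain VLQ text.
    std.debug.assert(compact.isCompactSourceMap(bytes));

    var restored = try compact.deserializeCompactSourceMap(allocator, bytes);
    defer restored.deinit(allocator);

    // decode() expands the delta-encoded blocks back into a standard Mapping.List.
    var mappings = try restored.decode(allocator);
    defer mappings.deinit(allocator);

    std.debug.assert(mappings.len == map.mapping_count);
}

This mirrors the conversion path the patch takes in SavedSourceMap.onSourceMapChunk and fromChunk, where compact data is either consumed directly or expanded when a VLQ consumer is on the other end.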
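A second illustrative sketch, again not part of the patch: the InlineCompactSourceMapHeader packs a 4-bit version and a 12-bit block count into a single u16 so the inline (base64-encoded) form stays small. The test below only exercises the init/getVersion/getBlockCount helpers defined in the diff; the test name and import path are assumptions.

const std = @import("std");
const compact = @import("./sourcemap/compact.zig"); // hypothetical import path for the new file

test "InlineCompactSourceMapHeader packs version and block count into one u16" {
    // 4-bit version in the top nibble, 12-bit block count in the low bits;
    // init() clamps block_count to 0xFFF and mapping_count to 0xFFFF before truncating.
    const header = compact.InlineCompactSourceMapHeader.init(300, 5000, 1);

    try std.testing.expectEqual(@as(u4, 1), header.getVersion());
    try std.testing.expectEqual(@as(u12, 300), header.getBlockCount());
    try std.testing.expectEqual(@as(u16, 5000), header.mapping_count);
    try std.testing.expectEqual(@as(u16, 0x4353), header.magic); // "CS"
}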